From 12b4117668fd3c3c24f33279b8243cf44bba5825 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 26 Oct 2023 09:19:23 -0700 Subject: [PATCH 01/91] add scripts for reiding --- nmdc_automation/re_iding/re_id.py | 347 ++++++++++++++++++++++++++++++ nmdc_automation/re_iding/utils.py | 31 +++ 2 files changed, 378 insertions(+) create mode 100755 nmdc_automation/re_iding/re_id.py create mode 100644 nmdc_automation/re_iding/utils.py diff --git a/nmdc_automation/re_iding/re_id.py b/nmdc_automation/re_iding/re_id.py new file mode 100755 index 00000000..8538c1e4 --- /dev/null +++ b/nmdc_automation/re_iding/re_id.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python +import os +import sys +import re +from pymongo import MongoClient +import hashlib +import json +from subprocess import check_output +from nmdc_automation.api import NmdcRuntimeApi +from nmdc_automation.config import Config + +base = "https://data.microbiomedata.org/data" +base_dir = "/global/cfs/cdirs/m3408/results" + + +sets = [ + "read_qc_analysis_activity_set", + "metagenome_assembly_set", + "read_based_taxonomy_analysis_activity_set" + ] + + +mapping_log = open("mapping.log", "a") + + +def log_mapping(idtype, old, new): + """ + Logs the mapping information. + + Parameters: + - idtype: The type of the ID (e.g., 'data', 'activity') + - old: The old ID value + - new: The new ID value + """ + mapping_log.write(f"{idtype}\t{old}\t{new}\n") + + +def read_map(): + """ + Reads a mapping list from a file and returns it as a dictionary. + + Returns: + - omap: A dictionary with old ID as key and new ID as value. + """ + omap = {} + with open("map.lst") as f: + for line in f: + (k, v) = line.rstrip().split("\t") + omap[k] = v + return omap + + +def find_dir(db, old_id): + """ + Finds and returns the directory name associated with the given ID. 
+ + Parameters: + - db: Database connection object + - old_id: The old ID for which the directory name is required + """ + query_by_omics_id = {"was_informed_by": old_id} + activity_record = db.read_qc_analysis_activity_set.find_one(query_by_omics_id) + query_by_id = {"id": activity_record['has_output'][0]} + data_object_record = db.data_object_set.find_one(query_by_id) + return data_object_record['url'].split('/')[4] + + +def process(db, old_id, new_id): + """ + Process the given old ID and returns the associated data. + + Parameters: + - db: Database connection object + - old_id: The old ID to be processed + - new_id: The new ID to be used + + Returns: + - out: A dictionary containing processed data. + """ + directory_name = find_dir(db, old_id) + assocaited_data_object = {"data_object_set": []} + for col in sets: + query_by_old_id = {"was_informed_by": old_id} + res = db[col].find(query_by_old_id) + count = 0 + for doc in res: + count += 1 + if count != 1: + raise ValueError("Too many matches. Failing") + doc.pop("_id") + atype = doc['type'] + func_name = atype.lower().replace("nmdc:", "") + func = globals()[func_name] + activity_records, data_object_records = func(db, doc, new_id) + assocaited_data_object[col] =[activity_records] + assocaited_data_object["data_object_set"].extend(data_object_records) + json.dump(assocaited_data_object, open(f"{new_id}.json", "w"), indent=2) + return assocaited_data_object + + +def minter(config,shoulder): + """ + Creates a new ID based on the provided shoulder. 
+ + Parameters: + - shoulder: The base string for creating the new ID + + Returns: + - A new ID string + """ + + runtime_api = NmdcRuntimeApi(config) + + return runtime_api.minter(shoulder) + +def compute_new_paths(old_url, new_base_dir, omic_id, act_id): + """ + Use the url to compute the new file name path and url + """ + file_name = old_url.split("/")[-1] + file_extenstion = file_name.lstrip("nmdc_").split("_", maxsplit=1)[-1] + new_file_name = f"{act_id}_{file_extenstion}" + destination = os.path.join(new_base_dir, new_file_name) + new_url = f"{base}/{omic_id}/{act_id}/{new_file_name}" + return new_url, destination, new_file_name + + +def md5_sum(fn): + """ + Calculate the MD5 hash of a file. + + Args: + - fn (str): Path to the file for which the MD5 hash is to be computed. + + Returns: + - str: The MD5 hash of the file. + """ + with open(fn, "rb") as f: + file_hash = hashlib.md5() + while chunk := f.read(8192): + file_hash.update(chunk) + return file_hash.hexdigest() + + +def find_type(obj): + """ + Determine the data type of an object based on its URL extension. + + Args: + - obj (dict): Dictionary containing the 'url' key which will be inspected to determine the data type. + + Returns: + - str: The determined data type or None if the type could not be determined. + """ + if "data_object_type" in obj: + return obj["data_object_type"] + url = obj["url"] + if url.endswith("_covstats.txt"): + return "Assembly Coverage Stats" + elif url.endswith("_gottcha2_report.tsv"): + return "GOTTCHA2 Classification Report" + elif url.endswith("_gottcha2_report_full.tsv"): + return "GOTTCHA2 Report Full" + else: + sys.stderr.write(f"Missing type: {url}") + return None + + +def copy_outputs(db, outputs, omic_id, act_id): + """ + Copy output data objects and generate new metadata for them. + + Args: + - db (MongoClient): MongoDB client instance to fetch data. + - outputs (list): List of output object IDs. + - omic_id (str): ID of the omics process. 
+ - act_id (str): ID of the activity. + + Returns: + - tuple: List of new object IDs and the new objects themselves. + """ + new_data_objects = [] + new_ids = [] + new_base_dir = os.path.join(base_dir, omic_id, act_id) + os.makedirs(new_base_dir, exist_ok=True) + for data_obj_id in outputs: + data_obj = db.data_object_set.find_one({"id": data_obj_id}) + old_url = data_obj["url"] + new_url, dst, new_fn = compute_new_paths(old_url, new_base_dir, omic_id, act_id) + new_id = minter("dobj") + log_mapping("data", data_obj["id"], new_id) + + # Create new obj + data_obj.pop("_id") + desc = data_obj["description"] + data_obj["description"] = re.sub('[^ ]+$', f"{omic_id}", desc) + data_obj["url"] = new_url + data_obj["id"] = new_id + data_obj["name"] = new_fn + data_type = find_type(data_obj) + data_obj["data_object_type"] = data_type + + # Link the file + src = old_url.replace(base, base_dir) + func_name = "bogus" + if data_type: + func_name = data_type.replace(" ", "_").lower() + if func_name in globals(): + sys.stderr.write(f"Using func {func_name}\n") + func = globals()[func_name] + md5, size = func(src, dst, omic_id, act_id) + data_obj["file_size_bytes"] = size + data_obj["md5_checksum"] = md5 + else: + os.link(src, dst) + + # Add to the lists + new_ids.append(new_id) + new_data_objects.append(data_obj) + return new_ids, new_data_objects + + +def rewrite_id(src, dst, old_id, new_id, prefix=None): + """ + Rewrite lines in a file, replacing occurrences of an old ID with a new ID. + An optional prefix can be specified to limit which lines are modified. + + Args: + - src (str): Source file path. + - dst (str): Destination file path. + - old_id (str): ID to be replaced. + - new_id (str): Replacement ID. + - prefix (str, optional): Prefix character to determine which lines to modify. Defaults to None. + + Returns: + - tuple: MD5 checksum and size (in bytes) of the modified file. 
+ """ + fsrc = open(src) + fdst = open(dst, "w") + for line in fsrc: + if not prefix or (prefix and line[0] == prefix): + line = line.replace(old_id, new_id) + fdst.write(line) + fsrc.close() + fdst.close() + md5 = md5_sum(dst) + size = os.stat(dst).st_size + return md5, size + + +def find_assembly_id(src): + fsrc = open(src) + line = fsrc.readline() + return "_".join(line[1:].split("_")[0:-1]) + + +def assembly_contigs(src, dst, omic_id, act_id): + scaf = src.replace("_contigs", "_scaffolds") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id, prefix=">") + + +def assembly_scaffolds(src, dst, omic_id, act_id): + old_id = find_assembly_id(src) + return rewrite_id(src, dst, old_id, act_id, prefix=">") + + +def assembly_coverage_stats(src, dst, omic_id, act_id): + scaf = src.replace("_covstats.txt", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id) + + +def assembly_agp(src, dst, omic_id, act_id): + scaf = src.replace("_assembly.agp", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id) + + +def convert_script(script, src, dst, old_id, act_id): + cmd = ["./rewrite_bam.sh", src, dst, old_id, act_id] + results = check_output(cmd) + md5 = md5_sum(dst) + size = os.stat(dst).st_size + return md5, size + + +def assembly_coverage_bam(src, dst, omic_id, act_id): + scaf = src.replace("_pairedMapped_sorted.bam", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return convert_script("./rewrite_bam.sh", src, dst, old_id, act_id) + + +def xassembly_info_file(src, dst, omic_id, act_id): + return [] + +def readqcanalysisactivity(db, doc, new_omic_id): + new_act_id = minter("wfrqc") + ".1" + log_mapping("activity", doc["id"], new_act_id) + doc["id"] = new_act_id + doc["git_url"] = "https://github.com/microbiomedata/ReadsQC" + doc["version"] = "v1.0.8" + doc["was_informed_by"] = new_omic_id + doc["name"] = f"Read QC Activity for {new_act_id}" + new_ids, 
new_data_objects = copy_outputs(db, doc['has_output'], new_omic_id, new_act_id) + doc["has_output"] = new_ids + return doc, new_data_objects + + +def metagenomeassembly(db, doc, new_omic_id): + new_act_id = minter("wfmgas") + ".1" + log_mapping("activity", doc["id"], new_act_id) + doc["id"] = new_act_id + doc["git_url"] = "https://github.com/microbiomedata/metaAssembly" + doc["version"] = "v1.0.3" + doc["was_informed_by"] = new_omic_id + doc["name"] = f"Metagenome Assembly Activity for {new_act_id}" + new_ids, new_data_objects = copy_outputs(db, doc['has_output'], new_omic_id, new_act_id) + doc["has_output"] = new_ids + return doc, new_data_objects + +def readbasedanalysis(db, doc, new_omic_id): + new_act_id = minter("wfrbt") + ".1" + log_mapping("activity", doc["id"], new_act_id) + doc["id"] = new_act_id + doc["git_url"] = "https://github.com/microbiomedata/ReadbasedAnalysis" + doc["version"] = "v1.0.5" + doc["was_informed_by"] = new_omic_id + doc["name"] = f"Metagenome Assembly Activity for {new_act_id}" + new_ids, new_data_objects = copy_outputs(db, doc['has_output'], new_omic_id, new_act_id) + doc["has_output"] = new_ids + return doc, new_data_objects + +if __name__ == "__main__": + mongo_url = os.environ["MONGO_URL"] + client = MongoClient(mongo_url, directConnection=True) + db = client.nmdc + # Read mapping list + # This should have: + # was_informed_by_old\twas_informed_by_new + # e.g. 
+ # nmdc:mga0xxxxx nmdc:omprc-11-xxxxx + omic_map = read_map() + for omic in omic_map: + process(db, omic, omic_map[omic]) + # for each omics process + # for act in [ diff --git a/nmdc_automation/re_iding/utils.py b/nmdc_automation/re_iding/utils.py new file mode 100644 index 00000000..d77efc71 --- /dev/null +++ b/nmdc_automation/re_iding/utils.py @@ -0,0 +1,31 @@ +import subprocess +import gzip + +def rewrite_bam(type, old_bam, new_bam, old_id, new_id): + + if type == "inside": + print(f"Rewriting {new_bam}") + + cmd1 = ["samtools", "view", "-h", old_bam] + cmd2 = ["sed", f"s/{old_id}/{new_id}/g"] + cmd3 = ["samtools", "view", "-hb", "-o", ] + + # Create a pipeline: cmd1 | cmd2 | cmd3 + p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE) + p2 = subprocess.Popen(cmd2, stdin=p1.stdout, stdout=subprocess.PIPE) + p1.stdout.close() + p3 = subprocess.Popen(cmd3, stdin=p2.stdout) + p2.stdout.close() + p3.communicate() + + else: + with open(new_bam, 'w') as f: + pass # touch file + + +def rewrite_sam(input_sam, output_sam, old_id, new_id): + + with gzip.open(input_sam, 'rt') as f_in, gzip.open(output_sam, 'wt') as f_out: + for line in f_in: + f_out.write(line.replace(old_id, new_id)) + From 913262d9f804f46be115bcf7dda4977395606d2c Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 26 Oct 2023 10:23:06 -0700 Subject: [PATCH 02/91] update .gitignore to exclude pycharm project files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 01196e5a..0f010ee0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ test_data/afile.sha256 htmlcov/ .coverage attic +.idea/ From 04cc8eda04d219016ec297d995d840b5769eefbb Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 26 Oct 2023 11:45:03 -0700 Subject: [PATCH 03/91] initial script framework, Napa config file, and run_query method on API client --- configs/napa_config.toml | 29 ++++++++ nmdc_automation/api/nmdcapi.py | 6 ++ .../rebuild_metagenome_workflow_records.py | 69 
+++++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100644 configs/napa_config.toml create mode 100644 nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py diff --git a/configs/napa_config.toml b/configs/napa_config.toml new file mode 100644 index 00000000..de986b9e --- /dev/null +++ b/configs/napa_config.toml @@ -0,0 +1,29 @@ +[cromwell] +cromwell_url = "http://localhost:8088/api/workflows/v1" +cromwell_api = "http://localhost:9999" + +[directories] +stage_dir = "/path/to/stage/dir" +template_dir = "/path/to/template/dir" +data_dir = "/tmp" +raw_dir = "/path/to/raw/data/files" + +[site] +resource = "Resource Name" +site = "Processing Site" + +[nmdc] +url_root = "https://data.microbiomedata.org/data/" +api_url = "https://api-napa.microbiomedata.org/" + +[state] +watch_state = "State File" +agent_state = "/tmp/agent.state" +activity_id_state = "/Path/to/activity_id_state" + +[workflows] +workflows_config = "./configs/workflows.yaml" + +[credentials] +client_id = "xxxxxx" +client_secret = "xxxxxxxx" diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 1412dd02..a90a5370 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -304,6 +304,12 @@ def update_op(self, opid, done=None, results=None, meta=None): resp = requests.patch(url, headers=self.header, data=json.dumps(d)) return resp.json() + @refresh_token + def run_query(self, query): + url = "%squeries:run" % self._base_url + resp = requests.post(url, headers=self.header, data=json.dumps(query)) + return resp.json() + def jprint(obj): print(json.dumps(obj, indent=2)) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py new file mode 100644 index 00000000..09ecfee4 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# coding: utf-8 +# 
nmdc_schema/napa_compliance/scripts/rebuild_metagenome_workflow_records.py +""" +rebuild_metagenome_workflow_records.py: Rebuild metagenome workflow records +after re-ID-ing of OmicsProcessing records. +""" +import logging +import os +from pathlib import Path +# import requests +import time + +import click + +from nmdc_automation.config import Config +from nmdc_automation.api import NmdcRuntimeApi + +GOLD_STUDY_ID = "gold:Gs0114663" +STUDY_ID = "nmdc:sty-11-aygzgv51" +NAPA_CONFIG = Path("../../../configs/napa_config.toml") + +@click.command() +@click.option("--study_id", default=STUDY_ID, help="Updated study ID") +@click.option("--site_config", type=click.Path(exists=True), + default=NAPA_CONFIG, help="Site configuration file") +def rebuild_workflow_records(study_id: str, site_config: bool): + """ + Rebuild metagenome workflow records after re-ID-ing of Study, Biosample, and + OmicsProcessing records by: + 1. Retrieving all OmicsProcessing records for updated study ID + 2. For each OmicsProcessing record, retrieve the corresponding + WorkflowExecutionActivity records: + a. ReadQcAnalysisActivity + b. ReadBasedTaxonomyAnalysisActivity + c. MetagenomeAssembly + d. MetagenomeAnnotationActivity + e. MagsAnalysisActivity + 3. For each WorkflowExecutionActivity record: + TODO - summarize Michal's flowchart here + + """ + start_time = time.time() + logging.info("starting missing_neon_soils_ecosystem_data.py...") + logging.info(f"study_id: {study_id}") + + runtime_api_client = NmdcRuntimeApi(site_config) + + + + # 1. Retrieve all OmicsProcessing records for the given GOLD study ID + # https://api-napa.microbiomedata.org/omics_processing_sets?find=omics_processing_set&filter=part_of%3Agold:Gs0114663&per_page=99&page=1 + + params = { + "find": "omics_processing_set", + "filter": { + "part_of": { + "$elemMatch": {"$eq": study_id} + } + } + } + response = runtime_api_client.run_query(params) + print(response) + + # 2. 
For each OmicsProcessing record, retrieve the corresponding + + +if __name__ == "__main__": + rebuild_workflow_records() \ No newline at end of file From 1b118ba7bcee53a8e3f31cff9eadf1a2f5838cba Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 26 Oct 2023 12:05:30 -0700 Subject: [PATCH 04/91] added api_url reference --- configs/site_configuration.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/site_configuration.toml b/configs/site_configuration.toml index ff716dd0..866db4c9 100644 --- a/configs/site_configuration.toml +++ b/configs/site_configuration.toml @@ -14,6 +14,7 @@ site = "Processing Site" [nmdc] url_root = "https://data.microbiomedata.org/data/" +api_url = "https://api.microbiomedata.org" [state] watch_state = "State File" @@ -21,7 +22,7 @@ agent_state = "/tmp/agent.state" activity_id_state = "/Path/to/activity_id_state" [workflows] -workflows_config = "./configs/workflows.yaml" +workflows_config = "/configs/workflows.yaml" [credentials] client_id = "xxxxxx" From 4ea1cf3cf85227ef7256dac1f8236ecba9fc13d6 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 26 Oct 2023 13:49:01 -0700 Subject: [PATCH 05/91] minimal config for reiding workflows --- configs/re_iding_worklfows.yaml | 184 ++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 configs/re_iding_worklfows.yaml diff --git a/configs/re_iding_worklfows.yaml b/configs/re_iding_worklfows.yaml new file mode 100644 index 00000000..7fbce6ef --- /dev/null +++ b/configs/re_iding_worklfows.yaml @@ -0,0 +1,184 @@ +Workflows: + - Name: Reads QC + Type: nmdc:ReadQcAnalysisActivity + Enabled: True + Git_repo: https://github.com/microbiomedata/ReadsQC + Version: v1.0.8 + WDL: rqcfilter.wdl + Collection: read_qc_analysis_activity_set + Filter Input Objects: + - Metagenome Raw Reads + Predecessors: + - Sequencing + - Sequencing Interleaved + Input_prefix: nmdc_rqcfilter + Inputs: + input_files: do:Metagenome Raw Reads + proj: 
"{activity_id}" + Activity: + name: "Read QC Activity for {id}" + input_read_bases: "{outputs.stats.input_read_bases}" + input_read_count: "{outputs.stats.input_read_count}" + output_read_bases: "{outputs.stats.output_read_bases}" + output_read_count: "{outputs.stats.output_read_count}" + type: nmdc:ReadQcAnalysisActivity + Outputs: + - output: filtered_final + name: Reads QC result fastq (clean data) + suffix: "_filtered.fastq.gz" + data_object_type: Filtered Sequencing Reads + description: "Reads QC for {id}" + - output: filtered_stats_final + name: Reads QC summary statistics + suffix: "_filterStats.txt" + data_object_type: QC Statistics + description: "Reads QC summary for {id}" + - output: rqc_info + name: File containing read filtering information + suffix: "_readsQC.info" + data_object_type: Read Filtering Info File + description: "Read filtering info for {id}" + + - Name: Metagenome Assembly + Type: nmdc:MetagenomeAssembly + Enabled: True + Git_repo: https://github.com/microbiomedata/metaAssembly + Version: v1.0.3 + WDL: jgi_assembly.wdl + Collection: metagenome_assembly_set + Predecessors: + - Reads QC + - Reads QC Interleave + Input_prefix: jgi_metaASM + Inputs: + input_file: do:Filtered Sequencing Reads + rename_contig_prefix: "{activity_id}" + proj: "{activity_id}" + Activity: + name: "Metagenome Assembly Activity for {id}" + type: nmdc:MetagenomeAssembly + asm_score: "{outputs.stats.asm_score}" + contig_bp: "{outputs.stats.contig_bp}" + contigs: "{outputs.stats.contigs}" + ctg_l50: "{outputs.stats.ctg_l50}" + ctg_l90: "{outputs.stats.ctg_l90}" + ctg_logsum: "{outputs.stats.ctg_logsum}" + ctg_max: "{outputs.stats.ctg_max}" + ctg_n50: "{outputs.stats.ctg_n50}" + ctg_n90: "{outputs.stats.ctg_n90}" + ctg_powsum: "{outputs.stats.ctg_powsum}" + gap_pct: "{outputs.stats.gap_pct}" + gc_avg: "{outputs.stats.gc_avg}" + gc_std: "{outputs.stats.gc_std}" + scaf_bp: "{outputs.stats.scaf_bp}" + scaf_l50: "{outputs.stats.scaf_l50}" + scaf_l90: 
"{outputs.stats.scaf_l90}" + scaf_l_gt50k: "{outputs.stats.scaf_l_gt50k}" + scaf_logsum: "{outputs.stats.scaf_logsum}" + scaf_max: "{outputs.stats.scaf_max}" + scaf_n50: "{outputs.stats.scaf_n50}" + scaf_n90: "{outputs.stats.scaf_n90}" + scaf_n_gt50k: "{outputs.stats.scaf_n_gt50k}" + scaf_pct_gt50k: "{outputs.stats.scaf_pct_gt50k}" + scaf_powsum: "{outputs.stats.scaf_powsum}" + scaffolds: "{outputs.stats.scaffolds}" + Outputs: + - output: contig + name: Final assembly contigs fasta + suffix: "_contigs.fna" + data_object_type: Assembly Contigs + description: "Assembly contigs for {id}" + - output: scaffold + name: Final assembly scaffolds fasta + suffix: "_scaffolds.fna" + data_object_type: Assembly Scaffolds + description: "Assembly scaffolds for {id}" + - output: covstats + name: Assembled contigs coverage information + suffix: "_covstats.txt" + data_object_type: Assembly Coverage Stats + description: "Coverage Stats for {id}" + - output: agp + name: An AGP format file that describes the assembly + suffix: "_assembly.agp" + data_object_type: Assembly AGP + description: "AGP for {id}" + - output: bam + name: Sorted bam file of reads mapping back to the final assembly + suffix: "_pairedMapped_sorted.bam" + data_object_type: Assembly Coverage BAM + description: "Sorted Bam for {id}" + - output: asminfo + name: File containing assembly info + suffix: "_metaAsm.info" + data_object_type: Assembly Info File + description: "Assembly info for {id}" + + - Name: Readbased Analysis + Type: nmdc:ReadBasedTaxonomyAnalysisActivity + Enabled: True + Git_repo: https://github.com/microbiomedata/ReadbasedAnalysis + Version: v1.0.5 + WDL: ReadbasedAnalysis.wdl + Collection: read_based_taxonomy_analysis_activity_set + Predecessors: + - Reads QC + - Reads QC Interleave + Input_prefix: ReadbasedAnalysis + Inputs: + input_file: do:Filtered Sequencing Reads + proj: "{activity_id}" + Activity: + name: "Readbased Taxonomy Analysis Activity for {id}" + type: 
nmdc:ReadBasedTaxonomyAnalysisActivity + Outputs: + - output: final_gottcha2_report_tsv + data_object_type: GOTTCHA2 Classification Report + description: GOTTCHA2 Classification for {id} + name: GOTTCHA2 classification report file + suffix: _gottcha2_report.tsv + - output: final_gottcha2_full_tsv + data_object_type: GOTTCHA2 Report Full + description: GOTTCHA2 Full Report for {id} + name: GOTTCHA2 report file + suffix: _gottcha2_full_tsv + - output: final_gottcha2_krona_html + data_object_type: GOTTCHA2 Krona Plot + description: GOTTCHA2 Krona for {id} + name: GOTTCHA2 krona plot HTML file + suffix: _gottcha2_krona.html + - output: final_centrifuge_classification_tsv + data_object_type: Centrifuge Taxonomic Classification + description: Centrifuge Classification for {id} + name: Centrifuge output read classification file + suffix: _centrifuge_classification.tsv + - output: final_centrifuge_report_tsv + data_object_type: Centrifuge output report file + description: Centrifuge Report for {id} + name: Centrifuge Classification Report + suffix: _centrifuge_report.tsv + - output: final_centrifuge_krona_html + data_object_type: Centrifuge Krona Plot + description: Centrifuge Krona for {id} + name: Centrifug krona plot HTML file + suffix: _centrifuge_krona.html + - output: final_kraken2_classification_tsv + data_object_type: Kraken2 Taxonomic Classification + description: Kraken2 Classification for {id} + name: Kraken2 output read classification file + suffix: _kraken2_classification.tsv + - output: final_kraken2_report_tsv + data_object_type: Kraken2 Classification Report + description: Kraken2 Report for {id} + name: Kraken2 output report file + suffix: _kraken2_report.tsv + - output: final_kraken2_krona_html + data_object_type: Kraken2 Krona Plot + description: Kraken2 Krona for {id} + name: Kraken2 Krona plot HTML file + suffix: _kraken2_krona.html + - output: info_file + data_object_type: Read Based Analysis Info File + description: Read based analysis info for {id} 
+ name: File containing reads based analysis information + suffix: profiler.info \ No newline at end of file From 210889068d21348b986fdc9f7c2fbe190135d18c Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 26 Oct 2023 13:53:01 -0700 Subject: [PATCH 06/91] added activity record and data object creation --- nmdc_automation/re_iding/re_id.py | 92 +++++++++++++++++++------------ 1 file changed, 56 insertions(+), 36 deletions(-) diff --git a/nmdc_automation/re_iding/re_id.py b/nmdc_automation/re_iding/re_id.py index 8538c1e4..0acf2a3b 100755 --- a/nmdc_automation/re_iding/re_id.py +++ b/nmdc_automation/re_iding/re_id.py @@ -2,12 +2,20 @@ import os import sys import re +import yaml +import datetime +import pytz from pymongo import MongoClient import hashlib import json from subprocess import check_output from nmdc_automation.api import NmdcRuntimeApi from nmdc_automation.config import Config +import nmdc_schema.nmdc as nmdc +from linkml_runtime.dumpers import json_dumper + +###GLOBAL###### +nmdc_db = nmdc.Database() base = "https://data.microbiomedata.org/data" base_dir = "/global/cfs/cdirs/m3408/results" @@ -22,6 +30,11 @@ mapping_log = open("mapping.log", "a") +def read_workflows_config(config_file): + with open(config_file, "r") as file: + workflow_data = yaml.safe_load(file) + + return workflow_data def log_mapping(idtype, old, new): """ @@ -113,6 +126,7 @@ def minter(config,shoulder): return runtime_api.minter(shoulder) + def compute_new_paths(old_url, new_base_dir, omic_id, act_id): """ Use the url to compute the new file name path and url @@ -294,42 +308,48 @@ def assembly_coverage_bam(src, dst, omic_id, act_id): def xassembly_info_file(src, dst, omic_id, act_id): return [] -def readqcanalysisactivity(db, doc, new_omic_id): - new_act_id = minter("wfrqc") + ".1" - log_mapping("activity", doc["id"], new_act_id) - doc["id"] = new_act_id - doc["git_url"] = "https://github.com/microbiomedata/ReadsQC" - doc["version"] = "v1.0.8" - doc["was_informed_by"] = 
new_omic_id - doc["name"] = f"Read QC Activity for {new_act_id}" - new_ids, new_data_objects = copy_outputs(db, doc['has_output'], new_omic_id, new_act_id) - doc["has_output"] = new_ids - return doc, new_data_objects - - -def metagenomeassembly(db, doc, new_omic_id): - new_act_id = minter("wfmgas") + ".1" - log_mapping("activity", doc["id"], new_act_id) - doc["id"] = new_act_id - doc["git_url"] = "https://github.com/microbiomedata/metaAssembly" - doc["version"] = "v1.0.3" - doc["was_informed_by"] = new_omic_id - doc["name"] = f"Metagenome Assembly Activity for {new_act_id}" - new_ids, new_data_objects = copy_outputs(db, doc['has_output'], new_omic_id, new_act_id) - doc["has_output"] = new_ids - return doc, new_data_objects - -def readbasedanalysis(db, doc, new_omic_id): - new_act_id = minter("wfrbt") + ".1" - log_mapping("activity", doc["id"], new_act_id) - doc["id"] = new_act_id - doc["git_url"] = "https://github.com/microbiomedata/ReadbasedAnalysis" - doc["version"] = "v1.0.5" - doc["was_informed_by"] = new_omic_id - doc["name"] = f"Metagenome Assembly Activity for {new_act_id}" - new_ids, new_data_objects = copy_outputs(db, doc['has_output'], new_omic_id, new_act_id) - doc["has_output"] = new_ids - return doc, new_data_objects +def make_activity_set(nmdc_db, omics_id, has_input, has_output,workflow_record): + database_activity_set = getattr(nmdc_db, workflow_record["Collection"]) + # Lookup the nmdc schema range class + database_activity_range = getattr(nmdc_db, workflow_record["ActivityRange"]) + # Mint an ID + new_id = minter(workflow_record["Type"]) + + activity_id = new_id + database_activity_set.append( + database_activity_range( + id=activity_id, + name=workflow_record["Activity"]["name"].replace("{id}", activity_id), + git_url=workflow_record["Git_repo"], + version=workflow_record["Version"], + part_of=[omics_id], + execution_resource="Perlmutter - Nersc", + started_at_time=datetime.datetime.now(pytz.utc).isoformat(), + has_input=has_input, + 
has_output=has_output, + type=workflow_record["Type"], + ended_at_time=datetime.datetime.now(pytz.utc).isoformat(), + was_informed_by=omics_id, + ) + ) + +def make_data_object(data_object_record, omics_id): + + nmdc_db.data_object_set.append( + nmdc.DataObject( + file_size_bytes=data_object_record["file_size"], + name=data_object_record["data_object_name"], + url=data_object_record["data_object_url"], + data_object_type=["data_object_type"], + type="nmdc:DataObject", + id=minter("nmdc:DataObject"), + md5_checksum=data_object_record["md5_checksum"], + description=data_object_record["description"].replace( + "{id}",omics_id + ), + ) + ) + if __name__ == "__main__": mongo_url = os.environ["MONGO_URL"] From 444ee0d76dd097c91e8bf407ca9f71cf87c4ec1b Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 26 Oct 2023 15:07:34 -0700 Subject: [PATCH 07/91] Add basic RuntimeUserApi client, and Napa specific configurations --- configs/napa_config.toml | 8 ++++- nmdc_automation/api/__init__.py | 2 +- nmdc_automation/api/nmdcapi.py | 62 +++++++++++++++++++++++++++++++- nmdc_automation/config/config.py | 11 ++++++ 4 files changed, 80 insertions(+), 3 deletions(-) diff --git a/configs/napa_config.toml b/configs/napa_config.toml index de986b9e..c969d3a3 100644 --- a/configs/napa_config.toml +++ b/configs/napa_config.toml @@ -14,7 +14,13 @@ site = "Processing Site" [nmdc] url_root = "https://data.microbiomedata.org/data/" -api_url = "https://api-napa.microbiomedata.org/" +api_url = "https://api.microbiomedata.org/" + +[napa] +base_url = "https://api-napa.microbiomedata.org/" +username = "mbthornton" +password = "H8jkYxc6rwrzA_k7g_fM" + [state] watch_state = "State File" diff --git a/nmdc_automation/api/__init__.py b/nmdc_automation/api/__init__.py index f4c70039..eff0d2e4 100644 --- a/nmdc_automation/api/__init__.py +++ b/nmdc_automation/api/__init__.py @@ -1 +1 @@ -from .nmdcapi import NmdcRuntimeApi +from .nmdcapi import NmdcRuntimeApi, NmdcRuntimeUserApi diff --git 
a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index a90a5370..416fe993 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -4,11 +4,12 @@ import sys import os from os.path import join, dirname +from pydantic import BaseModel import requests import hashlib import mimetypes from time import time -from datetime import datetime +from datetime import datetime, timedelta, timezone from nmdc_automation.config import Config import logging @@ -31,6 +32,10 @@ def _get_sha256(fn): f.write("\n") return sha +def expiry_dt_from_now(days=0, hours=0, minutes=0, seconds=0): + return datetime.now(timezone.utc) + timedelta(days=days, hours=hours, + minutes=minutes, + seconds=seconds) class NmdcRuntimeApi: token = None @@ -311,6 +316,61 @@ def run_query(self, query): return resp.json() +class NmdcRuntimeUserApi: + """ + Basic Runtime API Client with user/password authentication. + """ + def __init__(self, username: str, password: str, base_url: str): + self.username = username + self.password = password + self.base_url = base_url + self.headers = {} + self.token_response = None + self.refresh_token_after = None + + def ensure_token(self): + if (self.refresh_token_after is None or datetime.now(timezone.utc) > + self.refresh_token_after): + self.get_token() + def get_token(self): + token_request_body = { + "grant_type": "password", + "username": self.username, + "password": self.password, + "scope": '', + "client_id": "", + "client_secret": "", + } + headers = { + 'accept': 'application/json', + 'Content-Type': 'application/x-www-form-urlencoded', + } + rv = requests.post( + self.base_url + "token", data=token_request_body + ) + self.token_response = rv.json() + if "access_token" not in self.token_response: + raise Exception(f"Getting token failed: {self.token_response}") + + self.headers[ + "Authorization"] = f'Bearer {self.token_response["access_token"]}' + self.refresh_token_after = expiry_dt_from_now( + 
**self.token_response["expires"] + ) - timedelta(seconds=5) + + def request(self, method, url_path, params_or_json_data=None): + self.ensure_token() + kwargs = {"url": self.base_url + url_path, "headers": self.headers} + if isinstance(params_or_json_data, BaseModel): + params_or_json_data = params_or_json_data.dict(exclude_unset=True) + if method.upper() == "GET": + kwargs["params"] = params_or_json_data + else: + kwargs["json"] = params_or_json_data + rv = requests.request(method, **kwargs) + rv.raise_for_status() + return rv + def jprint(obj): print(json.dumps(obj, indent=2)) diff --git a/nmdc_automation/config/config.py b/nmdc_automation/config/config.py index fd812a2a..8f515bc0 100644 --- a/nmdc_automation/config/config.py +++ b/nmdc_automation/config/config.py @@ -47,6 +47,17 @@ def url_root(self): def api_url(self): return self.config_data["nmdc"]["api_url"] + @property + def napa_base_url(self): + return self.config_data["napa"]["base_url"] + @property + def napa_username(self): + return self.config_data["napa"]["username"] + + @property + def napa_password(self): + return self.config_data["napa"]["password"] + @property def watch_state(self): return self.config_data["state"]["watch_state"] From f06d7314e5e335dd89f212c0f7e97aaa7450265e Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 26 Oct 2023 15:08:19 -0700 Subject: [PATCH 08/91] Update rebuild_metagenome script - successfully calls queries:run endpoint --- .../rebuild_metagenome_workflow_records.py | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index 09ecfee4..a9300aee 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -6,24 +6,25 @@ after re-ID-ing of OmicsProcessing records. 
""" import logging -import os -from pathlib import Path -# import requests import time +from pathlib import Path import click +from nmdc_automation.api import NmdcRuntimeUserApi from nmdc_automation.config import Config -from nmdc_automation.api import NmdcRuntimeApi GOLD_STUDY_ID = "gold:Gs0114663" STUDY_ID = "nmdc:sty-11-aygzgv51" NAPA_CONFIG = Path("../../../configs/napa_config.toml") + @click.command() @click.option("--study_id", default=STUDY_ID, help="Updated study ID") -@click.option("--site_config", type=click.Path(exists=True), - default=NAPA_CONFIG, help="Site configuration file") +@click.option( + "--site_config", type=click.Path(exists=True), default=NAPA_CONFIG, + help="Site configuration file" +) def rebuild_workflow_records(study_id: str, site_config: bool): """ Rebuild metagenome workflow records after re-ID-ing of Study, Biosample, and @@ -44,26 +45,27 @@ def rebuild_workflow_records(study_id: str, site_config: bool): logging.info("starting missing_neon_soils_ecosystem_data.py...") logging.info(f"study_id: {study_id}") - runtime_api_client = NmdcRuntimeApi(site_config) - - + config = Config(site_config) + query_api_client = NmdcRuntimeUserApi( + username=config.napa_username, password=config.napa_password, + base_url=config.napa_base_url, ) # 1. 
Retrieve all OmicsProcessing records for the given GOLD study ID - # https://api-napa.microbiomedata.org/omics_processing_sets?find=omics_processing_set&filter=part_of%3Agold:Gs0114663&per_page=99&page=1 - - params = { - "find": "omics_processing_set", - "filter": { - "part_of": { - "$elemMatch": {"$eq": study_id} - } - } - } - response = runtime_api_client.run_query(params) - print(response) + url = "queries:run" + params = {"find": "omics_processing_set", + "filter": {"part_of": {"$elemMatch": {"$eq": study_id}}}} + response = query_api_client.request("POST", url, params_or_json_data=params) + if response.status_code != 200: + raise Exception( + f"Error retrieving OmicsProcessing records for study {study_id}" + ) + omics_processing_records = response.json()["cursor"]["firstBatch"] + logging.info( + f"Retrieved {len(omics_processing_records)} OmicsProcessing records for study {study_id}" + ) # 2. For each OmicsProcessing record, retrieve the corresponding if __name__ == "__main__": - rebuild_workflow_records() \ No newline at end of file + rebuild_workflow_records() From 4c88894170895e97547c4bdb6bf12b6bb27a8741 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 26 Oct 2023 15:08:58 -0700 Subject: [PATCH 09/91] ignore lock files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0f010ee0..8e4cb121 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ htmlcov/ .coverage attic .idea/ +*.lock From 0188da5984565ca4afa50558f56c71b385af45b0 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 26 Oct 2023 15:21:03 -0700 Subject: [PATCH 10/91] refactor get_omics_processing_records to the API client --- nmdc_automation/api/nmdcapi.py | 15 +++++++++++++++ .../rebuild_metagenome_workflow_records.py | 15 +++------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 416fe993..43864a33 100755 --- a/nmdc_automation/api/nmdcapi.py +++ 
b/nmdc_automation/api/nmdcapi.py @@ -371,6 +371,21 @@ def request(self, method, url_path, params_or_json_data=None): rv.raise_for_status() return rv + def get_omics_processing_records_for_nmdc_study(self, nmdc_study_id: str): + """ + Retrieve all OmicsProcessing records for the given NMDC study ID. + """ + url = "queries:run" + params = {"find": "omics_processing_set", + "filter": {"part_of": {"$elemMatch": {"$eq": nmdc_study_id}}}} + response = self.request("POST", url, params_or_json_data=params) + if response.status_code != 200: + raise Exception( + f"Error retrieving OmicsProcessing records for study {nmdc_study_id}" + ) + omics_processing_records = response.json()["cursor"]["firstBatch"] + return omics_processing_records + def jprint(obj): print(json.dumps(obj, indent=2)) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index a9300aee..27ab84df 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -50,21 +50,12 @@ def rebuild_workflow_records(study_id: str, site_config: bool): username=config.napa_username, password=config.napa_password, base_url=config.napa_base_url, ) - # 1. Retrieve all OmicsProcessing records for the given GOLD study ID - url = "queries:run" - params = {"find": "omics_processing_set", - "filter": {"part_of": {"$elemMatch": {"$eq": study_id}}}} - response = query_api_client.request("POST", url, params_or_json_data=params) - if response.status_code != 200: - raise Exception( - f"Error retrieving OmicsProcessing records for study {study_id}" - ) - omics_processing_records = response.json()["cursor"]["firstBatch"] + # 1. 
Retrieve all OmicsProcessing records for the updated NMDC study ID + omics_processing_records = query_api_client.get_omics_processing_records_for_nmdc_study(study_id) logging.info( f"Retrieved {len(omics_processing_records)} OmicsProcessing records for study {study_id}" ) - - # 2. For each OmicsProcessing record, retrieve the corresponding + # 2. For each OmicsProcessing record, retrieve the informed_by records if __name__ == "__main__": From 3d2144b314b5927e2f491672dc3d27f9689f7762 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 26 Oct 2023 15:42:52 -0700 Subject: [PATCH 11/91] update script to find legacy IDs The old IDs can be either gold_sequencing_project_identifiers or alternative_identifiers --- .../rebuild_metagenome_workflow_records.py | 37 +++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index 27ab84df..8b1d23be 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -19,6 +19,31 @@ NAPA_CONFIG = Path("../../../configs/napa_config.toml") +def _get_legacy_id(omics_processing_record: dict) -> str: + """ + Get the legacy ID for the given OmicsProcessing record. 
+ """ + legacy_id = None + legacy_ids = [] + gold_ids = omics_processing_record.get("gold_sequencing_project_identifiers", []) + legacy_ids.extend(gold_ids) + alternative_ids = omics_processing_record.get("alternative_identifiers", []) + legacy_ids.extend(alternative_ids) + if len(legacy_ids) == 0: + logging.warning( + f"No legacy IDs found for omics_processing_record: {omics_processing_record['id']}" + ) + return None + elif len(legacy_ids) > 1: + logging.warning( + f"Multiple legacy IDs found for omics_processing_record: {omics_processing_record['id']}" + ) + return None + else: + legacy_id = legacy_ids[0] + logging.info(f"legacy_id: {legacy_id}") + return legacy_id + @click.command() @click.option("--study_id", default=STUDY_ID, help="Updated study ID") @click.option( @@ -51,11 +76,17 @@ def rebuild_workflow_records(study_id: str, site_config: bool): base_url=config.napa_base_url, ) # 1. Retrieve all OmicsProcessing records for the updated NMDC study ID - omics_processing_records = query_api_client.get_omics_processing_records_for_nmdc_study(study_id) + omics_processing_records = query_api_client.get_omics_processing_records_for_nmdc_study( + study_id + ) logging.info( f"Retrieved {len(omics_processing_records)} OmicsProcessing records for study {study_id}" - ) - # 2. For each OmicsProcessing record, retrieve the informed_by records + ) + # 2. 
For each OmicsProcessing record, find the legacy identifier: + for omics_processing_record in omics_processing_records: + legacy_id = _get_legacy_id(omics_processing_record) + logging.info(f"legacy_id: {legacy_id}") + if __name__ == "__main__": From 5551b46e2e6e148eb596929b9b66cce6199e618e Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 26 Oct 2023 17:24:16 -0700 Subject: [PATCH 12/91] basic script framework - finds reads QC and downstream workflow activities --- nmdc_automation/api/nmdcapi.py | 17 +++++++++++ .../rebuild_metagenome_workflow_records.py | 29 ++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 43864a33..e8cad9f9 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -386,6 +386,23 @@ def get_omics_processing_records_for_nmdc_study(self, nmdc_study_id: str): omics_processing_records = response.json()["cursor"]["firstBatch"] return omics_processing_records + def get_workflow_activity_informed_by(self, workflow_activity_set: str, + informed_by_id: str): + """ + Retrieve a workflow activity record for the given workflow activity set + and informed by a given OmicsProcessing ID. 
+ """ + url = "queries:run" + params = {"find": workflow_activity_set, + "filter": {"was_informed_by": informed_by_id}} + response = self.request("POST", url, params_or_json_data=params) + if response.status_code != 200: + raise Exception( + f"Error retrieving {workflow_activity_set} record informed by {informed_by_id}" + ) + workflow_activity_record = response.json()["cursor"]["firstBatch"] + return workflow_activity_record + def jprint(obj): print(json.dumps(obj, indent=2)) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index 8b1d23be..7ff618be 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -41,7 +41,6 @@ def _get_legacy_id(omics_processing_record: dict) -> str: return None else: legacy_id = legacy_ids[0] - logging.info(f"legacy_id: {legacy_id}") return legacy_id @click.command() @@ -84,9 +83,37 @@ def rebuild_workflow_records(study_id: str, site_config: bool): ) # 2. For each OmicsProcessing record, find the legacy identifier: for omics_processing_record in omics_processing_records: + logging.info(f"omics_processing_record: " + f"{omics_processing_record['id']}") legacy_id = _get_legacy_id(omics_processing_record) logging.info(f"legacy_id: {legacy_id}") + # reads QC records + # Downstream WorkflowExecutionActivity records depend on the `has_output` + # data object of the ReadQcAnalysisActivity record. 
+ set_name = "read_qc_analysis_activity_set" + read_qc_records = query_api_client.get_workflow_activity_informed_by( + set_name, legacy_id + ) + logging.info(f"Found {len(read_qc_records)} read_qc_records") + + # downstream workflow activity sets + taxonomy_records, read_based_analysis_records, metagenome_assembly_records, \ + metagenome_annotation_records, mags_records = [], [], [], [], [] + + downstream_workflow_activity_sets = { + "read_based_taxonomy_analysis_activity_set": taxonomy_records, + "read_based_analysis_activity_set": read_based_analysis_records, + "metagenome_assembly_set": metagenome_assembly_records, + "metagenome_annotation_activity_set": metagenome_annotation_records, + "mags_activity_set": mags_records, + } + + for set_name, records in downstream_workflow_activity_sets.items(): + records = query_api_client.get_workflow_activity_informed_by( + set_name, legacy_id + ) + logging.info(f"Found {len(records)} {set_name} records") if __name__ == "__main__": From f6a7908b237486f387454092ff604070f379bb8a Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Sat, 28 Oct 2023 07:01:28 -0700 Subject: [PATCH 13/91] added write out to json for study --- .../scripts/rebuild_metagenome_workflow_records.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index 7ff618be..ab054665 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -8,7 +8,7 @@ import logging import time from pathlib import Path - +import json import click from nmdc_automation.api import NmdcRuntimeUserApi @@ -73,7 +73,7 @@ def rebuild_workflow_records(study_id: str, site_config: bool): query_api_client = NmdcRuntimeUserApi( username=config.napa_username, password=config.napa_password, 
base_url=config.napa_base_url, ) - + # 1. Retrieve all OmicsProcessing records for the updated NMDC study ID omics_processing_records = query_api_client.get_omics_processing_records_for_nmdc_study( study_id @@ -81,6 +81,8 @@ def rebuild_workflow_records(study_id: str, site_config: bool): logging.info( f"Retrieved {len(omics_processing_records)} OmicsProcessing records for study {study_id}" ) + + workflow_records_per_study = [] # 2. For each OmicsProcessing record, find the legacy identifier: for omics_processing_record in omics_processing_records: logging.info(f"omics_processing_record: " @@ -113,7 +115,11 @@ def rebuild_workflow_records(study_id: str, site_config: bool): records = query_api_client.get_workflow_activity_informed_by( set_name, legacy_id ) + workflow_records_per_study.append(records) logging.info(f"Found {len(records)} {set_name} records") + + with open(f"{study_id}_assocated_record_dump.json", 'w') as json_file: + json.dump(workflow_records_per_study, json_file, indent=4) if __name__ == "__main__": From e9e7267e06ac859409df1a0a870c835bec973ec9 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Sat, 28 Oct 2023 07:42:22 -0700 Subject: [PATCH 14/91] rename file, add database object post --- .../{utils.py => re_id_file_operations.py} | 0 .../re_iding/{re_id.py => re_id_process.py} | 114 +++--------------- 2 files changed, 19 insertions(+), 95 deletions(-) rename nmdc_automation/re_iding/{utils.py => re_id_file_operations.py} (100%) rename nmdc_automation/re_iding/{re_id.py => re_id_process.py} (75%) diff --git a/nmdc_automation/re_iding/utils.py b/nmdc_automation/re_iding/re_id_file_operations.py similarity index 100% rename from nmdc_automation/re_iding/utils.py rename to nmdc_automation/re_iding/re_id_file_operations.py diff --git a/nmdc_automation/re_iding/re_id.py b/nmdc_automation/re_iding/re_id_process.py similarity index 75% rename from nmdc_automation/re_iding/re_id.py rename to nmdc_automation/re_iding/re_id_process.py index 
0acf2a3b..ec975584 100755 --- a/nmdc_automation/re_iding/re_id.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -6,17 +6,15 @@ import datetime import pytz from pymongo import MongoClient -import hashlib import json -from subprocess import check_output from nmdc_automation.api import NmdcRuntimeApi -from nmdc_automation.config import Config +from re_id_file_operations import * import nmdc_schema.nmdc as nmdc from linkml_runtime.dumpers import json_dumper ###GLOBAL###### nmdc_db = nmdc.Database() - +runtime_api = NmdcRuntimeApi("../../configs/site_configuration.toml") base = "https://data.microbiomedata.org/data" base_dir = "/global/cfs/cdirs/m3408/results" @@ -139,23 +137,6 @@ def compute_new_paths(old_url, new_base_dir, omic_id, act_id): return new_url, destination, new_file_name -def md5_sum(fn): - """ - Calculate the MD5 hash of a file. - - Args: - - fn (str): Path to the file for which the MD5 hash is to be computed. - - Returns: - - str: The MD5 hash of the file. - """ - with open(fn, "rb") as f: - file_hash = hashlib.md5() - while chunk := f.read(8192): - file_hash.update(chunk) - return file_hash.hexdigest() - - def find_type(obj): """ Determine the data type of an object based on its URL extension. @@ -234,80 +215,6 @@ def copy_outputs(db, outputs, omic_id, act_id): return new_ids, new_data_objects -def rewrite_id(src, dst, old_id, new_id, prefix=None): - """ - Rewrite lines in a file, replacing occurrences of an old ID with a new ID. - An optional prefix can be specified to limit which lines are modified. - - Args: - - src (str): Source file path. - - dst (str): Destination file path. - - old_id (str): ID to be replaced. - - new_id (str): Replacement ID. - - prefix (str, optional): Prefix character to determine which lines to modify. Defaults to None. - - Returns: - - tuple: MD5 checksum and size (in bytes) of the modified file. 
- """ - fsrc = open(src) - fdst = open(dst, "w") - for line in fsrc: - if not prefix or (prefix and line[0] == prefix): - line = line.replace(old_id, new_id) - fdst.write(line) - fsrc.close() - fdst.close() - md5 = md5_sum(dst) - size = os.stat(dst).st_size - return md5, size - - -def find_assembly_id(src): - fsrc = open(src) - line = fsrc.readline() - return "_".join(line[1:].split("_")[0:-1]) - - -def assembly_contigs(src, dst, omic_id, act_id): - scaf = src.replace("_contigs", "_scaffolds") - old_id = find_assembly_id(scaf) - return rewrite_id(src, dst, old_id, act_id, prefix=">") - - -def assembly_scaffolds(src, dst, omic_id, act_id): - old_id = find_assembly_id(src) - return rewrite_id(src, dst, old_id, act_id, prefix=">") - - -def assembly_coverage_stats(src, dst, omic_id, act_id): - scaf = src.replace("_covstats.txt", "_scaffolds.fna") - old_id = find_assembly_id(scaf) - return rewrite_id(src, dst, old_id, act_id) - - -def assembly_agp(src, dst, omic_id, act_id): - scaf = src.replace("_assembly.agp", "_scaffolds.fna") - old_id = find_assembly_id(scaf) - return rewrite_id(src, dst, old_id, act_id) - - -def convert_script(script, src, dst, old_id, act_id): - cmd = ["./rewrite_bam.sh", src, dst, old_id, act_id] - results = check_output(cmd) - md5 = md5_sum(dst) - size = os.stat(dst).st_size - return md5, size - - -def assembly_coverage_bam(src, dst, omic_id, act_id): - scaf = src.replace("_pairedMapped_sorted.bam", "_scaffolds.fna") - old_id = find_assembly_id(scaf) - return convert_script("./rewrite_bam.sh", src, dst, old_id, act_id) - - -def xassembly_info_file(src, dst, omic_id, act_id): - return [] - def make_activity_set(nmdc_db, omics_id, has_input, has_output,workflow_record): database_activity_set = getattr(nmdc_db, workflow_record["Collection"]) # Lookup the nmdc schema range class @@ -350,6 +257,23 @@ def make_data_object(data_object_record, omics_id): ) ) +def post_database_object_to_runtime(datase_object): + + nmdc_database_object = json.loads( + 
json_dumper.dumps(datase_object, inject_type=False) + ) + res = runtime_api.post_objects(nmdc_database_object) + return res + +def main(): + #TODO + #1. Read in json dump of analysis records + #2. Process records for reads qc - generate new metadata, make new records and data objects (this will include file copies and renaming) + #3. save data of updated reads qc records + #4. Fetch old records for readbased analysis and assembly, generate new metadata, make new records and data objects (this will include file copies and renaming files and ids in files) + #5. Validate new records, submit them via runtime api + #6. Write seperate process to delete old records once we have + pass if __name__ == "__main__": mongo_url = os.environ["MONGO_URL"] From 219b007130c8ddb8fb15d0f32e7413f23b25a3e8 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Sat, 28 Oct 2023 07:42:49 -0700 Subject: [PATCH 15/91] rename file, move all file operation here --- .../re_iding/re_id_file_operations.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/nmdc_automation/re_iding/re_id_file_operations.py b/nmdc_automation/re_iding/re_id_file_operations.py index d77efc71..cf1d982d 100644 --- a/nmdc_automation/re_iding/re_id_file_operations.py +++ b/nmdc_automation/re_iding/re_id_file_operations.py @@ -1,5 +1,98 @@ import subprocess import gzip +import os +import hashlib +from subprocess import check_output + +def md5_sum(fn): + """ + Calculate the MD5 hash of a file. + + Args: + - fn (str): Path to the file for which the MD5 hash is to be computed. + + Returns: + - str: The MD5 hash of the file. + """ + with open(fn, "rb") as f: + file_hash = hashlib.md5() + while chunk := f.read(8192): + file_hash.update(chunk) + return file_hash.hexdigest() + +def rewrite_id(src, dst, old_id, new_id, prefix=None): + """ + Rewrite lines in a file, replacing occurrences of an old ID with a new ID. + An optional prefix can be specified to limit which lines are modified. 
+ + Args: + - src (str): Source file path. + - dst (str): Destination file path. + - old_id (str): ID to be replaced. + - new_id (str): Replacement ID. + - prefix (str, optional): Prefix character to determine which lines to modify. Defaults to None. + + Returns: + - tuple: MD5 checksum and size (in bytes) of the modified file. + """ + fsrc = open(src) + fdst = open(dst, "w") + for line in fsrc: + if not prefix or (prefix and line[0] == prefix): + line = line.replace(old_id, new_id) + fdst.write(line) + fsrc.close() + fdst.close() + md5 = md5_sum(dst) + size = os.stat(dst).st_size + return md5, size + + +def find_assembly_id(src): + fsrc = open(src) + line = fsrc.readline() + return "_".join(line[1:].split("_")[0:-1]) + + +def assembly_contigs(src, dst, omic_id, act_id): + scaf = src.replace("_contigs", "_scaffolds") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id, prefix=">") + + +def assembly_scaffolds(src, dst, omic_id, act_id): + old_id = find_assembly_id(src) + return rewrite_id(src, dst, old_id, act_id, prefix=">") + + +def assembly_coverage_stats(src, dst, omic_id, act_id): + scaf = src.replace("_covstats.txt", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id) + + +def assembly_agp(src, dst, omic_id, act_id): + scaf = src.replace("_assembly.agp", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id) + + +def convert_script(script, src, dst, old_id, act_id): + cmd = ["./rewrite_bam.sh", src, dst, old_id, act_id] + results = check_output(cmd) + md5 = md5_sum(dst) + size = os.stat(dst).st_size + return md5, size + + +def assembly_coverage_bam(src, dst, omic_id, act_id): + scaf = src.replace("_pairedMapped_sorted.bam", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return convert_script("./rewrite_bam.sh", src, dst, old_id, act_id) + + +def xassembly_info_file(src, dst, omic_id, act_id): + return [] def rewrite_bam(type, old_bam, 
new_bam, old_id, new_id): From 0481caaa1b15e6ddeb99c82c96d296e9cc299722 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 31 Oct 2023 12:49:46 -0700 Subject: [PATCH 16/91] fixes for #23 Add read_qc_analysis_activity_set --- nmdc_automation/api/nmdcapi.py | 13 + ...sty-11-aygzgv51_assocated_record_dump.json | 34722 ++++++++++++++++ .../rebuild_metagenome_workflow_records.py | 33 +- 3 files changed, 34767 insertions(+), 1 deletion(-) create mode 100644 nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index e8cad9f9..2c86bcd7 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -403,6 +403,19 @@ def get_workflow_activity_informed_by(self, workflow_activity_set: str, workflow_activity_record = response.json()["cursor"]["firstBatch"] return workflow_activity_record + def get_data_object_by_id(self, data_object_id: str): + """ + Retrieve a data object record for the given data object ID. 
+ """ + url = f"data_objects/{data_object_id}" + response = self.request("GET", url) + if response.status_code != 200: + raise Exception( + f"Error retrieving data object record for {data_object_id}" + ) + data_object_record = response.json() + return data_object_record + def jprint(obj): print(json.dumps(obj, indent=2)) diff --git a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json new file mode 100644 index 00000000..a7146766 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json @@ -0,0 +1,34722 @@ +[ + { + "_id": { + "$oid": "649b009773e8249959349b33" + }, + "id": "nmdc:omprc-11-bn8jcq58", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qq8s6x03" + ], + "has_output": [ + "jgi:55d740280d8785342fcf7e39" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115663" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88b" + }, + "has_input": [ + "nmdc:30a06664f29cffbbbc49abad86eae6fc" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7bf778baef033d36f118f8591256d6ef", + "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" + ], + 
"was_informed_by": "gold:Gp0115663", + "input_read_count": 32238374, + "output_read_bases": 4608772924, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4867994474, + "name": "Read QC Activity for nmdc:mga0h9dt75", + "output_read_count": 30774080, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "output_data_objects": [ + { + "name": "Gp0115663_Filtered Reads", + "description": "Filtered Reads for Gp0115663", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "id": "nmdc:7bf778baef033d36f118f8591256d6ef", + "file_size_bytes": 2571324879 + }, + { + "name": "Gp0115663_Filtered Stats", + "description": "Filtered Stats for Gp0115663", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", + "file_size_bytes": 290 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf4f" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "nmdc:7ca01ea379f0baed96f87d1435925f95", + "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for 
nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "output_data_objects": [ + { + "name": "Gp0115663_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "file_size_bytes": 13174 + }, + { + "name": "Gp0115663_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "file_size_bytes": 1035818 + }, + { + "name": "Gp0115663_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115663", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "file_size_bytes": 262669 + }, + { + "name": "Gp0115663_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115663", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "file_size_bytes": 2189843623 + }, + { + "name": "Gp0115663_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115663", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "file_size_bytes": 260134 + }, + { + "name": "Gp0115663_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115663", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "file_size_bytes": 2343980 + }, + { + "name": "Gp0115663_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115663", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", + "file_size_bytes": 1785563917 + }, + { + "name": "Gp0115663_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115663", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "file_size_bytes": 699896 + }, + { + "name": "Gp0115663_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115663", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", + "file_size_bytes": 4221977 + } + ] + }, + { + "_id": { + "$oid": 
"61e71a31833bcf838a701ec1" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "nmdc:7ca01ea379f0baed96f87d1435925f95", + "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "output_data_objects": [ + { + "name": "Gp0115663_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "file_size_bytes": 13174 + }, + { + "name": "Gp0115663_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "file_size_bytes": 1035818 + }, + { + "name": "Gp0115663_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115663", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", + "md5_checksum": 
"6b5bc6ce7f11c1336a5f85a98fc18541", + "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "file_size_bytes": 262669 + }, + { + "name": "Gp0115663_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115663", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "file_size_bytes": 2189843623 + }, + { + "name": "Gp0115663_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115663", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "file_size_bytes": 260134 + }, + { + "name": "Gp0115663_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115663", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "file_size_bytes": 2343980 + }, + { + "name": "Gp0115663_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115663", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", + "file_size_bytes": 1785563917 + }, + { + "name": "Gp0115663_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115663", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "file_size_bytes": 699896 + }, + { + "name": "Gp0115663_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115663", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", + "file_size_bytes": 4221977 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb9" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "ctg_logsum": 214373, + "scaf_logsum": 215363, + "gap_pct": 0.00188, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "nmdc:f450e3800e17691d5874c89fc46c186a", + "nmdc:31dc958d116d02122509e90b0883954f" + ], + "asm_score": 6.577, + "was_informed_by": "gold:Gp0115663", + "ctg_powsum": 24284, + "scaf_max": 68135, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "scaf_powsum": 24422, + "execution_resource": "NERSC-Cori", + "contigs": 169784, + "name": "Assembly Activity for nmdc:mga0h9dt75", + "ctg_max": 68135, + "gc_std": 0.11726, + "contig_bp": 83494920, + "gc_avg": 0.46001, + "started_at_time": "2021-10-11T02:28:26Z", + "scaf_bp": 83496490, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169645, + "ended_at_time": "2021-10-11T04:56:04+00:00", + "ctg_l50": 470, + "ctg_l90": 290, + "ctg_n50": 45584, + "ctg_n90": 141996, + "scaf_l50": 470, + "scaf_l90": 290, + "scaf_n50": 45550, + "scaf_n90": 141870, + "scaf_l_gt50k": 68135, + 
"scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.08160224, + "output_data_objects": [ + { + "name": "Gp0115663_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115663", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", + "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", + "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "file_size_bytes": 90115831 + }, + { + "name": "Gp0115663_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115663", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", + "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "id": "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "file_size_bytes": 89604715 + }, + { + "name": "Gp0115663_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", + "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "file_size_bytes": 13412363 + }, + { + "name": "Gp0115663_Assembled AGP file", + "description": "Assembled AGP file for Gp0115663", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", + "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "id": "nmdc:f450e3800e17691d5874c89fc46c186a", + "file_size_bytes": 12542171 + }, + { + "name": "Gp0115663_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115663", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", + "md5_checksum": "31dc958d116d02122509e90b0883954f", + "id": "nmdc:31dc958d116d02122509e90b0883954f", + 
"file_size_bytes": 2773429299 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d6" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:879988d212ecec46928b8598e2f8391f", + "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "nmdc:4f88c89459f36655eb7c1eceec19602a", + "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "nmdc:618b18fa8635c80cc0091371f451a6f0", + "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "nmdc:75f481e0d98793cfb4f9508cb3e31622" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "output_data_objects": [ + { + "name": "Gp0115663_Protein FAA", + "description": "Protein FAA for Gp0115663", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", + "md5_checksum": "879988d212ecec46928b8598e2f8391f", + "id": "nmdc:879988d212ecec46928b8598e2f8391f", + "file_size_bytes": 50165060 + }, + { + "name": "Gp0115663_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115663", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", + "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", + "id": "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "file_size_bytes": 2767 + }, + { + "name": 
"Gp0115663_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115663", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", + "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", + "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "file_size_bytes": 55139586 + }, + { + "name": "Gp0115663_KO TSV file", + "description": "KO TSV file for Gp0115663", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", + "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", + "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "file_size_bytes": 6023696 + }, + { + "name": "Gp0115663_EC TSV file", + "description": "EC TSV file for Gp0115663", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", + "md5_checksum": "4f88c89459f36655eb7c1eceec19602a", + "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", + "file_size_bytes": 3982918 + }, + { + "name": "Gp0115663_COG GFF file", + "description": "COG GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", + "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", + "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "file_size_bytes": 27362917 + }, + { + "name": "Gp0115663_PFAM GFF file", + "description": "PFAM GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", + "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", + "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", + "file_size_bytes": 21572048 + }, + { + "name": "Gp0115663_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115663", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", + "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", + "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "file_size_bytes": 2900068 + }, + { + "name": "Gp0115663_SMART GFF file", + "description": "SMART GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", + "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", + "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "file_size_bytes": 6905519 + }, + { + "name": "Gp0115663_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", + "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", + "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "file_size_bytes": 38787856 + }, + { + "name": "Gp0115663_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", + "md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", + "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "file_size_bytes": 30134783 + }, + { + "name": "Gp0115663_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", + "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", + "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", + "file_size_bytes": 19194308 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34734" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:31dc958d116d02122509e90b0883954f", + "nmdc:002e4ebc728f8b91cb5f298d340ab013" + ], + "too_short_contig_num": 159810, + "part_of": [ + "nmdc:mga0h9dt75" + ], + "binned_contig_num": 684, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "nmdc:27c07072f175571200b5931550adb8aa" + ], + "was_informed_by": "gold:Gp0115663", + "input_contig_num": 169782, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", + "mags_list": [ + { + "number_of_contig": 61, + "completeness": 13.82, + "bin_name": "bins.1", + "gene_count": 294, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.62, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 485, + "completeness": 66.03, + "bin_name": "bins.2", + "gene_count": 2871, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.87, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 32 + }, + { + "number_of_contig": 56, + "completeness": 34.23, + "bin_name": "bins.3", + "gene_count": 337, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 63, + "completeness": 6.9, + "bin_name": "bins.4", + "gene_count": 276, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + 
"number_of_contig": 19, + "completeness": 4.45, + "bin_name": "bins.5", + "gene_count": 463, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 9288, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", + "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", + "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "file_size_bytes": 70411007 + }, + { + "name": "Gp0115663_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", + "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", + "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "file_size_bytes": 15998690 + }, + { + "name": "Gp0115663_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115663", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", + "md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", + "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "file_size_bytes": 1530 + }, + { + "name": "Gp0115663_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115663", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", + "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", + "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "file_size_bytes": 182 + }, + { + "name": "Gp0115663_metabat2 bins", + "description": "metabat2 bins for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", + "md5_checksum": "27c07072f175571200b5931550adb8aa", + "id": "nmdc:27c07072f175571200b5931550adb8aa", + "file_size_bytes": 1114314 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b34" + }, + "id": "nmdc:omprc-11-zp2ar437", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-4qsqg549" + ], + "has_output": [ + "jgi:55d817fc0d8785342fcf8274" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115666" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": 
"649b009d6bdd4fd20273c887" + }, + "has_input": [ + "nmdc:76893480c05758ad2977df78a5b050e5" + ], + "part_of": [ + "nmdc:mga0eehe16" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0b301d2dd917c2be31422dd0e986dd5e", + "nmdc:0634e8261ce976d167457993d7f7a4ec" + ], + "was_informed_by": "gold:Gp0115666", + "input_read_count": 22183982, + "output_read_bases": 3025260554, + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3349781282, + "name": "Read QC Activity for nmdc:mga0eehe16", + "output_read_count": 20195754, + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00", + "output_data_objects": [ + { + "name": "Gp0115666_Filtered Reads", + "description": "Filtered Reads for Gp0115666", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filtered.fastq.gz", + "md5_checksum": "0b301d2dd917c2be31422dd0e986dd5e", + "id": "nmdc:0b301d2dd917c2be31422dd0e986dd5e", + "file_size_bytes": 1806510860 + }, + { + "name": "Gp0115666_Filtered Stats", + "description": "Filtered Stats for Gp0115666", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filterStats.txt", + "md5_checksum": "0634e8261ce976d167457993d7f7a4ec", + "id": "nmdc:0634e8261ce976d167457993d7f7a4ec", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf55" + }, + "has_input": [ + "nmdc:0b301d2dd917c2be31422dd0e986dd5e" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:17454627f873cc37e80700c4751c81d6", + "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + 
"nmdc:e5227b1cfdbc266c44d23028c92150a9", + "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "nmdc:368cf81424348cdf46d17c13908280e7", + "nmdc:b5091cfeed4fbea8316e50fbceea89bc" + ], + "was_informed_by": "gold:Gp0115666", + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:06:19+00:00", + "output_data_objects": [ + { + "name": "Gp0115666_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", + "md5_checksum": "17454627f873cc37e80700c4751c81d6", + "id": "nmdc:17454627f873cc37e80700c4751c81d6", + "file_size_bytes": 10721 + }, + { + "name": "Gp0115666_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", + "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", + "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "file_size_bytes": 920924 + }, + { + "name": "Gp0115666_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115666", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", + "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", + "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "file_size_bytes": 257441 + }, + { + "name": "Gp0115666_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115666", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", + 
"md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", + "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "file_size_bytes": 1468295025 + }, + { + "name": "Gp0115666_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115666", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", + "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", + "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + "file_size_bytes": 257081 + }, + { + "name": "Gp0115666_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115666", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", + "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", + "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", + "file_size_bytes": 2331968 + }, + { + "name": "Gp0115666_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115666", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", + "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", + "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "file_size_bytes": 1204548180 + }, + { + "name": "Gp0115666_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115666", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", + "md5_checksum": "368cf81424348cdf46d17c13908280e7", + "id": "nmdc:368cf81424348cdf46d17c13908280e7", + "file_size_bytes": 653697 + }, + { + "name": "Gp0115666_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115666", + "data_object_type": "Kraken2 Krona Plot", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", + "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", + "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", + "file_size_bytes": 3983935 + } + ] + }, + { + "_id": { + "$oid": "61e71a15833bcf838a701c88" + }, + "has_input": [ + "nmdc:0b301d2dd917c2be31422dd0e986dd5e" + ], + "part_of": [ + "nmdc:mga0eehe16" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:17454627f873cc37e80700c4751c81d6", + "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + "nmdc:e5227b1cfdbc266c44d23028c92150a9", + "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "nmdc:368cf81424348cdf46d17c13908280e7", + "nmdc:b5091cfeed4fbea8316e50fbceea89bc" + ], + "was_informed_by": "gold:Gp0115666", + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:06:19+00:00", + "output_data_objects": [ + { + "name": "Gp0115666_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", + "md5_checksum": "17454627f873cc37e80700c4751c81d6", + "id": "nmdc:17454627f873cc37e80700c4751c81d6", + "file_size_bytes": 10721 + }, + { + "name": "Gp0115666_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", + "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", + "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "file_size_bytes": 920924 + }, + { + "name": 
"Gp0115666_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115666", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", + "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", + "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "file_size_bytes": 257441 + }, + { + "name": "Gp0115666_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115666", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", + "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", + "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "file_size_bytes": 1468295025 + }, + { + "name": "Gp0115666_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115666", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", + "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", + "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + "file_size_bytes": 257081 + }, + { + "name": "Gp0115666_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115666", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", + "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", + "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", + "file_size_bytes": 2331968 + }, + { + "name": "Gp0115666_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115666", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", + "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", + "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "file_size_bytes": 1204548180 + }, + { + "name": "Gp0115666_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115666", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", + "md5_checksum": "368cf81424348cdf46d17c13908280e7", + "id": "nmdc:368cf81424348cdf46d17c13908280e7", + "file_size_bytes": 653697 + }, + { + "name": "Gp0115666_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115666", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", + "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", + "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", + "file_size_bytes": 3983935 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fbb" + }, + "has_input": [ + "nmdc:0b301d2dd917c2be31422dd0e986dd5e" + ], + "part_of": [ + "nmdc:mga0eehe16" + ], + "ctg_logsum": 181484, + "scaf_logsum": 182081, + "gap_pct": 0.00163, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e557facdf4c3066ba4b5ba168995ba85", + "nmdc:92cc678ca9e54cb92118b9ae746fb996", + "nmdc:7082b41c627571a03466f94ba80c15b8", + "nmdc:c5ccd39d97d652d5ec8804202a324b0e", + "nmdc:3ece2c377622cebdddfb9322047cb115" + ], + "asm_score": 5.224, + "was_informed_by": "gold:Gp0115666", + "ctg_powsum": 20653, + "scaf_max": 25973, + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "scaf_powsum": 20721, + "execution_resource": "NERSC-Cori", + "contigs": 116661, + "name": "Assembly Activity for nmdc:mga0eehe16", + "ctg_max": 25973, + "gc_std": 0.10759, + "contig_bp": 58735100, + "gc_avg": 
0.57262, + "started_at_time": "2021-10-11T02:28:09Z", + "scaf_bp": 58736060, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 116565, + "ended_at_time": "2021-10-11T04:06:19+00:00", + "ctg_l50": 493, + "ctg_l90": 286, + "ctg_n50": 27791, + "ctg_n90": 95962, + "scaf_l50": 493, + "scaf_l90": 286, + "scaf_n50": 27775, + "scaf_n90": 95875, + "output_data_objects": [ + { + "name": "Gp0115666_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115666", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_contigs.fna", + "md5_checksum": "e557facdf4c3066ba4b5ba168995ba85", + "id": "nmdc:e557facdf4c3066ba4b5ba168995ba85", + "file_size_bytes": 63269472 + }, + { + "name": "Gp0115666_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115666", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_scaffolds.fna", + "md5_checksum": "92cc678ca9e54cb92118b9ae746fb996", + "id": "nmdc:92cc678ca9e54cb92118b9ae746fb996", + "file_size_bytes": 62917914 + }, + { + "name": "Gp0115666_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_covstats.txt", + "md5_checksum": "7082b41c627571a03466f94ba80c15b8", + "id": "nmdc:7082b41c627571a03466f94ba80c15b8", + "file_size_bytes": 9179769 + }, + { + "name": "Gp0115666_Assembled AGP file", + "description": "Assembled AGP file for Gp0115666", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_assembly.agp", + "md5_checksum": "c5ccd39d97d652d5ec8804202a324b0e", + "id": "nmdc:c5ccd39d97d652d5ec8804202a324b0e", + "file_size_bytes": 8550216 + }, + { + "name": "Gp0115666_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM 
file for Gp0115666", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_pairedMapped_sorted.bam", + "md5_checksum": "3ece2c377622cebdddfb9322047cb115", + "id": "nmdc:3ece2c377622cebdddfb9322047cb115", + "file_size_bytes": 1940309089 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ce" + }, + "has_input": [ + "nmdc:e557facdf4c3066ba4b5ba168995ba85" + ], + "part_of": [ + "nmdc:mga0eehe16" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:4d509c29cad07f0b18d3f7e0e724c493", + "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", + "nmdc:91cd273ea95a29b2c4e326c56eafe08a", + "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", + "nmdc:9edfc4fee191b722148af1e2648f787f", + "nmdc:886402044865256b80bfaf42ca148a61", + "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", + "nmdc:2d730834b8841b7a7ad30786bff382fa", + "nmdc:46d62d69e48d7aeecb87106e02102753", + "nmdc:1896e41000aa9e4acc98cc7702e42304", + "nmdc:f40bfd77fb3f24be2529fdafc01104c7", + "nmdc:920be8f090654360619fbb16163b8513" + ], + "was_informed_by": "gold:Gp0115666", + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0eehe16", + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00", + "output_data_objects": [ + { + "name": "Gp0115666_Protein FAA", + "description": "Protein FAA for Gp0115666", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_proteins.faa", + "md5_checksum": "4d509c29cad07f0b18d3f7e0e724c493", + "id": "nmdc:4d509c29cad07f0b18d3f7e0e724c493", + "file_size_bytes": 35706777 + }, + { + "name": "Gp0115666_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115666", + "data_object_type": "Structural Annotation 
GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_structural_annotation.gff", + "md5_checksum": "60d04bb0a2d1a1d593bd849a2a13e405", + "id": "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", + "file_size_bytes": 2520 + }, + { + "name": "Gp0115666_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115666", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_functional_annotation.gff", + "md5_checksum": "91cd273ea95a29b2c4e326c56eafe08a", + "id": "nmdc:91cd273ea95a29b2c4e326c56eafe08a", + "file_size_bytes": 40030386 + }, + { + "name": "Gp0115666_KO TSV file", + "description": "KO TSV file for Gp0115666", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko.tsv", + "md5_checksum": "e08c6253ec5a15eb43d8cb4d69d09d4c", + "id": "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", + "file_size_bytes": 5584125 + }, + { + "name": "Gp0115666_EC TSV file", + "description": "EC TSV file for Gp0115666", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ec.tsv", + "md5_checksum": "9edfc4fee191b722148af1e2648f787f", + "id": "nmdc:9edfc4fee191b722148af1e2648f787f", + "file_size_bytes": 3575242 + }, + { + "name": "Gp0115666_COG GFF file", + "description": "COG GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cog.gff", + "md5_checksum": "886402044865256b80bfaf42ca148a61", + "id": "nmdc:886402044865256b80bfaf42ca148a61", + "file_size_bytes": 23390091 + }, + { + "name": "Gp0115666_PFAM GFF file", + "description": "PFAM GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_pfam.gff", + "md5_checksum": 
"1b2bc9b96a15ebdfe3ff1e30027544af", + "id": "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", + "file_size_bytes": 18444613 + }, + { + "name": "Gp0115666_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_tigrfam.gff", + "md5_checksum": "2d730834b8841b7a7ad30786bff382fa", + "id": "nmdc:2d730834b8841b7a7ad30786bff382fa", + "file_size_bytes": 2596225 + }, + { + "name": "Gp0115666_SMART GFF file", + "description": "SMART GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_smart.gff", + "md5_checksum": "46d62d69e48d7aeecb87106e02102753", + "id": "nmdc:46d62d69e48d7aeecb87106e02102753", + "file_size_bytes": 4932262 + }, + { + "name": "Gp0115666_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_supfam.gff", + "md5_checksum": "1896e41000aa9e4acc98cc7702e42304", + "id": "nmdc:1896e41000aa9e4acc98cc7702e42304", + "file_size_bytes": 28911479 + }, + { + "name": "Gp0115666_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cath_funfam.gff", + "md5_checksum": "f40bfd77fb3f24be2529fdafc01104c7", + "id": "nmdc:f40bfd77fb3f24be2529fdafc01104c7", + "file_size_bytes": 22881869 + }, + { + "name": "Gp0115666_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko_ec.gff", + "md5_checksum": "920be8f090654360619fbb16163b8513", + "id": "nmdc:920be8f090654360619fbb16163b8513", + "file_size_bytes": 17844749 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34730" + }, + "has_input": [ + "nmdc:e557facdf4c3066ba4b5ba168995ba85", + "nmdc:3ece2c377622cebdddfb9322047cb115", + 
"nmdc:91cd273ea95a29b2c4e326c56eafe08a" + ], + "too_short_contig_num": 108937, + "part_of": [ + "nmdc:mga0eehe16" + ], + "binned_contig_num": 899, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:9944a9020ce981a2423ca81424998e66", + "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", + "nmdc:415dfed655f9c4673f2cce4f9947c2e4", + "nmdc:be6482b534716166ce5daea5a07cba06", + "nmdc:df08913532a84681996a29d1a1c127b3" + ], + "was_informed_by": "gold:Gp0115666", + "input_contig_num": 116661, + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0eehe16", + "mags_list": [ + { + "number_of_contig": 216, + "completeness": 41.57, + "bin_name": "bins.1", + "gene_count": 1176, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.93, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 683, + "completeness": 87.59, + "bin_name": "bins.2", + "gene_count": 4526, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 33.23, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 56 + } + ], + "unbinned_contig_num": 6825, + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + 
"md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115666_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.tooShort.fa", + "md5_checksum": "9944a9020ce981a2423ca81424998e66", + "id": "nmdc:9944a9020ce981a2423ca81424998e66", + "file_size_bytes": 46766610 + }, + { + "name": "Gp0115666_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.unbinned.fa", + "md5_checksum": "d2a24728b9006fd4fb4bf4f326138dc2", + "id": "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", + "file_size_bytes": 11382048 + }, + { + "name": "Gp0115666_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115666", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_checkm_qa.out", + "md5_checksum": "415dfed655f9c4673f2cce4f9947c2e4", + "id": "nmdc:415dfed655f9c4673f2cce4f9947c2e4", + "file_size_bytes": 1020 + }, + { + "name": "Gp0115666_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115666", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_hqmq_bin.zip", + "md5_checksum": "be6482b534716166ce5daea5a07cba06", + "id": "nmdc:be6482b534716166ce5daea5a07cba06", + "file_size_bytes": 182 + }, + { + "name": "Gp0115666_metabat2 bins", + "description": "metabat2 bins for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_metabat_bin.zip", + "md5_checksum": "df08913532a84681996a29d1a1c127b3", + 
"id": "nmdc:df08913532a84681996a29d1a1c127b3", + "file_size_bytes": 1559491 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b35" + }, + "id": "nmdc:omprc-11-wepaa271", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-srz83p34" + ], + "has_output": [ + "jgi:55d740240d8785342fcf7e37" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115668" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88c" + }, + "has_input": [ + "nmdc:0967bbbe5ee2737f66bc6ee7bf366bbb" + ], + "part_of": [ + "nmdc:mga0n66h21" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", + "nmdc:63fb5949ebafd1846ba60f2ce033191c" + ], + "was_informed_by": "gold:Gp0115668", + "input_read_count": 35064492, + "output_read_bases": 5069132469, + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "input_read_bases": 5294738292, + "name": "Read QC Activity for nmdc:mga0n66h21", + "output_read_count": 33873238, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00", + "output_data_objects": [ + { + "name": "Gp0115668_Filtered Reads", + "description": "Filtered Reads for Gp0115668", + "data_object_type": 
"Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filtered.fastq.gz", + "md5_checksum": "121b1c25e803f2a010ae5a2206a8d1d2", + "id": "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", + "file_size_bytes": 2665008319 + }, + { + "name": "Gp0115668_Filtered Stats", + "description": "Filtered Stats for Gp0115668", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filterStats.txt", + "md5_checksum": "63fb5949ebafd1846ba60f2ce033191c", + "id": "nmdc:63fb5949ebafd1846ba60f2ce033191c", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf53" + }, + "has_input": [ + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "nmdc:2529ede10eb159148711d016ec022af3", + "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "nmdc:890f9f52d828e1ea8277b52566763069", + "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "nmdc:8677985c5e8ad92dd6d051f85950a636", + "nmdc:9b2f355a4c2ff3651a3d1179212e2914" + ], + "was_informed_by": "gold:Gp0115668", + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:19:17+00:00", + "output_data_objects": [ + { + "name": "Gp0115668_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", + "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", + "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "file_size_bytes": 13875 + }, + { + "name": "Gp0115668_Gottcha2 full TSV report", + 
"description": "Gottcha2 full TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", + "md5_checksum": "2529ede10eb159148711d016ec022af3", + "id": "nmdc:2529ede10eb159148711d016ec022af3", + "file_size_bytes": 956974 + }, + { + "name": "Gp0115668_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115668", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", + "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", + "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "file_size_bytes": 265076 + }, + { + "name": "Gp0115668_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115668", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", + "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", + "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "file_size_bytes": 2377445510 + }, + { + "name": "Gp0115668_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115668", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", + "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", + "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "file_size_bytes": 258291 + }, + { + "name": "Gp0115668_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115668", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", + "md5_checksum": "890f9f52d828e1ea8277b52566763069", + "id": 
"nmdc:890f9f52d828e1ea8277b52566763069", + "file_size_bytes": 2333775 + }, + { + "name": "Gp0115668_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115668", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", + "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", + "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "file_size_bytes": 1966520263 + }, + { + "name": "Gp0115668_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115668", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", + "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", + "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", + "file_size_bytes": 707661 + }, + { + "name": "Gp0115668_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115668", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", + "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", + "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", + "file_size_bytes": 4276256 + } + ] + }, + { + "_id": { + "$oid": "61e71a4d833bcf838a7021ce" + }, + "has_input": [ + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + ], + "part_of": [ + "nmdc:mga0n66h21" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "nmdc:2529ede10eb159148711d016ec022af3", + "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "nmdc:890f9f52d828e1ea8277b52566763069", + "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "nmdc:8677985c5e8ad92dd6d051f85950a636", + "nmdc:9b2f355a4c2ff3651a3d1179212e2914" + ], + 
"was_informed_by": "gold:Gp0115668", + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:19:17+00:00", + "output_data_objects": [ + { + "name": "Gp0115668_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", + "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", + "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "file_size_bytes": 13875 + }, + { + "name": "Gp0115668_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", + "md5_checksum": "2529ede10eb159148711d016ec022af3", + "id": "nmdc:2529ede10eb159148711d016ec022af3", + "file_size_bytes": 956974 + }, + { + "name": "Gp0115668_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115668", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", + "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", + "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "file_size_bytes": 265076 + }, + { + "name": "Gp0115668_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115668", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", + "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", + "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "file_size_bytes": 2377445510 + }, + { + "name": "Gp0115668_Centrifuge TSV report", + 
"description": "Centrifuge TSV report for Gp0115668", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", + "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", + "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "file_size_bytes": 258291 + }, + { + "name": "Gp0115668_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115668", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", + "md5_checksum": "890f9f52d828e1ea8277b52566763069", + "id": "nmdc:890f9f52d828e1ea8277b52566763069", + "file_size_bytes": 2333775 + }, + { + "name": "Gp0115668_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115668", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", + "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", + "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "file_size_bytes": 1966520263 + }, + { + "name": "Gp0115668_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115668", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", + "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", + "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", + "file_size_bytes": 707661 + }, + { + "name": "Gp0115668_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115668", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", + "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", + "id": 
"nmdc:9b2f355a4c2ff3651a3d1179212e2914", + "file_size_bytes": 4276256 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc3" + }, + "has_input": [ + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + ], + "part_of": [ + "nmdc:mga0n66h21" + ], + "ctg_logsum": 489108, + "scaf_logsum": 491574, + "gap_pct": 0.00308, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b2b862aede4f333acec79aac3afc7254", + "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", + "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", + "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", + "nmdc:f7a4bb0be4599b544360617190b45681" + ], + "asm_score": 4.087, + "was_informed_by": "gold:Gp0115668", + "ctg_powsum": 53542, + "scaf_max": 53286, + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "scaf_powsum": 53839, + "execution_resource": "NERSC-Cori", + "contigs": 323269, + "name": "Assembly Activity for nmdc:mga0n66h21", + "ctg_max": 53286, + "gc_std": 0.10793, + "contig_bp": 169601906, + "gc_avg": 0.39548, + "started_at_time": "2021-10-11T02:28:43Z", + "scaf_bp": 169607136, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 322890, + "ended_at_time": "2021-10-11T05:19:17+00:00", + "ctg_l50": 525, + "ctg_l90": 299, + "ctg_n50": 83667, + "ctg_n90": 263711, + "scaf_l50": 526, + "scaf_l90": 299, + "scaf_n50": 83307, + "scaf_n90": 263381, + "scaf_l_gt50k": 53286, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.03141731, + "output_data_objects": [ + { + "name": "Gp0115668_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115668", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", + "md5_checksum": "b2b862aede4f333acec79aac3afc7254", + "id": "nmdc:b2b862aede4f333acec79aac3afc7254", + "file_size_bytes": 182488593 + }, + { + "name": "Gp0115668_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115668", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", + "md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", + "id": "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", + "file_size_bytes": 181514952 + }, + { + "name": "Gp0115668_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_covstats.txt", + "md5_checksum": "6ccb798d615b67dfb9c64ff32d6586c4", + "id": "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", + "file_size_bytes": 25682298 + }, + { + "name": "Gp0115668_Assembled AGP file", + "description": "Assembled AGP file for Gp0115668", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_assembly.agp", + "md5_checksum": "da27801a4e0ab450485f5a3aeb75a7d6", + "id": "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", + "file_size_bytes": 24103161 + }, + { + "name": "Gp0115668_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115668", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_pairedMapped_sorted.bam", + "md5_checksum": "f7a4bb0be4599b544360617190b45681", + "id": "nmdc:f7a4bb0be4599b544360617190b45681", + "file_size_bytes": 2958311801 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d8" + }, + "has_input": [ + "nmdc:b2b862aede4f333acec79aac3afc7254" + ], + "part_of": [ + "nmdc:mga0n66h21" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", + "nmdc:d49149a48134c1091c001448cc91f8e2", + "nmdc:7a861805138d425525f298c1790b58ed", + "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", + "nmdc:a8b689fdef54bf7235532de634cf553e", + "nmdc:017daaa53039bc1135ca8f013596eb14", + "nmdc:e3eb963d76dc6bdc54756cfa80977611", + 
"nmdc:ab1d561046fbe146ac418e4ed822e861", + "nmdc:51054c4da9edc391b03418b5f9327815", + "nmdc:335576d20d4f5c061a875529cbe9572c", + "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", + "nmdc:eea36326caba5baa0536ac2f5e36d497" + ], + "was_informed_by": "gold:Gp0115668", + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0n66h21", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00", + "output_data_objects": [ + { + "name": "Gp0115668_Protein FAA", + "description": "Protein FAA for Gp0115668", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_proteins.faa", + "md5_checksum": "5cb6273cd171d1ae5a8d77c8f131517f", + "id": "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", + "file_size_bytes": 88016165 + }, + { + "name": "Gp0115668_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115668", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_structural_annotation.gff", + "md5_checksum": "d49149a48134c1091c001448cc91f8e2", + "id": "nmdc:d49149a48134c1091c001448cc91f8e2", + "file_size_bytes": 2527 + }, + { + "name": "Gp0115668_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115668", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_functional_annotation.gff", + "md5_checksum": "7a861805138d425525f298c1790b58ed", + "id": "nmdc:7a861805138d425525f298c1790b58ed", + "file_size_bytes": 91926507 + }, + { + "name": "Gp0115668_KO TSV file", + "description": "KO TSV file for Gp0115668", + "data_object_type": "Annotation KEGG Orthology", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko.tsv", + "md5_checksum": "0d0a80f2dafb68f4659709dd2ebd2f28", + "id": "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", + "file_size_bytes": 6651856 + }, + { + "name": "Gp0115668_EC TSV file", + "description": "EC TSV file for Gp0115668", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ec.tsv", + "md5_checksum": "a8b689fdef54bf7235532de634cf553e", + "id": "nmdc:a8b689fdef54bf7235532de634cf553e", + "file_size_bytes": 4156019 + }, + { + "name": "Gp0115668_COG GFF file", + "description": "COG GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cog.gff", + "md5_checksum": "017daaa53039bc1135ca8f013596eb14", + "id": "nmdc:017daaa53039bc1135ca8f013596eb14", + "file_size_bytes": 33686729 + }, + { + "name": "Gp0115668_PFAM GFF file", + "description": "PFAM GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_pfam.gff", + "md5_checksum": "e3eb963d76dc6bdc54756cfa80977611", + "id": "nmdc:e3eb963d76dc6bdc54756cfa80977611", + "file_size_bytes": 29534588 + }, + { + "name": "Gp0115668_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_tigrfam.gff", + "md5_checksum": "ab1d561046fbe146ac418e4ed822e861", + "id": "nmdc:ab1d561046fbe146ac418e4ed822e861", + "file_size_bytes": 2596288 + }, + { + "name": "Gp0115668_SMART GFF file", + "description": "SMART GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_smart.gff", + "md5_checksum": "51054c4da9edc391b03418b5f9327815", + "id": "nmdc:51054c4da9edc391b03418b5f9327815", + "file_size_bytes": 18133874 + }, + { + "name": "Gp0115668_SuperFam GFF file", + "description": "SuperFam GFF 
file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_supfam.gff", + "md5_checksum": "335576d20d4f5c061a875529cbe9572c", + "id": "nmdc:335576d20d4f5c061a875529cbe9572c", + "file_size_bytes": 61337132 + }, + { + "name": "Gp0115668_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cath_funfam.gff", + "md5_checksum": "6c5387ac5acb8b340a2c2a9e17e62bae", + "id": "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", + "file_size_bytes": 52005922 + }, + { + "name": "Gp0115668_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko_ec.gff", + "md5_checksum": "eea36326caba5baa0536ac2f5e36d497", + "id": "nmdc:eea36326caba5baa0536ac2f5e36d497", + "file_size_bytes": 21150415 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34736" + }, + "has_input": [ + "nmdc:b2b862aede4f333acec79aac3afc7254", + "nmdc:f7a4bb0be4599b544360617190b45681", + "nmdc:7a861805138d425525f298c1790b58ed" + ], + "too_short_contig_num": 297764, + "part_of": [ + "nmdc:mga0n66h21" + ], + "binned_contig_num": 1669, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8c05fc754583d51714bc1aa81396e59d", + "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", + "nmdc:60db1474ee6a099c10e4fdc728420cf8", + "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", + "nmdc:3f4c7c98bb94687eb96382799c8626fe" + ], + "was_informed_by": "gold:Gp0115668", + "input_contig_num": 323261, + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0n66h21", + "mags_list": [ + { + "number_of_contig": 1013, + "completeness": 12.29, + "bin_name": "bins.1", + "gene_count": 4188, + "bin_quality": "LQ", + "gtdbtk_species": "", + 
"gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.32, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 + }, + { + "number_of_contig": 599, + "completeness": 58.72, + "bin_name": "bins.2", + "gene_count": 2940, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 12.95, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 25 + }, + { + "number_of_contig": 57, + "completeness": 4.0, + "bin_name": "bins.3", + "gene_count": 258, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + } + ], + "unbinned_contig_num": 23828, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115668_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.tooShort.fa", + "md5_checksum": "8c05fc754583d51714bc1aa81396e59d", + "id": "nmdc:8c05fc754583d51714bc1aa81396e59d", + 
"file_size_bytes": 136315210 + }, + { + "name": "Gp0115668_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.unbinned.fa", + "md5_checksum": "9ef1be5df79aee7c64f2addc4bda6afa", + "id": "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", + "file_size_bytes": 39131745 + }, + { + "name": "Gp0115668_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115668", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_checkm_qa.out", + "md5_checksum": "60db1474ee6a099c10e4fdc728420cf8", + "id": "nmdc:60db1474ee6a099c10e4fdc728420cf8", + "file_size_bytes": 1176 + }, + { + "name": "Gp0115668_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115668", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_hqmq_bin.zip", + "md5_checksum": "5a36d8ba758ee510ab2be3e01fda3e0f", + "id": "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", + "file_size_bytes": 182 + }, + { + "name": "Gp0115668_metabat2 bins", + "description": "metabat2 bins for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_metabat_bin.zip", + "md5_checksum": "3f4c7c98bb94687eb96382799c8626fe", + "id": "nmdc:3f4c7c98bb94687eb96382799c8626fe", + "file_size_bytes": 2145953 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b36" + }, + "id": "nmdc:omprc-11-hymrq852", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-11219w54" + ], + "has_output": [ + 
"jgi:55a9cb010d87852b21508920" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115679" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c884" + }, + "has_input": [ + "nmdc:3bf389b767cf8a49224dc0028e55eeb7" + ], + "part_of": [ + "nmdc:mga0gg1q48" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", + "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49" + ], + "was_informed_by": "gold:Gp0115679", + "input_read_count": 67696542, + "output_read_bases": 9825387057, + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 10222177842, + "name": "Read QC Activity for nmdc:mga0gg1q48", + "output_read_count": 67147510, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:30:42+00:00", + "output_data_objects": [ + { + "name": "Gp0115679_Filtered Reads", + "description": "Filtered Reads for Gp0115679", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filtered.fastq.gz", + "md5_checksum": "7e294ff66cb7ddf84edf9c8bed576bcd", + "id": "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", + "file_size_bytes": 5673282665 + }, + { + "name": "Gp0115679_Filtered Stats", + "description": "Filtered Stats for Gp0115679", + "data_object_type": "QC Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filterStats.txt", + "md5_checksum": "08e2a96f7aaaff5ff6f747cfe6f49e49", + "id": "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49", + "file_size_bytes": 276 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf4b" + }, + "has_input": [ + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e20f8c00473472fa073adde871860801", + "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "nmdc:f721d9dd168b0dea080b191a4396167e", + "nmdc:ab77e396ec643b58b54da92848b88a96", + "nmdc:f2514844e47a9e3d268671f80f152bc1", + "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "nmdc:77860ee043ae9738e7702a3f665b15fa" + ], + "was_informed_by": "gold:Gp0115679", + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:30:42+00:00", + "output_data_objects": [ + { + "name": "Gp0115679_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", + "md5_checksum": "e20f8c00473472fa073adde871860801", + "id": "nmdc:e20f8c00473472fa073adde871860801", + "file_size_bytes": 18551 + }, + { + "name": "Gp0115679_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", + "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", + "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "file_size_bytes": 1200541 + }, + { + "name": "Gp0115679_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML 
report for Gp0115679", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", + "md5_checksum": "f721d9dd168b0dea080b191a4396167e", + "id": "nmdc:f721d9dd168b0dea080b191a4396167e", + "file_size_bytes": 278990 + }, + { + "name": "Gp0115679_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115679", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", + "md5_checksum": "ab77e396ec643b58b54da92848b88a96", + "id": "nmdc:ab77e396ec643b58b54da92848b88a96", + "file_size_bytes": 4742886512 + }, + { + "name": "Gp0115679_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115679", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", + "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", + "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", + "file_size_bytes": 266907 + }, + { + "name": "Gp0115679_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115679", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", + "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", + "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "file_size_bytes": 2359747 + }, + { + "name": "Gp0115679_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115679", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", + "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", + 
"id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "file_size_bytes": 3859620862 + }, + { + "name": "Gp0115679_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115679", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", + "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", + "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "file_size_bytes": 729541 + }, + { + "name": "Gp0115679_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115679", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", + "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", + "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", + "file_size_bytes": 4358324 + } + ] + }, + { + "_id": { + "$oid": "61e719fa833bcf838a701935" + }, + "has_input": [ + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + ], + "part_of": [ + "nmdc:mga0gg1q48" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e20f8c00473472fa073adde871860801", + "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "nmdc:f721d9dd168b0dea080b191a4396167e", + "nmdc:ab77e396ec643b58b54da92848b88a96", + "nmdc:f2514844e47a9e3d268671f80f152bc1", + "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "nmdc:77860ee043ae9738e7702a3f665b15fa" + ], + "was_informed_by": "gold:Gp0115679", + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:30:42+00:00", + "output_data_objects": [ + { + "name": "Gp0115679_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115679", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", + "md5_checksum": "e20f8c00473472fa073adde871860801", + "id": "nmdc:e20f8c00473472fa073adde871860801", + "file_size_bytes": 18551 + }, + { + "name": "Gp0115679_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", + "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", + "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "file_size_bytes": 1200541 + }, + { + "name": "Gp0115679_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115679", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", + "md5_checksum": "f721d9dd168b0dea080b191a4396167e", + "id": "nmdc:f721d9dd168b0dea080b191a4396167e", + "file_size_bytes": 278990 + }, + { + "name": "Gp0115679_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115679", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", + "md5_checksum": "ab77e396ec643b58b54da92848b88a96", + "id": "nmdc:ab77e396ec643b58b54da92848b88a96", + "file_size_bytes": 4742886512 + }, + { + "name": "Gp0115679_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115679", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", + "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", + "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", + "file_size_bytes": 266907 + }, + { + "name": "Gp0115679_Centrifuge Krona HTML report", + "description": 
"Centrifuge Krona HTML report for Gp0115679", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", + "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", + "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "file_size_bytes": 2359747 + }, + { + "name": "Gp0115679_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115679", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", + "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", + "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "file_size_bytes": 3859620862 + }, + { + "name": "Gp0115679_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115679", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", + "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", + "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "file_size_bytes": 729541 + }, + { + "name": "Gp0115679_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115679", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", + "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", + "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", + "file_size_bytes": 4358324 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fbd" + }, + "has_input": [ + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + ], + "part_of": [ + "nmdc:mga0gg1q48" + ], + "ctg_logsum": 682158, + "scaf_logsum": 725191, + "gap_pct": 0.02692, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e4314c3743795e0be8beda8b7f806557", + 
"nmdc:2a288a5827b66c88f8abf202bbe37aab", + "nmdc:a51c7b3a70601a885594936fd6c753bc", + "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", + "nmdc:002ed5f389b8a13735d27a8741290f6b" + ], + "asm_score": 12.582, + "was_informed_by": "gold:Gp0115679", + "ctg_powsum": 84136, + "scaf_max": 884972, + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "scaf_powsum": 89882, + "execution_resource": "NERSC-Cori", + "contigs": 531791, + "name": "Assembly Activity for nmdc:mga0gg1q48", + "ctg_max": 719201, + "gc_std": 0.09689, + "contig_bp": 254202396, + "gc_avg": 0.48697, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 254270837, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 525116, + "ended_at_time": "2021-10-11T06:30:42+00:00", + "ctg_l50": 449, + "ctg_l90": 285, + "ctg_n50": 139317, + "ctg_n90": 451813, + "scaf_l50": 455, + "scaf_l90": 285, + "scaf_n50": 133535, + "scaf_n90": 445430, + "scaf_l_gt50k": 3540548, + "scaf_n_gt50k": 34, + "scaf_pct_gt50k": 1.3924317, + "output_data_objects": [ + { + "name": "Gp0115679_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115679", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_contigs.fna", + "md5_checksum": "e4314c3743795e0be8beda8b7f806557", + "id": "nmdc:e4314c3743795e0be8beda8b7f806557", + "file_size_bytes": 275030840 + }, + { + "name": "Gp0115679_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115679", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_scaffolds.fna", + "md5_checksum": "2a288a5827b66c88f8abf202bbe37aab", + "id": "nmdc:2a288a5827b66c88f8abf202bbe37aab", + "file_size_bytes": 273327529 + }, + { + "name": "Gp0115679_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115679", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_covstats.txt", + "md5_checksum": "a51c7b3a70601a885594936fd6c753bc", + "id": "nmdc:a51c7b3a70601a885594936fd6c753bc", + "file_size_bytes": 42368790 + }, + { + "name": "Gp0115679_Assembled AGP file", + "description": "Assembled AGP file for Gp0115679", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_assembly.agp", + "md5_checksum": "8851d6fed8e5bbee88aeb7af77bbcfe3", + "id": "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", + "file_size_bytes": 40232148 + }, + { + "name": "Gp0115679_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115679", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_pairedMapped_sorted.bam", + "md5_checksum": "002ed5f389b8a13735d27a8741290f6b", + "id": "nmdc:002ed5f389b8a13735d27a8741290f6b", + "file_size_bytes": 6236105158 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c9" + }, + "has_input": [ + "nmdc:e4314c3743795e0be8beda8b7f806557" + ], + "part_of": [ + "nmdc:mga0gg1q48" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", + "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", + "nmdc:8aed63ca1302c874040e74aceb54ff05", + "nmdc:6361a06de62d93909abfb565a47fd5f0", + "nmdc:bd9d330d1d6a925066003d653a171ca5", + "nmdc:c497ffc128d6738bf3868529eb7ff899", + "nmdc:b67886515193abbd1eec79de067b3196", + "nmdc:05e7a016dddba90801c29de448c43c3c", + "nmdc:7effd4db11316ff95f6a8303807d530f", + "nmdc:503770f008dd2cf04d73821412dcf23a", + "nmdc:c33049c64af55f8ac54d52c861b0a221", + "nmdc:b162efd63f79bc34de66f61348471b74" + ], + "was_informed_by": "gold:Gp0115679", + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0gg1q48", + 
"started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:30:42+00:00", + "output_data_objects": [ + { + "name": "Gp0115679_Protein FAA", + "description": "Protein FAA for Gp0115679", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_proteins.faa", + "md5_checksum": "ac3faa8ad0e8e7827fcf6b882ec90706", + "id": "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", + "file_size_bytes": 151048115 + }, + { + "name": "Gp0115679_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115679", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_structural_annotation.gff", + "md5_checksum": "e3712dbbf0d0bfa14b9b340e73ebf4d0", + "id": "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", + "file_size_bytes": 2549 + }, + { + "name": "Gp0115679_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115679", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_functional_annotation.gff", + "md5_checksum": "8aed63ca1302c874040e74aceb54ff05", + "id": "nmdc:8aed63ca1302c874040e74aceb54ff05", + "file_size_bytes": 166415068 + }, + { + "name": "Gp0115679_KO TSV file", + "description": "KO TSV file for Gp0115679", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko.tsv", + "md5_checksum": "6361a06de62d93909abfb565a47fd5f0", + "id": "nmdc:6361a06de62d93909abfb565a47fd5f0", + "file_size_bytes": 18038415 + }, + { + "name": "Gp0115679_EC TSV file", + "description": "EC TSV file for Gp0115679", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ec.tsv", + "md5_checksum": "bd9d330d1d6a925066003d653a171ca5", + "id": "nmdc:bd9d330d1d6a925066003d653a171ca5", + "file_size_bytes": 11896121 + }, + { + "name": "Gp0115679_COG GFF file", + "description": "COG GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cog.gff", + "md5_checksum": "c497ffc128d6738bf3868529eb7ff899", + "id": "nmdc:c497ffc128d6738bf3868529eb7ff899", + "file_size_bytes": 81943107 + }, + { + "name": "Gp0115679_PFAM GFF file", + "description": "PFAM GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_pfam.gff", + "md5_checksum": "b67886515193abbd1eec79de067b3196", + "id": "nmdc:b67886515193abbd1eec79de067b3196", + "file_size_bytes": 65136506 + }, + { + "name": "Gp0115679_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_tigrfam.gff", + "md5_checksum": "05e7a016dddba90801c29de448c43c3c", + "id": "nmdc:05e7a016dddba90801c29de448c43c3c", + "file_size_bytes": 8536835 + }, + { + "name": "Gp0115679_SMART GFF file", + "description": "SMART GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_smart.gff", + "md5_checksum": "7effd4db11316ff95f6a8303807d530f", + "id": "nmdc:7effd4db11316ff95f6a8303807d530f", + "file_size_bytes": 19907975 + }, + { + "name": "Gp0115679_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_supfam.gff", + "md5_checksum": "503770f008dd2cf04d73821412dcf23a", + "id": "nmdc:503770f008dd2cf04d73821412dcf23a", + "file_size_bytes": 107636995 + }, + { + "name": "Gp0115679_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115679", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cath_funfam.gff", + "md5_checksum": "c33049c64af55f8ac54d52c861b0a221", + "id": "nmdc:c33049c64af55f8ac54d52c861b0a221", + "file_size_bytes": 89046662 + }, + { + "name": "Gp0115679_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko_ec.gff", + "md5_checksum": "b162efd63f79bc34de66f61348471b74", + "id": "nmdc:b162efd63f79bc34de66f61348471b74", + "file_size_bytes": 57348606 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3472e" + }, + "has_input": [ + "nmdc:e4314c3743795e0be8beda8b7f806557", + "nmdc:002ed5f389b8a13735d27a8741290f6b", + "nmdc:8aed63ca1302c874040e74aceb54ff05" + ], + "too_short_contig_num": 504368, + "part_of": [ + "nmdc:mga0gg1q48" + ], + "binned_contig_num": 1887, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:d830e60f4fb30ecb0610f991dcc70e47", + "nmdc:d33af65556b85b1aaf3a5c48b6e294de", + "nmdc:d2d655091735e6308aafca1e1633aad9", + "nmdc:17c6259329da1bbe6da5a18274452a8d", + "nmdc:9250ad41cb19e04a6002e62bda38bbfb" + ], + "was_informed_by": "gold:Gp0115679", + "input_contig_num": 531775, + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0gg1q48", + "mags_list": [ + { + "number_of_contig": 73, + "completeness": 95.65, + "bin_name": "bins.1", + "gene_count": 2974, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 1, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.22, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 45 + }, + { + "number_of_contig": 253, + "completeness": 39.12, + "bin_name": "bins.10", + 
"gene_count": 1586, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.79, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 + }, + { + "number_of_contig": 135, + "completeness": 16.83, + "bin_name": "bins.2", + "gene_count": 706, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 144, + "completeness": 22.53, + "bin_name": "bins.3", + "gene_count": 731, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.14, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 273, + "completeness": 68.97, + "bin_name": "bins.4", + "gene_count": 2023, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 33 + }, + { + "number_of_contig": 3, + "completeness": 8.33, + "bin_name": "bins.5", + "gene_count": 306, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 16.67, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + }, + { + "number_of_contig": 4, + "completeness": 77.01, + "bin_name": "bins.6", + "gene_count": 976, + 
"bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA9983_A", + "num_16s": 1, + "gtdbtk_family": "UBA2163", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Paceibacteria", + "gtdbtk_phylum": "Patescibacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "1-14-0-10-47-16", + "num_t_rna": 34 + }, + { + "number_of_contig": 309, + "completeness": 74.97, + "bin_name": "bins.7", + "gene_count": 2072, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 2.13, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 28 + }, + { + "number_of_contig": 182, + "completeness": 92.61, + "bin_name": "bins.8", + "gene_count": 3044, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 5.0, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 41 + }, + { + "number_of_contig": 511, + "completeness": 48.64, + "bin_name": "bins.9", + "gene_count": 2267, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 5.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + } + ], + "unbinned_contig_num": 25520, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:30:42+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": 
"CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115679_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.tooShort.fa", + "md5_checksum": "d830e60f4fb30ecb0610f991dcc70e47", + "id": "nmdc:d830e60f4fb30ecb0610f991dcc70e47", + "file_size_bytes": 215033122 + }, + { + "name": "Gp0115679_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.unbinned.fa", + "md5_checksum": "d33af65556b85b1aaf3a5c48b6e294de", + "id": "nmdc:d33af65556b85b1aaf3a5c48b6e294de", + "file_size_bytes": 44057142 + }, + { + "name": "Gp0115679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_checkm_qa.out", + "md5_checksum": "d2d655091735e6308aafca1e1633aad9", + "id": "nmdc:d2d655091735e6308aafca1e1633aad9", + "file_size_bytes": 2394 + }, + { + "name": "Gp0115679_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115679", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_hqmq_bin.zip", + "md5_checksum": "17c6259329da1bbe6da5a18274452a8d", + "id": "nmdc:17c6259329da1bbe6da5a18274452a8d", + "file_size_bytes": 3215059 + }, + { + "name": "Gp0115679_metabat2 bins", + "description": "metabat2 bins for Gp0115679", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_metabat_bin.zip", + "md5_checksum": "9250ad41cb19e04a6002e62bda38bbfb", + "id": "nmdc:9250ad41cb19e04a6002e62bda38bbfb", + "file_size_bytes": 1649649 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b37" + }, + "id": "nmdc:omprc-11-yt8css91", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-ynevd369" + ], + "has_output": [ + "jgi:55d818010d8785342fcf8278" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115667" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c882" + }, + "has_input": [ + "nmdc:cb2e0605e8f22a398d982e35aee57715" + ], + "part_of": [ + "nmdc:mga0n0je44" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", + "nmdc:dae7c6e067f69ef6db39b4240cc450ba" + ], + "was_informed_by": "gold:Gp0115667", + "input_read_count": 19416222, + "output_read_bases": 2825090769, + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "input_read_bases": 2931849522, + "name": "Read QC Activity for nmdc:mga0n0je44", + "output_read_count": 18855352, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": 
"2021-10-11T03:58:24+00:00", + "output_data_objects": [ + { + "name": "Gp0115667_Filtered Reads", + "description": "Filtered Reads for Gp0115667", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filtered.fastq.gz", + "md5_checksum": "7d4057e3a44a05171c13fb0ed3e2294a", + "id": "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", + "file_size_bytes": 1599931347 + }, + { + "name": "Gp0115667_Filtered Stats", + "description": "Filtered Stats for Gp0115667", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filterStats.txt", + "md5_checksum": "dae7c6e067f69ef6db39b4240cc450ba", + "id": "nmdc:dae7c6e067f69ef6db39b4240cc450ba", + "file_size_bytes": 286 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf4a" + }, + "has_input": [ + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "nmdc:2afff209a40ca4895307f3a47080c534", + "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "nmdc:eb189cbf0543203d2521397b73d4d34b", + "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "nmdc:ac90bf3384ce44d097f7897ac5ff8134" + ], + "was_informed_by": "gold:Gp0115667", + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:24+00:00", + "output_data_objects": [ + { + "name": "Gp0115667_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", + "md5_checksum": 
"56edf81e5f5102edf7e416bc9430fbb6", + "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "file_size_bytes": 10576 + }, + { + "name": "Gp0115667_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", + "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", + "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "file_size_bytes": 792905 + }, + { + "name": "Gp0115667_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115667", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", + "md5_checksum": "2afff209a40ca4895307f3a47080c534", + "id": "nmdc:2afff209a40ca4895307f3a47080c534", + "file_size_bytes": 254763 + }, + { + "name": "Gp0115667_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115667", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", + "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", + "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "file_size_bytes": 1336111813 + }, + { + "name": "Gp0115667_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115667", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", + "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", + "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "file_size_bytes": 254506 + }, + { + "name": "Gp0115667_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115667", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", + "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", + "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "file_size_bytes": 2323153 + }, + { + "name": "Gp0115667_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115667", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", + "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", + "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", + "file_size_bytes": 1097852664 + }, + { + "name": "Gp0115667_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115667", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", + "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", + "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "file_size_bytes": 639213 + }, + { + "name": "Gp0115667_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115667", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", + "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", + "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", + "file_size_bytes": 3979807 + } + ] + }, + { + "_id": { + "$oid": "61e71a10833bcf838a701aaa" + }, + "has_input": [ + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + ], + "part_of": [ + "nmdc:mga0n0je44" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "nmdc:2afff209a40ca4895307f3a47080c534", + "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + 
"nmdc:fe4bd9f63c32f50676792e3c4adced08", + "nmdc:eb189cbf0543203d2521397b73d4d34b", + "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "nmdc:ac90bf3384ce44d097f7897ac5ff8134" + ], + "was_informed_by": "gold:Gp0115667", + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:24+00:00", + "output_data_objects": [ + { + "name": "Gp0115667_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", + "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", + "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "file_size_bytes": 10576 + }, + { + "name": "Gp0115667_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", + "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", + "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "file_size_bytes": 792905 + }, + { + "name": "Gp0115667_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115667", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", + "md5_checksum": "2afff209a40ca4895307f3a47080c534", + "id": "nmdc:2afff209a40ca4895307f3a47080c534", + "file_size_bytes": 254763 + }, + { + "name": "Gp0115667_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115667", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", + 
"md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", + "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "file_size_bytes": 1336111813 + }, + { + "name": "Gp0115667_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115667", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", + "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", + "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "file_size_bytes": 254506 + }, + { + "name": "Gp0115667_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115667", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", + "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", + "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "file_size_bytes": 2323153 + }, + { + "name": "Gp0115667_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115667", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", + "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", + "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", + "file_size_bytes": 1097852664 + }, + { + "name": "Gp0115667_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115667", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", + "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", + "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "file_size_bytes": 639213 + }, + { + "name": "Gp0115667_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115667", + "data_object_type": "Kraken2 Krona Plot", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", + "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", + "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", + "file_size_bytes": 3979807 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb7" + }, + "has_input": [ + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + ], + "part_of": [ + "nmdc:mga0n0je44" + ], + "ctg_logsum": 195440, + "scaf_logsum": 196103, + "gap_pct": 0.00293, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "nmdc:b60f674a01e3f7fff5ead95f330cef4f", + "nmdc:2e4532cb03bb1e9201976b9d65893788", + "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", + "nmdc:d9b957c7efe7f753fe67441d0be605c6" + ], + "asm_score": 17.061, + "was_informed_by": "gold:Gp0115667", + "ctg_powsum": 25448, + "scaf_max": 245816, + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "scaf_powsum": 25552, + "execution_resource": "NERSC-Cori", + "contigs": 116132, + "name": "Assembly Activity for nmdc:mga0n0je44", + "ctg_max": 245816, + "gc_std": 0.12277, + "contig_bp": 58413782, + "gc_avg": 0.47644, + "started_at_time": "2021-10-11T02:28:16Z", + "scaf_bp": 58415492, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 116033, + "ended_at_time": "2021-10-11T03:58:24+00:00", + "ctg_l50": 479, + "ctg_l90": 286, + "ctg_n50": 26909, + "ctg_n90": 95138, + "scaf_l50": 479, + "scaf_l90": 286, + "scaf_n50": 26889, + "scaf_n90": 95057, + "scaf_l_gt50k": 1865703, + "scaf_n_gt50k": 17, + "scaf_pct_gt50k": 3.1938498, + "output_data_objects": [ + { + "name": "Gp0115667_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115667", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", + "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", + "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "file_size_bytes": 
62926054 + }, + { + "name": "Gp0115667_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115667", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", + "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", + "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", + "file_size_bytes": 62577490 + }, + { + "name": "Gp0115667_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", + "md5_checksum": "2e4532cb03bb1e9201976b9d65893788", + "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", + "file_size_bytes": 9189143 + }, + { + "name": "Gp0115667_Assembled AGP file", + "description": "Assembled AGP file for Gp0115667", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_assembly.agp", + "md5_checksum": "e49f8a26a9cd0420b688c967bbacb4c6", + "id": "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", + "file_size_bytes": 8508903 + }, + { + "name": "Gp0115667_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115667", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_pairedMapped_sorted.bam", + "md5_checksum": "d9b957c7efe7f753fe67441d0be605c6", + "id": "nmdc:d9b957c7efe7f753fe67441d0be605c6", + "file_size_bytes": 1771039554 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9cc" + }, + "has_input": [ + "nmdc:b3cefc5a9599a4fb9432132baf7f5565" + ], + "part_of": [ + "nmdc:mga0n0je44" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", + "nmdc:26ab4381753f685c44091e1f17d8bab5", + "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", + 
"nmdc:6df49253fee066c699d6a5191a0efaed", + "nmdc:5e35e51a595f892968e57681ee448e5f", + "nmdc:ae1bc890152d28387f65c65d434b97ea", + "nmdc:fb736eaba77cbd99135ddbc32168db94", + "nmdc:3b00892f95bc4dedaf4384685a75d52f", + "nmdc:b8c0d7c187169f34aafc17308aeea2ed", + "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", + "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", + "nmdc:0a51a22e2cf94c853657381549aa8f04" + ], + "was_informed_by": "gold:Gp0115667", + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0n0je44", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:24+00:00", + "output_data_objects": [ + { + "name": "Gp0115667_Protein FAA", + "description": "Protein FAA for Gp0115667", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_proteins.faa", + "md5_checksum": "45e8b887fc06ddbf2af3ecf9c91a7bf7", + "id": "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", + "file_size_bytes": 31564336 + }, + { + "name": "Gp0115667_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115667", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_structural_annotation.gff", + "md5_checksum": "26ab4381753f685c44091e1f17d8bab5", + "id": "nmdc:26ab4381753f685c44091e1f17d8bab5", + "file_size_bytes": 2760 + }, + { + "name": "Gp0115667_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115667", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_functional_annotation.gff", + "md5_checksum": "5a378f3975ab6c2cf2a36b0b007ea3f8", + "id": "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", + "file_size_bytes": 34525554 + }, + { + "name": "Gp0115667_KO 
TSV file", + "description": "KO TSV file for Gp0115667", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko.tsv", + "md5_checksum": "6df49253fee066c699d6a5191a0efaed", + "id": "nmdc:6df49253fee066c699d6a5191a0efaed", + "file_size_bytes": 3439857 + }, + { + "name": "Gp0115667_EC TSV file", + "description": "EC TSV file for Gp0115667", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ec.tsv", + "md5_checksum": "5e35e51a595f892968e57681ee448e5f", + "id": "nmdc:5e35e51a595f892968e57681ee448e5f", + "file_size_bytes": 2203532 + }, + { + "name": "Gp0115667_COG GFF file", + "description": "COG GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cog.gff", + "md5_checksum": "ae1bc890152d28387f65c65d434b97ea", + "id": "nmdc:ae1bc890152d28387f65c65d434b97ea", + "file_size_bytes": 15384958 + }, + { + "name": "Gp0115667_PFAM GFF file", + "description": "PFAM GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_pfam.gff", + "md5_checksum": "fb736eaba77cbd99135ddbc32168db94", + "id": "nmdc:fb736eaba77cbd99135ddbc32168db94", + "file_size_bytes": 12472999 + }, + { + "name": "Gp0115667_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_tigrfam.gff", + "md5_checksum": "3b00892f95bc4dedaf4384685a75d52f", + "id": "nmdc:3b00892f95bc4dedaf4384685a75d52f", + "file_size_bytes": 1755779 + }, + { + "name": "Gp0115667_SMART GFF file", + "description": "SMART GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_smart.gff", + "md5_checksum": "b8c0d7c187169f34aafc17308aeea2ed", + "id": 
"nmdc:b8c0d7c187169f34aafc17308aeea2ed", + "file_size_bytes": 3937293 + }, + { + "name": "Gp0115667_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_supfam.gff", + "md5_checksum": "2a8e4bb3922ec664bbb5ce49a30cc87e", + "id": "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", + "file_size_bytes": 22725250 + }, + { + "name": "Gp0115667_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cath_funfam.gff", + "md5_checksum": "34eddc2289f3e3b4707a6c8060f6dd99", + "id": "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", + "file_size_bytes": 17788890 + }, + { + "name": "Gp0115667_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko_ec.gff", + "md5_checksum": "0a51a22e2cf94c853657381549aa8f04", + "id": "nmdc:0a51a22e2cf94c853657381549aa8f04", + "file_size_bytes": 11004264 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3472a" + }, + "has_input": [ + "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "nmdc:d9b957c7efe7f753fe67441d0be605c6", + "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8" + ], + "too_short_contig_num": 109354, + "part_of": [ + "nmdc:mga0n0je44" + ], + "binned_contig_num": 596, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:1277a6924ab380e001a7208e7ebbb0e3", + "nmdc:48772112891988a2ef3f0c40786c11fd", + "nmdc:527e2c19607c225a707db67b5be01b6f", + "nmdc:027626ff998bf1e495e32d09cab4bb08", + "nmdc:733e798989606c802b3bbfc952a38841" + ], + "was_informed_by": "gold:Gp0115667", + "input_contig_num": 116127, + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0n0je44", + "mags_list": [ + { + 
"number_of_contig": 166, + "completeness": 19.51, + "bin_name": "bins.1", + "gene_count": 906, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.04, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 70, + "completeness": 99.78, + "bin_name": "bins.2", + "gene_count": 3225, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 1, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.43, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 49 + }, + { + "number_of_contig": 67, + "completeness": 41.5, + "bin_name": "bins.3", + "gene_count": 464, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.76, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 293, + "completeness": 56.99, + "bin_name": "bins.4", + "gene_count": 1734, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 3.01, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 27 + } + ], + "unbinned_contig_num": 6177, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:24+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + 
"data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115667_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.tooShort.fa", + "md5_checksum": "1277a6924ab380e001a7208e7ebbb0e3", + "id": "nmdc:1277a6924ab380e001a7208e7ebbb0e3", + "file_size_bytes": 46335107 + }, + { + "name": "Gp0115667_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.unbinned.fa", + "md5_checksum": "48772112891988a2ef3f0c40786c11fd", + "id": "nmdc:48772112891988a2ef3f0c40786c11fd", + "file_size_bytes": 10701981 + }, + { + "name": "Gp0115667_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115667", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_checkm_qa.out", + "md5_checksum": "527e2c19607c225a707db67b5be01b6f", + "id": "nmdc:527e2c19607c225a707db67b5be01b6f", + "file_size_bytes": 1360 + }, + { + "name": "Gp0115667_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115667", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_hqmq_bin.zip", + "md5_checksum": "027626ff998bf1e495e32d09cab4bb08", + "id": "nmdc:027626ff998bf1e495e32d09cab4bb08", + "file_size_bytes": 1462611 + }, + { + "name": "Gp0115667_metabat2 bins", + "description": "metabat2 bins for Gp0115667", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_metabat_bin.zip", + "md5_checksum": "733e798989606c802b3bbfc952a38841", + "id": "nmdc:733e798989606c802b3bbfc952a38841", + "file_size_bytes": 334014 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b38" + }, + "id": "nmdc:omprc-11-hgehsc37", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qxntpg05" + ], + "has_output": [ + "jgi:55d817f20d8785342fcf826c" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115664" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87b" + }, + "has_input": [ + "nmdc:86929bf5b2afcb965129dcf0eae2d8fc" + ], + "part_of": [ + "nmdc:mga0dm3v04" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:232e31505b6a0251df2303c0563d64c1", + "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5" + ], + "was_informed_by": "gold:Gp0115664", + "input_read_count": 19058974, + "output_read_bases": 2597325375, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "input_read_bases": 2877905074, + "name": "Read QC Activity for nmdc:mga0dm3v04", + "output_read_count": 17338778, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": 
"2021-10-11T03:33:34+00:00", + "output_data_objects": [ + { + "name": "Gp0115664_Filtered Reads", + "description": "Filtered Reads for Gp0115664", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filtered.fastq.gz", + "md5_checksum": "232e31505b6a0251df2303c0563d64c1", + "id": "nmdc:232e31505b6a0251df2303c0563d64c1", + "file_size_bytes": 1566732675 + }, + { + "name": "Gp0115664_Filtered Stats", + "description": "Filtered Stats for Gp0115664", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filterStats.txt", + "md5_checksum": "f3f4f75f19c92af6e98d2b45cccaacd5", + "id": "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf3f" + }, + "has_input": [ + "nmdc:232e31505b6a0251df2303c0563d64c1" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "nmdc:7f93f97242aed036019f13492f5af35c", + "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "nmdc:a4243f71a0288f489c566ae85d85891d", + "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "nmdc:a80779b32415ef001d0403f0b618b612", + "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "nmdc:ce47d6686edb7b3472102d5883229c45", + "nmdc:29b75e78b0b7fd8115614d8e9d341d46" + ], + "was_informed_by": "gold:Gp0115664", + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:33:34+00:00", + "output_data_objects": [ + { + "name": "Gp0115664_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", + "md5_checksum": 
"9d61d9f0c31a98f88ad8cde86254148d", + "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "file_size_bytes": 9591 + }, + { + "name": "Gp0115664_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", + "md5_checksum": "7f93f97242aed036019f13492f5af35c", + "id": "nmdc:7f93f97242aed036019f13492f5af35c", + "file_size_bytes": 885985 + }, + { + "name": "Gp0115664_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115664", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", + "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", + "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "file_size_bytes": 251303 + }, + { + "name": "Gp0115664_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115664", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", + "md5_checksum": "a4243f71a0288f489c566ae85d85891d", + "id": "nmdc:a4243f71a0288f489c566ae85d85891d", + "file_size_bytes": 1268144933 + }, + { + "name": "Gp0115664_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115664", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", + "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", + "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "file_size_bytes": 254575 + }, + { + "name": "Gp0115664_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115664", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", + "md5_checksum": "a80779b32415ef001d0403f0b618b612", + "id": "nmdc:a80779b32415ef001d0403f0b618b612", + "file_size_bytes": 2327293 + }, + { + "name": "Gp0115664_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115664", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", + "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", + "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "file_size_bytes": 1037932028 + }, + { + "name": "Gp0115664_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115664", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", + "md5_checksum": "ce47d6686edb7b3472102d5883229c45", + "id": "nmdc:ce47d6686edb7b3472102d5883229c45", + "file_size_bytes": 641242 + }, + { + "name": "Gp0115664_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115664", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", + "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", + "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", + "file_size_bytes": 3995680 + } + ] + }, + { + "_id": { + "$oid": "61e719dd833bcf838a70154e" + }, + "has_input": [ + "nmdc:232e31505b6a0251df2303c0563d64c1" + ], + "part_of": [ + "nmdc:mga0dm3v04" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "nmdc:7f93f97242aed036019f13492f5af35c", + "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "nmdc:a4243f71a0288f489c566ae85d85891d", + "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + 
"nmdc:a80779b32415ef001d0403f0b618b612", + "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "nmdc:ce47d6686edb7b3472102d5883229c45", + "nmdc:29b75e78b0b7fd8115614d8e9d341d46" + ], + "was_informed_by": "gold:Gp0115664", + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:33:34+00:00", + "output_data_objects": [ + { + "name": "Gp0115664_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", + "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", + "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "file_size_bytes": 9591 + }, + { + "name": "Gp0115664_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", + "md5_checksum": "7f93f97242aed036019f13492f5af35c", + "id": "nmdc:7f93f97242aed036019f13492f5af35c", + "file_size_bytes": 885985 + }, + { + "name": "Gp0115664_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115664", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", + "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", + "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "file_size_bytes": 251303 + }, + { + "name": "Gp0115664_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115664", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", + "md5_checksum": 
"a4243f71a0288f489c566ae85d85891d", + "id": "nmdc:a4243f71a0288f489c566ae85d85891d", + "file_size_bytes": 1268144933 + }, + { + "name": "Gp0115664_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115664", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", + "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", + "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "file_size_bytes": 254575 + }, + { + "name": "Gp0115664_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115664", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", + "md5_checksum": "a80779b32415ef001d0403f0b618b612", + "id": "nmdc:a80779b32415ef001d0403f0b618b612", + "file_size_bytes": 2327293 + }, + { + "name": "Gp0115664_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115664", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", + "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", + "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "file_size_bytes": 1037932028 + }, + { + "name": "Gp0115664_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115664", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", + "md5_checksum": "ce47d6686edb7b3472102d5883229c45", + "id": "nmdc:ce47d6686edb7b3472102d5883229c45", + "file_size_bytes": 641242 + }, + { + "name": "Gp0115664_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115664", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", + "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", + "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", + "file_size_bytes": 3995680 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb0" + }, + "has_input": [ + "nmdc:232e31505b6a0251df2303c0563d64c1" + ], + "part_of": [ + "nmdc:mga0dm3v04" + ], + "ctg_logsum": 60365, + "scaf_logsum": 60806, + "gap_pct": 0.00196, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3faf965a2e745048afed5d1c065a78c4", + "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", + "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", + "nmdc:1f9a75569aedc406a3db8ff779b03c19", + "nmdc:faeb84260d97f23162a6176b9442a5c8" + ], + "asm_score": 4.21, + "was_informed_by": "gold:Gp0115664", + "ctg_powsum": 6668.288, + "scaf_max": 15348, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "scaf_powsum": 6720.964, + "execution_resource": "NERSC-Cori", + "contigs": 78376, + "name": "Assembly Activity for nmdc:mga0dm3v04", + "ctg_max": 15348, + "gc_std": 0.11459, + "contig_bp": 33088752, + "gc_avg": 0.5432, + "started_at_time": "2021-10-11T02:28:16Z", + "scaf_bp": 33089402, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 78311, + "ended_at_time": "2021-10-11T03:33:34+00:00", + "ctg_l50": 377, + "ctg_l90": 283, + "ctg_n50": 23883, + "ctg_n90": 67231, + "scaf_l50": 377, + "scaf_l90": 283, + "scaf_n50": 23850, + "scaf_n90": 67169, + "output_data_objects": [ + { + "name": "Gp0115664_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115664", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_contigs.fna", + "md5_checksum": "3faf965a2e745048afed5d1c065a78c4", + "id": "nmdc:3faf965a2e745048afed5d1c065a78c4", + "file_size_bytes": 36012597 + }, + { + "name": "Gp0115664_Assembled scaffold fasta", + "description": 
"Assembled scaffold fasta for Gp0115664", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_scaffolds.fna", + "md5_checksum": "2d99daff632b19ebdea3f3e5784e2fbc", + "id": "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", + "file_size_bytes": 35776428 + }, + { + "name": "Gp0115664_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_covstats.txt", + "md5_checksum": "d8f255300e5f214baad3c3b4b3c0b51b", + "id": "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", + "file_size_bytes": 6143277 + }, + { + "name": "Gp0115664_Assembled AGP file", + "description": "Assembled AGP file for Gp0115664", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_assembly.agp", + "md5_checksum": "1f9a75569aedc406a3db8ff779b03c19", + "id": "nmdc:1f9a75569aedc406a3db8ff779b03c19", + "file_size_bytes": 5710214 + }, + { + "name": "Gp0115664_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115664", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_pairedMapped_sorted.bam", + "md5_checksum": "faeb84260d97f23162a6176b9442a5c8", + "id": "nmdc:faeb84260d97f23162a6176b9442a5c8", + "file_size_bytes": 1670248615 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c5" + }, + "has_input": [ + "nmdc:3faf965a2e745048afed5d1c065a78c4" + ], + "part_of": [ + "nmdc:mga0dm3v04" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:338a8f2f739dfc89557e090d604302f6", + "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", + "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", + "nmdc:bc7f7df6865acffd4e07f8b592573eb9", + "nmdc:be38bedd77ab3c072bafbb2c201c953d", + 
"nmdc:d7318549a735853b679d15171f5c7ea7", + "nmdc:c1617e0980c6e52149692aee39e30f8c", + "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", + "nmdc:eb1fba5cad14c3e211baa2de796bca2e", + "nmdc:2146449222f410a286e4786bf19c9a5e", + "nmdc:20ced78c72f67d064bddcc8d5534ebb6", + "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170" + ], + "was_informed_by": "gold:Gp0115664", + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0dm3v04", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:33:34+00:00", + "output_data_objects": [ + { + "name": "Gp0115664_Protein FAA", + "description": "Protein FAA for Gp0115664", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_proteins.faa", + "md5_checksum": "338a8f2f739dfc89557e090d604302f6", + "id": "nmdc:338a8f2f739dfc89557e090d604302f6", + "file_size_bytes": 21010319 + }, + { + "name": "Gp0115664_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115664", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_structural_annotation.gff", + "md5_checksum": "0ce03dd69826edcc8b5f6dd01ca176dc", + "id": "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", + "file_size_bytes": 2497 + }, + { + "name": "Gp0115664_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115664", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_functional_annotation.gff", + "md5_checksum": "dc720d27299f6f5c1d38c4dcf1dfc8db", + "id": "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", + "file_size_bytes": 24426623 + }, + { + "name": "Gp0115664_KO TSV file", + "description": "KO TSV file for Gp0115664", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko.tsv", + "md5_checksum": "bc7f7df6865acffd4e07f8b592573eb9", + "id": "nmdc:bc7f7df6865acffd4e07f8b592573eb9", + "file_size_bytes": 2875393 + }, + { + "name": "Gp0115664_EC TSV file", + "description": "EC TSV file for Gp0115664", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ec.tsv", + "md5_checksum": "be38bedd77ab3c072bafbb2c201c953d", + "id": "nmdc:be38bedd77ab3c072bafbb2c201c953d", + "file_size_bytes": 1882878 + }, + { + "name": "Gp0115664_COG GFF file", + "description": "COG GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cog.gff", + "md5_checksum": "d7318549a735853b679d15171f5c7ea7", + "id": "nmdc:d7318549a735853b679d15171f5c7ea7", + "file_size_bytes": 12475107 + }, + { + "name": "Gp0115664_PFAM GFF file", + "description": "PFAM GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_pfam.gff", + "md5_checksum": "c1617e0980c6e52149692aee39e30f8c", + "id": "nmdc:c1617e0980c6e52149692aee39e30f8c", + "file_size_bytes": 9305713 + }, + { + "name": "Gp0115664_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_tigrfam.gff", + "md5_checksum": "bd5a9b5e55605ece8873d6ac05e76e0d", + "id": "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", + "file_size_bytes": 1181236 + }, + { + "name": "Gp0115664_SMART GFF file", + "description": "SMART GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_smart.gff", + "md5_checksum": "eb1fba5cad14c3e211baa2de796bca2e", + "id": "nmdc:eb1fba5cad14c3e211baa2de796bca2e", + "file_size_bytes": 2718910 + }, + { + "name": "Gp0115664_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_supfam.gff", + "md5_checksum": "2146449222f410a286e4786bf19c9a5e", + "id": "nmdc:2146449222f410a286e4786bf19c9a5e", + "file_size_bytes": 16463047 + }, + { + "name": "Gp0115664_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cath_funfam.gff", + "md5_checksum": "20ced78c72f67d064bddcc8d5534ebb6", + "id": "nmdc:20ced78c72f67d064bddcc8d5534ebb6", + "file_size_bytes": 12501882 + }, + { + "name": "Gp0115664_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko_ec.gff", + "md5_checksum": "7ffe90ceb10c9f40f755aa8d7aa30170", + "id": "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170", + "file_size_bytes": 9217314 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34723" + }, + "has_input": [ + "nmdc:3faf965a2e745048afed5d1c065a78c4", + "nmdc:faeb84260d97f23162a6176b9442a5c8", + "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db" + ], + "too_short_contig_num": 75364, + "part_of": [ + "nmdc:mga0dm3v04" + ], + "binned_contig_num": 220, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:767a36b1bffa42d3d25af3f81b15e11b", + "nmdc:994fd58ab9a53c19ba1cdb830e37a132", + "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", + "nmdc:0d45611a5d0c80679c00fa759c939df0", + "nmdc:bb5835f621252fca37967e00245517ac" + ], + "was_informed_by": "gold:Gp0115664", + "input_contig_num": 78376, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0dm3v04", + "mags_list": [ + { + "number_of_contig": 220, + "completeness": 45.41, + "bin_name": "bins.1", + "gene_count": 1182, + "bin_quality": 
"LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + } + ], + "unbinned_contig_num": 2792, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:34+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115664_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.tooShort.fa", + "md5_checksum": "767a36b1bffa42d3d25af3f81b15e11b", + "id": "nmdc:767a36b1bffa42d3d25af3f81b15e11b", + "file_size_bytes": 30368582 + }, + { + "name": "Gp0115664_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.unbinned.fa", + "md5_checksum": "994fd58ab9a53c19ba1cdb830e37a132", + "id": "nmdc:994fd58ab9a53c19ba1cdb830e37a132", + "file_size_bytes": 4608000 + }, + { + "name": "Gp0115664_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115664", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_checkm_qa.out", + "md5_checksum": 
"db59a64c874a9e06c1f1ba58df96fe0d", + "id": "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", + "file_size_bytes": 845 + }, + { + "name": "Gp0115664_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115664", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_hqmq_bin.zip", + "md5_checksum": "0d45611a5d0c80679c00fa759c939df0", + "id": "nmdc:0d45611a5d0c80679c00fa759c939df0", + "file_size_bytes": 182 + }, + { + "name": "Gp0115664_metabat2 bins", + "description": "metabat2 bins for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_metabat_bin.zip", + "md5_checksum": "bb5835f621252fca37967e00245517ac", + "id": "nmdc:bb5835f621252fca37967e00245517ac", + "file_size_bytes": 314358 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b39" + }, + "id": "nmdc:omprc-11-7vsv7h78", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-j0wbx741" + ], + "has_output": [ + "jgi:55f23d790d8785306f96497e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115678" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c881" + }, + "has_input": [ + "nmdc:0e6219b7901669483a0a0386cfc01f93" + ], + "part_of": [ + "nmdc:mga026tn70" + 
], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e0ce93b88419f87568ff206e0efe3a24", + "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690" + ], + "was_informed_by": "gold:Gp0115678", + "input_read_count": 51286688, + "output_read_bases": 7231449575, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "input_read_bases": 7744289888, + "name": "Read QC Activity for nmdc:mga026tn70", + "output_read_count": 48276864, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:18:17+00:00", + "output_data_objects": [ + { + "name": "Gp0115678_Filtered Reads", + "description": "Filtered Reads for Gp0115678", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filtered.fastq.gz", + "md5_checksum": "e0ce93b88419f87568ff206e0efe3a24", + "id": "nmdc:e0ce93b88419f87568ff206e0efe3a24", + "file_size_bytes": 4090026888 + }, + { + "name": "Gp0115678_Filtered Stats", + "description": "Filtered Stats for Gp0115678", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filterStats.txt", + "md5_checksum": "7bf8ff4cf0d98cccd8e1c20f77dd1690", + "id": "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690", + "file_size_bytes": 292 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf49" + }, + "has_input": [ + "nmdc:e0ce93b88419f87568ff206e0efe3a24" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "nmdc:12b2d6afc355bce76249d750a9fab534", + "nmdc:18214017d56658a48723c9c998dcba7e", + "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "nmdc:f9c01985f057825149d35de0650095a8", + "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + 
"nmdc:38d41d4299141abe28bf0405af80cdfc" + ], + "was_informed_by": "gold:Gp0115678", + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga026tn70", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:18:17+00:00", + "output_data_objects": [ + { + "name": "Gp0115678_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", + "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", + "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "file_size_bytes": 19085 + }, + { + "name": "Gp0115678_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", + "md5_checksum": "12b2d6afc355bce76249d750a9fab534", + "id": "nmdc:12b2d6afc355bce76249d750a9fab534", + "file_size_bytes": 1243929 + }, + { + "name": "Gp0115678_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115678", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", + "md5_checksum": "18214017d56658a48723c9c998dcba7e", + "id": "nmdc:18214017d56658a48723c9c998dcba7e", + "file_size_bytes": 281148 + }, + { + "name": "Gp0115678_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115678", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", + "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", + "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "file_size_bytes": 3491726958 + }, + { 
+ "name": "Gp0115678_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115678", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", + "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", + "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "file_size_bytes": 264123 + }, + { + "name": "Gp0115678_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115678", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", + "md5_checksum": "f9c01985f057825149d35de0650095a8", + "id": "nmdc:f9c01985f057825149d35de0650095a8", + "file_size_bytes": 2352347 + }, + { + "name": "Gp0115678_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115678", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", + "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", + "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "file_size_bytes": 2880889483 + }, + { + "name": "Gp0115678_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115678", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", + "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", + "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "file_size_bytes": 735519 + }, + { + "name": "Gp0115678_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115678", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", + "md5_checksum": 
"38d41d4299141abe28bf0405af80cdfc", + "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", + "file_size_bytes": 4410156 + } + ] + }, + { + "_id": { + "$oid": "61e719f6833bcf838a701854" + }, + "has_input": [ + "nmdc:e0ce93b88419f87568ff206e0efe3a24" + ], + "part_of": [ + "nmdc:mga026tn70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "nmdc:12b2d6afc355bce76249d750a9fab534", + "nmdc:18214017d56658a48723c9c998dcba7e", + "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "nmdc:f9c01985f057825149d35de0650095a8", + "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "nmdc:38d41d4299141abe28bf0405af80cdfc" + ], + "was_informed_by": "gold:Gp0115678", + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga026tn70", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:18:17+00:00", + "output_data_objects": [ + { + "name": "Gp0115678_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", + "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", + "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "file_size_bytes": 19085 + }, + { + "name": "Gp0115678_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", + "md5_checksum": "12b2d6afc355bce76249d750a9fab534", + "id": "nmdc:12b2d6afc355bce76249d750a9fab534", + "file_size_bytes": 1243929 + }, + { + "name": "Gp0115678_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115678", + "data_object_type": "GOTTCHA2 Krona Plot", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", + "md5_checksum": "18214017d56658a48723c9c998dcba7e", + "id": "nmdc:18214017d56658a48723c9c998dcba7e", + "file_size_bytes": 281148 + }, + { + "name": "Gp0115678_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115678", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", + "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", + "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "file_size_bytes": 3491726958 + }, + { + "name": "Gp0115678_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115678", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", + "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", + "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "file_size_bytes": 264123 + }, + { + "name": "Gp0115678_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115678", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", + "md5_checksum": "f9c01985f057825149d35de0650095a8", + "id": "nmdc:f9c01985f057825149d35de0650095a8", + "file_size_bytes": 2352347 + }, + { + "name": "Gp0115678_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115678", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", + "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", + "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "file_size_bytes": 
2880889483 + }, + { + "name": "Gp0115678_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115678", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", + "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", + "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "file_size_bytes": 735519 + }, + { + "name": "Gp0115678_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115678", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", + "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", + "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", + "file_size_bytes": 4410156 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb5" + }, + "has_input": [ + "nmdc:e0ce93b88419f87568ff206e0efe3a24" + ], + "part_of": [ + "nmdc:mga026tn70" + ], + "ctg_logsum": 494917, + "scaf_logsum": 496628, + "gap_pct": 0.00163, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d305e212cce8f84f14561d3957c968b1", + "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", + "nmdc:444562a4e7108077b7e541a5d9064086", + "nmdc:6c400425b7188b24ac49533d9ce0d43b", + "nmdc:1c63639a894aa686e77e57787fcafbc6" + ], + "asm_score": 7.785, + "was_informed_by": "gold:Gp0115678", + "ctg_powsum": 57423, + "scaf_max": 116556, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "scaf_powsum": 57689, + "execution_resource": "NERSC-Cori", + "contigs": 383712, + "name": "Assembly Activity for nmdc:mga026tn70", + "ctg_max": 116556, + "gc_std": 0.13426, + "contig_bp": 190310453, + "gc_avg": 0.48844, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 190313553, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 383447, + "ended_at_time": "2021-10-11T06:18:17+00:00", + "ctg_l50": 474, + "ctg_l90": 290, + "ctg_n50": 102228, + 
"ctg_n90": 321321, + "scaf_l50": 474, + "scaf_l90": 290, + "scaf_n50": 102177, + "scaf_n90": 321076, + "scaf_l_gt50k": 453691, + "scaf_n_gt50k": 6, + "scaf_pct_gt50k": 0.23839132, + "output_data_objects": [ + { + "name": "Gp0115678_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115678", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", + "md5_checksum": "d305e212cce8f84f14561d3957c968b1", + "id": "nmdc:d305e212cce8f84f14561d3957c968b1", + "file_size_bytes": 205441595 + }, + { + "name": "Gp0115678_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115678", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", + "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", + "id": "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", + "file_size_bytes": 204286677 + }, + { + "name": "Gp0115678_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_covstats.txt", + "md5_checksum": "444562a4e7108077b7e541a5d9064086", + "id": "nmdc:444562a4e7108077b7e541a5d9064086", + "file_size_bytes": 30470067 + }, + { + "name": "Gp0115678_Assembled AGP file", + "description": "Assembled AGP file for Gp0115678", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_assembly.agp", + "md5_checksum": "6c400425b7188b24ac49533d9ce0d43b", + "id": "nmdc:6c400425b7188b24ac49533d9ce0d43b", + "file_size_bytes": 28619270 + }, + { + "name": "Gp0115678_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115678", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_pairedMapped_sorted.bam", + "md5_checksum": "1c63639a894aa686e77e57787fcafbc6", + "id": "nmdc:1c63639a894aa686e77e57787fcafbc6", + "file_size_bytes": 4471336607 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c8" + }, + "has_input": [ + "nmdc:d305e212cce8f84f14561d3957c968b1" + ], + "part_of": [ + "nmdc:mga026tn70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", + "nmdc:4eeee677df10364f622a0d4789522c69", + "nmdc:351ff91eddf2bc89acbdf04eab68aef1", + "nmdc:64b9d934918b78de80f1cf80a013557f", + "nmdc:903f2015c41660ae53e16bfc369d566a", + "nmdc:bf72ad74b2375abe730ecf7dc50b1557", + "nmdc:92f4707b0b022c217463f76d229dd3cb", + "nmdc:4f6f494c878aeff4308f2de2b2682ea6", + "nmdc:c44ff7df84f2b777b7fee22f7d28e205", + "nmdc:b4fad8c887bc33c67a3316475ccc3572", + "nmdc:4a3d00839e3067973b06771a31bbae93", + "nmdc:f01768e30cdd8f7650f631883d1c5d23" + ], + "was_informed_by": "gold:Gp0115678", + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga026tn70", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:18:17+00:00", + "output_data_objects": [ + { + "name": "Gp0115678_Protein FAA", + "description": "Protein FAA for Gp0115678", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_proteins.faa", + "md5_checksum": "ecfb1a4d469d9f95a91c8a3a3d5475af", + "id": "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", + "file_size_bytes": 109377096 + }, + { + "name": "Gp0115678_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115678", + "data_object_type": "Structural Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_structural_annotation.gff", + "md5_checksum": "4eeee677df10364f622a0d4789522c69", + "id": "nmdc:4eeee677df10364f622a0d4789522c69", + "file_size_bytes": 2533 + }, + { + "name": "Gp0115678_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115678", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_functional_annotation.gff", + "md5_checksum": "351ff91eddf2bc89acbdf04eab68aef1", + "id": "nmdc:351ff91eddf2bc89acbdf04eab68aef1", + "file_size_bytes": 118933051 + }, + { + "name": "Gp0115678_KO TSV file", + "description": "KO TSV file for Gp0115678", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko.tsv", + "md5_checksum": "64b9d934918b78de80f1cf80a013557f", + "id": "nmdc:64b9d934918b78de80f1cf80a013557f", + "file_size_bytes": 12839157 + }, + { + "name": "Gp0115678_EC TSV file", + "description": "EC TSV file for Gp0115678", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ec.tsv", + "md5_checksum": "903f2015c41660ae53e16bfc369d566a", + "id": "nmdc:903f2015c41660ae53e16bfc369d566a", + "file_size_bytes": 8227424 + }, + { + "name": "Gp0115678_COG GFF file", + "description": "COG GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cog.gff", + "md5_checksum": "bf72ad74b2375abe730ecf7dc50b1557", + "id": "nmdc:bf72ad74b2375abe730ecf7dc50b1557", + "file_size_bytes": 57084923 + }, + { + "name": "Gp0115678_PFAM GFF file", + "description": "PFAM GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_pfam.gff", + "md5_checksum": "92f4707b0b022c217463f76d229dd3cb", + "id": 
"nmdc:92f4707b0b022c217463f76d229dd3cb", + "file_size_bytes": 46625196 + }, + { + "name": "Gp0115678_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_tigrfam.gff", + "md5_checksum": "4f6f494c878aeff4308f2de2b2682ea6", + "id": "nmdc:4f6f494c878aeff4308f2de2b2682ea6", + "file_size_bytes": 5472483 + }, + { + "name": "Gp0115678_SMART GFF file", + "description": "SMART GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_smart.gff", + "md5_checksum": "c44ff7df84f2b777b7fee22f7d28e205", + "id": "nmdc:c44ff7df84f2b777b7fee22f7d28e205", + "file_size_bytes": 18005129 + }, + { + "name": "Gp0115678_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_supfam.gff", + "md5_checksum": "b4fad8c887bc33c67a3316475ccc3572", + "id": "nmdc:b4fad8c887bc33c67a3316475ccc3572", + "file_size_bytes": 80713018 + }, + { + "name": "Gp0115678_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cath_funfam.gff", + "md5_checksum": "4a3d00839e3067973b06771a31bbae93", + "id": "nmdc:4a3d00839e3067973b06771a31bbae93", + "file_size_bytes": 66327975 + }, + { + "name": "Gp0115678_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko_ec.gff", + "md5_checksum": "f01768e30cdd8f7650f631883d1c5d23", + "id": "nmdc:f01768e30cdd8f7650f631883d1c5d23", + "file_size_bytes": 40908900 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3472b" + }, + "has_input": [ + "nmdc:d305e212cce8f84f14561d3957c968b1", + "nmdc:1c63639a894aa686e77e57787fcafbc6", + "nmdc:351ff91eddf2bc89acbdf04eab68aef1" + ], + 
"too_short_contig_num": 362617, + "part_of": [ + "nmdc:mga026tn70" + ], + "binned_contig_num": 2089, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", + "nmdc:85defe7977c263b8fba3f31f89f101f9", + "nmdc:19a6a8410cece1118a06763023cc1313", + "nmdc:54ed3f096ca7eacec9e5078ca45a6530", + "nmdc:8493c05e428d90f8893e4c58755b2e95" + ], + "was_informed_by": "gold:Gp0115678", + "input_contig_num": 383711, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga026tn70", + "mags_list": [ + { + "number_of_contig": 5, + "completeness": 0.31, + "bin_name": "bins.1", + "gene_count": 264, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 231, + "completeness": 50.86, + "bin_name": "bins.2", + "gene_count": 1187, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 0.86, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 19 + }, + { + "number_of_contig": 675, + "completeness": 74.51, + "bin_name": "bins.3", + "gene_count": 4479, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.06, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 37 + }, + { + "number_of_contig": 314, + 
"completeness": 64.35, + "bin_name": "bins.4", + "gene_count": 1988, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.48, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Ga0077559", + "num_t_rna": 24 + }, + { + "number_of_contig": 574, + "completeness": 73.7, + "bin_name": "bins.5", + "gene_count": 3601, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.88, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Aquabacterium", + "num_t_rna": 32 + }, + { + "number_of_contig": 290, + "completeness": 91.23, + "bin_name": "bins.6", + "gene_count": 3090, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "Moraxellaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 32 + } + ], + "unbinned_contig_num": 19005, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:18:17+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": 
"Gp0115678_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.tooShort.fa", + "md5_checksum": "cf2d0eb0281d2822373d4e7d25c8d1e6", + "id": "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", + "file_size_bytes": 160811096 + }, + { + "name": "Gp0115678_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.unbinned.fa", + "md5_checksum": "85defe7977c263b8fba3f31f89f101f9", + "id": "nmdc:85defe7977c263b8fba3f31f89f101f9", + "file_size_bytes": 31022166 + }, + { + "name": "Gp0115678_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115678", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_checkm_qa.out", + "md5_checksum": "19a6a8410cece1118a06763023cc1313", + "id": "nmdc:19a6a8410cece1118a06763023cc1313", + "file_size_bytes": 1690 + }, + { + "name": "Gp0115678_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115678", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_hqmq_bin.zip", + "md5_checksum": "54ed3f096ca7eacec9e5078ca45a6530", + "id": "nmdc:54ed3f096ca7eacec9e5078ca45a6530", + "file_size_bytes": 4026276 + }, + { + "name": "Gp0115678_metabat2 bins", + "description": "metabat2 bins for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_metabat_bin.zip", + "md5_checksum": "8493c05e428d90f8893e4c58755b2e95", + "id": "nmdc:8493c05e428d90f8893e4c58755b2e95", + "file_size_bytes": 72078 + } + ] + } + ] + }, + { + "_id": { + "$oid": 
"649b009773e8249959349b3a" + }, + "id": "nmdc:omprc-11-5r54nt37", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-r7ggfc16" + ], + "has_output": [ + "jgi:574fde547ded5e3df1ee13fa" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127623" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85a" + }, + "has_input": [ + "nmdc:14766bc431808b2a29c03beecb66bbac" + ], + "part_of": [ + "nmdc:mga03eyz63" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6a8409b21c45ba9feba873ec269c8ff7", + "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32" + ], + "was_informed_by": "gold:Gp0127623", + "input_read_count": 23705118, + "output_read_bases": 3409425046, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3579472818, + "name": "Read QC Activity for nmdc:mga03eyz63", + "output_read_count": 22801896, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00", + "output_data_objects": [ + { + "name": "Gp0127623_Filtered Reads", + "description": "Filtered Reads for Gp0127623", + "data_object_type": "Filtered Sequencing 
Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filtered.fastq.gz", + "md5_checksum": "6a8409b21c45ba9feba873ec269c8ff7", + "id": "nmdc:6a8409b21c45ba9feba873ec269c8ff7", + "file_size_bytes": 1917552858 + }, + { + "name": "Gp0127623_Filtered Stats", + "description": "Filtered Stats for Gp0127623", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filterStats.txt", + "md5_checksum": "61fb06de10fe3a0c49c5afe14ab7fb32", + "id": "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32", + "file_size_bytes": 283 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf17" + }, + "has_input": [ + "nmdc:6a8409b21c45ba9feba873ec269c8ff7" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac39e916e17e08a845bb40d97519d8be", + "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "nmdc:eda0c04d692ecf137585676c15924626", + "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "nmdc:e1f164c534830cd628d67c564ace863b", + "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "nmdc:040e6ca695283a12711c16344acd1e76", + "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "nmdc:f2eed9669268f69dbc31f0c4f839fccf" + ], + "was_informed_by": "gold:Gp0127623", + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:42:25+00:00", + "output_data_objects": [ + { + "name": "Gp0127623_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", + "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", + "id": "nmdc:ac39e916e17e08a845bb40d97519d8be", + "file_size_bytes": 1553 + }, + { + "name": "Gp0127623_Gottcha2 full TSV report", + "description": "Gottcha2 
full TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", + "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", + "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "file_size_bytes": 836575 + }, + { + "name": "Gp0127623_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127623", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", + "md5_checksum": "eda0c04d692ecf137585676c15924626", + "id": "nmdc:eda0c04d692ecf137585676c15924626", + "file_size_bytes": 231097 + }, + { + "name": "Gp0127623_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127623", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", + "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", + "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "file_size_bytes": 1669254765 + }, + { + "name": "Gp0127623_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127623", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", + "md5_checksum": "e1f164c534830cd628d67c564ace863b", + "id": "nmdc:e1f164c534830cd628d67c564ace863b", + "file_size_bytes": 255784 + }, + { + "name": "Gp0127623_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127623", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", + "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", + "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", + 
"file_size_bytes": 2333760 + }, + { + "name": "Gp0127623_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127623", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", + "md5_checksum": "040e6ca695283a12711c16344acd1e76", + "id": "nmdc:040e6ca695283a12711c16344acd1e76", + "file_size_bytes": 1335651191 + }, + { + "name": "Gp0127623_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127623", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", + "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", + "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "file_size_bytes": 647609 + }, + { + "name": "Gp0127623_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127623", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", + "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", + "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", + "file_size_bytes": 3949449 + } + ] + }, + { + "_id": { + "$oid": "61e718fc833bcf838a6ff4c9" + }, + "has_input": [ + "nmdc:6a8409b21c45ba9feba873ec269c8ff7" + ], + "part_of": [ + "nmdc:mga03eyz63" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac39e916e17e08a845bb40d97519d8be", + "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "nmdc:eda0c04d692ecf137585676c15924626", + "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "nmdc:e1f164c534830cd628d67c564ace863b", + "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "nmdc:040e6ca695283a12711c16344acd1e76", + "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "nmdc:f2eed9669268f69dbc31f0c4f839fccf" + ], + "was_informed_by": "gold:Gp0127623", + 
"id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:42:25+00:00", + "output_data_objects": [ + { + "name": "Gp0127623_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", + "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", + "id": "nmdc:ac39e916e17e08a845bb40d97519d8be", + "file_size_bytes": 1553 + }, + { + "name": "Gp0127623_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", + "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", + "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "file_size_bytes": 836575 + }, + { + "name": "Gp0127623_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127623", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", + "md5_checksum": "eda0c04d692ecf137585676c15924626", + "id": "nmdc:eda0c04d692ecf137585676c15924626", + "file_size_bytes": 231097 + }, + { + "name": "Gp0127623_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127623", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", + "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", + "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "file_size_bytes": 1669254765 + }, + { + "name": "Gp0127623_Centrifuge TSV report", + "description": "Centrifuge TSV report for 
Gp0127623", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", + "md5_checksum": "e1f164c534830cd628d67c564ace863b", + "id": "nmdc:e1f164c534830cd628d67c564ace863b", + "file_size_bytes": 255784 + }, + { + "name": "Gp0127623_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127623", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", + "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", + "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "file_size_bytes": 2333760 + }, + { + "name": "Gp0127623_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127623", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", + "md5_checksum": "040e6ca695283a12711c16344acd1e76", + "id": "nmdc:040e6ca695283a12711c16344acd1e76", + "file_size_bytes": 1335651191 + }, + { + "name": "Gp0127623_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127623", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", + "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", + "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "file_size_bytes": 647609 + }, + { + "name": "Gp0127623_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127623", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", + "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", + "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", + 
"file_size_bytes": 3949449 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9f" + }, + "has_input": [ + "nmdc:6a8409b21c45ba9feba873ec269c8ff7" + ], + "part_of": [ + "nmdc:mga03eyz63" + ], + "ctg_logsum": 70596, + "scaf_logsum": 70885, + "gap_pct": 0.00063, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "nmdc:a0377bb7d752e66b754753fcefb5005a", + "nmdc:081017d0d9e68a999c245618eb907c08", + "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", + "nmdc:21fb280328baf81e8135733eaf440b66" + ], + "asm_score": 3.626, + "was_informed_by": "gold:Gp0127623", + "ctg_powsum": 7584.611, + "scaf_max": 12785, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "scaf_powsum": 7618.086, + "execution_resource": "NERSC-Cori", + "contigs": 118423, + "name": "Assembly Activity for nmdc:mga03eyz63", + "ctg_max": 11834, + "gc_std": 0.12108, + "contig_bp": 50762396, + "gc_avg": 0.59992, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 50762716, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 118391, + "ended_at_time": "2021-10-11T02:42:25+00:00", + "ctg_l50": 402, + "ctg_l90": 285, + "ctg_n50": 37682, + "ctg_n90": 100987, + "scaf_l50": 402, + "scaf_l90": 285, + "scaf_n50": 37659, + "scaf_n90": 100956, + "output_data_objects": [ + { + "name": "Gp0127623_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127623", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_contigs.fna", + "md5_checksum": "3373ef564b5b97fa472dc8f2c2277dbc", + "id": "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "file_size_bytes": 55220158 + }, + { + "name": "Gp0127623_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127623", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_scaffolds.fna", + "md5_checksum": 
"a0377bb7d752e66b754753fcefb5005a", + "id": "nmdc:a0377bb7d752e66b754753fcefb5005a", + "file_size_bytes": 54864386 + }, + { + "name": "Gp0127623_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_covstats.txt", + "md5_checksum": "081017d0d9e68a999c245618eb907c08", + "id": "nmdc:081017d0d9e68a999c245618eb907c08", + "file_size_bytes": 9321875 + }, + { + "name": "Gp0127623_Assembled AGP file", + "description": "Assembled AGP file for Gp0127623", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_assembly.agp", + "md5_checksum": "4a6ed00a6c2156c142d7bbec6baa36b5", + "id": "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", + "file_size_bytes": 8670291 + }, + { + "name": "Gp0127623_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127623", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_pairedMapped_sorted.bam", + "md5_checksum": "21fb280328baf81e8135733eaf440b66", + "id": "nmdc:21fb280328baf81e8135733eaf440b66", + "file_size_bytes": 2062412797 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef99a" + }, + "has_input": [ + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc" + ], + "part_of": [ + "nmdc:mga03eyz63" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8ac52d00bad1f9349da2acde572006b6", + "nmdc:9dd5eb06fe24f63d5012e34e364a580c", + "nmdc:05107e0217e199d7b0cd571db88f7d09", + "nmdc:02ffcaeeb9a73edea47ba3671396026a", + "nmdc:b9b4ccafc50787f86ef03680eb23848d", + "nmdc:fbd178d9c302b841e3fde3ab9acd8160", + "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", + "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", + "nmdc:45536a48cef31f2c3870c7bacb3d785a", + "nmdc:a52d057d005504857f82bcf661dd7676", + 
"nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", + "nmdc:32eca4cab8525b09cf1b0ed2353f9278" + ], + "was_informed_by": "gold:Gp0127623", + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga03eyz63", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00", + "output_data_objects": [ + { + "name": "Gp0127623_Protein FAA", + "description": "Protein FAA for Gp0127623", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_proteins.faa", + "md5_checksum": "8ac52d00bad1f9349da2acde572006b6", + "id": "nmdc:8ac52d00bad1f9349da2acde572006b6", + "file_size_bytes": 32224726 + }, + { + "name": "Gp0127623_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127623", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_structural_annotation.gff", + "md5_checksum": "9dd5eb06fe24f63d5012e34e364a580c", + "id": "nmdc:9dd5eb06fe24f63d5012e34e364a580c", + "file_size_bytes": 2512 + }, + { + "name": "Gp0127623_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127623", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_functional_annotation.gff", + "md5_checksum": "05107e0217e199d7b0cd571db88f7d09", + "id": "nmdc:05107e0217e199d7b0cd571db88f7d09", + "file_size_bytes": 37779373 + }, + { + "name": "Gp0127623_KO TSV file", + "description": "KO TSV file for Gp0127623", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko.tsv", + "md5_checksum": "02ffcaeeb9a73edea47ba3671396026a", + "id": 
"nmdc:02ffcaeeb9a73edea47ba3671396026a", + "file_size_bytes": 4343179 + }, + { + "name": "Gp0127623_EC TSV file", + "description": "EC TSV file for Gp0127623", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ec.tsv", + "md5_checksum": "b9b4ccafc50787f86ef03680eb23848d", + "id": "nmdc:b9b4ccafc50787f86ef03680eb23848d", + "file_size_bytes": 2966454 + }, + { + "name": "Gp0127623_COG GFF file", + "description": "COG GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cog.gff", + "md5_checksum": "fbd178d9c302b841e3fde3ab9acd8160", + "id": "nmdc:fbd178d9c302b841e3fde3ab9acd8160", + "file_size_bytes": 22023330 + }, + { + "name": "Gp0127623_PFAM GFF file", + "description": "PFAM GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_pfam.gff", + "md5_checksum": "1bcc35e753e7dad78ef8ae4989eb901a", + "id": "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", + "file_size_bytes": 15956001 + }, + { + "name": "Gp0127623_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_tigrfam.gff", + "md5_checksum": "f6d6d2ea3c539560ad30bbd6df8bc71a", + "id": "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", + "file_size_bytes": 1656727 + }, + { + "name": "Gp0127623_SMART GFF file", + "description": "SMART GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_smart.gff", + "md5_checksum": "45536a48cef31f2c3870c7bacb3d785a", + "id": "nmdc:45536a48cef31f2c3870c7bacb3d785a", + "file_size_bytes": 4731416 + }, + { + "name": "Gp0127623_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_supfam.gff", + "md5_checksum": 
"a52d057d005504857f82bcf661dd7676", + "id": "nmdc:a52d057d005504857f82bcf661dd7676", + "file_size_bytes": 27616681 + }, + { + "name": "Gp0127623_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cath_funfam.gff", + "md5_checksum": "b92cb96900a31a3c70ccf9cfe45f02c3", + "id": "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", + "file_size_bytes": 20817140 + }, + { + "name": "Gp0127623_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko_ec.gff", + "md5_checksum": "32eca4cab8525b09cf1b0ed2353f9278", + "id": "nmdc:32eca4cab8525b09cf1b0ed2353f9278", + "file_size_bytes": 13827629 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab346fd" + }, + "has_input": [ + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "nmdc:21fb280328baf81e8135733eaf440b66", + "nmdc:05107e0217e199d7b0cd571db88f7d09" + ], + "too_short_contig_num": 114220, + "part_of": [ + "nmdc:mga03eyz63" + ], + "binned_contig_num": 171, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:e63c76f92bc0ae95dfc238c099296e91", + "nmdc:3dfba77d38712870f8c415203f991496", + "nmdc:5e98d27533164fdf67c07cc224090547", + "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", + "nmdc:c70853ef1a6ab162b85df5215a76666b" + ], + "was_informed_by": "gold:Gp0127623", + "input_contig_num": 118423, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga03eyz63", + "mags_list": [ + { + "number_of_contig": 171, + "completeness": 30.1, + "bin_name": "bins.1", + "gene_count": 991, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", 
+ "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + } + ], + "unbinned_contig_num": 4032, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127623_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.tooShort.fa", + "md5_checksum": "e63c76f92bc0ae95dfc238c099296e91", + "id": "nmdc:e63c76f92bc0ae95dfc238c099296e91", + "file_size_bytes": 48421824 + }, + { + "name": "Gp0127623_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.unbinned.fa", + "md5_checksum": "3dfba77d38712870f8c415203f991496", + "id": "nmdc:3dfba77d38712870f8c415203f991496", + "file_size_bytes": 6028115 + }, + { + "name": "Gp0127623_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127623", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_checkm_qa.out", + "md5_checksum": "5e98d27533164fdf67c07cc224090547", + "id": "nmdc:5e98d27533164fdf67c07cc224090547", + "file_size_bytes": 765 + }, + { + "name": "Gp0127623_high-quality and medium-quality bins", + 
"description": "high-quality and medium-quality bins for Gp0127623", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_hqmq_bin.zip", + "md5_checksum": "bfbe3e3a21e8a089c4c7a0d945c79b7b", + "id": "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", + "file_size_bytes": 182 + }, + { + "name": "Gp0127623_metabat2 bins", + "description": "metabat2 bins for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_metabat_bin.zip", + "md5_checksum": "c70853ef1a6ab162b85df5215a76666b", + "id": "nmdc:c70853ef1a6ab162b85df5215a76666b", + "file_size_bytes": 236177 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b3b" + }, + "id": "nmdc:omprc-11-76ebsj44", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-k3t2wk45" + ], + "has_output": [ + "jgi:574fde787ded5e3df1ee1416" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127625" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c858" + }, + "has_input": [ + "nmdc:93c62425e46296c35415039d7fd9cb56" + ], + "part_of": [ + "nmdc:mga0bfpq58" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", + "nmdc:42be49edad69619e550ddd69d150490f" + ], + "was_informed_by": "gold:Gp0127625", + "input_read_count": 26227312, + "output_read_bases": 3764845015, + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3960324112, + "name": "Read QC Activity for nmdc:mga0bfpq58", + "output_read_count": 25182244, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:29:50+00:00", + "output_data_objects": [ + { + "name": "Gp0127625_Filtered Reads", + "description": "Filtered Reads for Gp0127625", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filtered.fastq.gz", + "md5_checksum": "2d13b3a30339b9c5b4fba099f9d4b10f", + "id": "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", + "file_size_bytes": 2037866145 + }, + { + "name": "Gp0127625_Filtered Stats", + "description": "Filtered Stats for Gp0127625", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filterStats.txt", + "md5_checksum": "42be49edad69619e550ddd69d150490f", + "id": "nmdc:42be49edad69619e550ddd69d150490f", + "file_size_bytes": 284 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf15" + }, + "has_input": [ + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:550b631e1de3e01392154e54493d47ef", + "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "nmdc:b09795fc768257d881e8ce547be0ce68", + "nmdc:064ba18473eb80ff0b484311565d2894", + "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "nmdc:bc8acb862c8942616ef07302667c334f", + "nmdc:b797ed6cb135c993b582cac368b2a93c" + ], + "was_informed_by": "gold:Gp0127625", + "id": 
"nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:29:50+00:00", + "output_data_objects": [ + { + "name": "Gp0127625_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", + "md5_checksum": "550b631e1de3e01392154e54493d47ef", + "id": "nmdc:550b631e1de3e01392154e54493d47ef", + "file_size_bytes": 754 + }, + { + "name": "Gp0127625_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", + "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", + "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "file_size_bytes": 641658 + }, + { + "name": "Gp0127625_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127625", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", + "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", + "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "file_size_bytes": 228494 + }, + { + "name": "Gp0127625_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127625", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", + "md5_checksum": "b09795fc768257d881e8ce547be0ce68", + "id": "nmdc:b09795fc768257d881e8ce547be0ce68", + "file_size_bytes": 1849982678 + }, + { + "name": "Gp0127625_Centrifuge TSV report", + "description": "Centrifuge TSV report for 
Gp0127625", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", + "md5_checksum": "064ba18473eb80ff0b484311565d2894", + "id": "nmdc:064ba18473eb80ff0b484311565d2894", + "file_size_bytes": 253852 + }, + { + "name": "Gp0127625_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127625", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", + "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", + "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "file_size_bytes": 2331556 + }, + { + "name": "Gp0127625_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127625", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", + "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", + "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "file_size_bytes": 1471976767 + }, + { + "name": "Gp0127625_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127625", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", + "md5_checksum": "bc8acb862c8942616ef07302667c334f", + "id": "nmdc:bc8acb862c8942616ef07302667c334f", + "file_size_bytes": 627498 + }, + { + "name": "Gp0127625_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127625", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", + "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", + "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", + 
"file_size_bytes": 3921941 + } + ] + }, + { + "_id": { + "$oid": "61e718e2833bcf838a6ff0ce" + }, + "has_input": [ + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" + ], + "part_of": [ + "nmdc:mga0bfpq58" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:550b631e1de3e01392154e54493d47ef", + "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "nmdc:b09795fc768257d881e8ce547be0ce68", + "nmdc:064ba18473eb80ff0b484311565d2894", + "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "nmdc:bc8acb862c8942616ef07302667c334f", + "nmdc:b797ed6cb135c993b582cac368b2a93c" + ], + "was_informed_by": "gold:Gp0127625", + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:29:50+00:00", + "output_data_objects": [ + { + "name": "Gp0127625_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", + "md5_checksum": "550b631e1de3e01392154e54493d47ef", + "id": "nmdc:550b631e1de3e01392154e54493d47ef", + "file_size_bytes": 754 + }, + { + "name": "Gp0127625_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", + "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", + "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "file_size_bytes": 641658 + }, + { + "name": "Gp0127625_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127625", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", + "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", + "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "file_size_bytes": 228494 + }, + { + "name": "Gp0127625_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127625", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", + "md5_checksum": "b09795fc768257d881e8ce547be0ce68", + "id": "nmdc:b09795fc768257d881e8ce547be0ce68", + "file_size_bytes": 1849982678 + }, + { + "name": "Gp0127625_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127625", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", + "md5_checksum": "064ba18473eb80ff0b484311565d2894", + "id": "nmdc:064ba18473eb80ff0b484311565d2894", + "file_size_bytes": 253852 + }, + { + "name": "Gp0127625_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127625", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", + "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", + "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "file_size_bytes": 2331556 + }, + { + "name": "Gp0127625_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127625", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", + "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", + "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "file_size_bytes": 
1471976767 + }, + { + "name": "Gp0127625_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127625", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", + "md5_checksum": "bc8acb862c8942616ef07302667c334f", + "id": "nmdc:bc8acb862c8942616ef07302667c334f", + "file_size_bytes": 627498 + }, + { + "name": "Gp0127625_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127625", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", + "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", + "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", + "file_size_bytes": 3921941 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f89" + }, + "has_input": [ + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" + ], + "part_of": [ + "nmdc:mga0bfpq58" + ], + "ctg_logsum": 452076, + "scaf_logsum": 453436, + "gap_pct": 0.00138, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", + "nmdc:d231edb2040700184064615a28e65ee5", + "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", + "nmdc:ff612445b348b65f906cd8858c4ec54e" + ], + "asm_score": 3.923, + "was_informed_by": "gold:Gp0127625", + "ctg_powsum": 49204, + "scaf_max": 29400, + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "scaf_powsum": 49370, + "execution_resource": "NERSC-Cori", + "contigs": 300102, + "name": "Assembly Activity for nmdc:mga0bfpq58", + "ctg_max": 29400, + "gc_std": 0.0955, + "contig_bp": 159709614, + "gc_avg": 0.6367, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 159711824, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 299890, + "ended_at_time": "2021-10-11T03:29:50+00:00", + "ctg_l50": 546, + "ctg_l90": 301, + "ctg_n50": 78532, + 
"ctg_n90": 244428, + "scaf_l50": 546, + "scaf_l90": 301, + "scaf_n50": 78517, + "scaf_n90": 244244, + "output_data_objects": [ + { + "name": "Gp0127625_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127625", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", + "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", + "id": "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "file_size_bytes": 171703232 + }, + { + "name": "Gp0127625_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127625", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", + "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", + "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", + "file_size_bytes": 170799869 + }, + { + "name": "Gp0127625_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", + "md5_checksum": "d231edb2040700184064615a28e65ee5", + "id": "nmdc:d231edb2040700184064615a28e65ee5", + "file_size_bytes": 23875845 + }, + { + "name": "Gp0127625_Assembled AGP file", + "description": "Assembled AGP file for Gp0127625", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_assembly.agp", + "md5_checksum": "9e3e55fe2f337ee0192604f8aa13da8e", + "id": "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", + "file_size_bytes": 22351137 + }, + { + "name": "Gp0127625_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127625", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_pairedMapped_sorted.bam", + "md5_checksum": "ff612445b348b65f906cd8858c4ec54e", 
+ "id": "nmdc:ff612445b348b65f906cd8858c4ec54e", + "file_size_bytes": 2304803186 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef999" + }, + "has_input": [ + "nmdc:5b6e7cbece9167002b12c3415afa9bb8" + ], + "part_of": [ + "nmdc:mga0bfpq58" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", + "nmdc:035d81e38b01174de882d15a859390a0", + "nmdc:da95ab39eb229378ef9c1c7317f58c36", + "nmdc:7ba2f365814fc2ae2896849d4dbb619d", + "nmdc:91ade9a89599592c1e699b8990a11fba", + "nmdc:a1c78cb8202825bd692c572b1537b549", + "nmdc:b83f7bca7166e0bbeb5d260af5920d00", + "nmdc:2f8d30335b71e6d7f29458795d20daf4", + "nmdc:77a22d4fe5949259acc0f12eafe264a2", + "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", + "nmdc:4a365e4bb51f09bb4f21470a753eac42", + "nmdc:5a230cb34060373c2e9a0af8b8040f46" + ], + "was_informed_by": "gold:Gp0127625", + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0bfpq58", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:29:50+00:00", + "output_data_objects": [ + { + "name": "Gp0127625_Protein FAA", + "description": "Protein FAA for Gp0127625", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_proteins.faa", + "md5_checksum": "b1cae75f11c5efc7b37ea38c8d690e09", + "id": "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", + "file_size_bytes": 96076876 + }, + { + "name": "Gp0127625_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127625", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_structural_annotation.gff", + "md5_checksum": "035d81e38b01174de882d15a859390a0", + "id": "nmdc:035d81e38b01174de882d15a859390a0", + 
"file_size_bytes": 2526 + }, + { + "name": "Gp0127625_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127625", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_functional_annotation.gff", + "md5_checksum": "da95ab39eb229378ef9c1c7317f58c36", + "id": "nmdc:da95ab39eb229378ef9c1c7317f58c36", + "file_size_bytes": 106301187 + }, + { + "name": "Gp0127625_KO TSV file", + "description": "KO TSV file for Gp0127625", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko.tsv", + "md5_checksum": "7ba2f365814fc2ae2896849d4dbb619d", + "id": "nmdc:7ba2f365814fc2ae2896849d4dbb619d", + "file_size_bytes": 12012992 + }, + { + "name": "Gp0127625_EC TSV file", + "description": "EC TSV file for Gp0127625", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ec.tsv", + "md5_checksum": "91ade9a89599592c1e699b8990a11fba", + "id": "nmdc:91ade9a89599592c1e699b8990a11fba", + "file_size_bytes": 7987608 + }, + { + "name": "Gp0127625_COG GFF file", + "description": "COG GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cog.gff", + "md5_checksum": "a1c78cb8202825bd692c572b1537b549", + "id": "nmdc:a1c78cb8202825bd692c572b1537b549", + "file_size_bytes": 63761051 + }, + { + "name": "Gp0127625_PFAM GFF file", + "description": "PFAM GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_pfam.gff", + "md5_checksum": "b83f7bca7166e0bbeb5d260af5920d00", + "id": "nmdc:b83f7bca7166e0bbeb5d260af5920d00", + "file_size_bytes": 49051515 + }, + { + "name": "Gp0127625_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127625", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_tigrfam.gff", + "md5_checksum": "2f8d30335b71e6d7f29458795d20daf4", + "id": "nmdc:2f8d30335b71e6d7f29458795d20daf4", + "file_size_bytes": 5446717 + }, + { + "name": "Gp0127625_SMART GFF file", + "description": "SMART GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_smart.gff", + "md5_checksum": "77a22d4fe5949259acc0f12eafe264a2", + "id": "nmdc:77a22d4fe5949259acc0f12eafe264a2", + "file_size_bytes": 14046377 + }, + { + "name": "Gp0127625_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_supfam.gff", + "md5_checksum": "f21f3b5ed41e8945b4eebdbb044f832a", + "id": "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", + "file_size_bytes": 79091420 + }, + { + "name": "Gp0127625_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cath_funfam.gff", + "md5_checksum": "4a365e4bb51f09bb4f21470a753eac42", + "id": "nmdc:4a365e4bb51f09bb4f21470a753eac42", + "file_size_bytes": 60777542 + }, + { + "name": "Gp0127625_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko_ec.gff", + "md5_checksum": "5a230cb34060373c2e9a0af8b8040f46", + "id": "nmdc:5a230cb34060373c2e9a0af8b8040f46", + "file_size_bytes": 38117675 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab346fc" + }, + "has_input": [ + "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "nmdc:ff612445b348b65f906cd8858c4ec54e", + "nmdc:da95ab39eb229378ef9c1c7317f58c36" + ], + "too_short_contig_num": 275414, + "part_of": [ + "nmdc:mga0bfpq58" + ], + "binned_contig_num": 1195, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", + "nmdc:b66d8fd47536ed5299c280aa873e2130", + "nmdc:dac476e3a7a8cdb2f3be5946ae437906", + "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", + "nmdc:d312dfb56973b50497bab8faf7409db8" + ], + "was_informed_by": "gold:Gp0127625", + "input_contig_num": 300100, + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0bfpq58", + "mags_list": [ + { + "number_of_contig": 382, + "completeness": 47.74, + "bin_name": "bins.1", + "gene_count": 2054, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.69, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 197, + "completeness": 22.93, + "bin_name": "bins.2", + "gene_count": 1005, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 95, + "completeness": 7.24, + "bin_name": "bins.3", + "gene_count": 447, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 193, + "completeness": 17.79, + "bin_name": "bins.4", + "gene_count": 1386, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + 
}, + { + "number_of_contig": 328, + "completeness": 37.37, + "bin_name": "bins.5", + "gene_count": 1978, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.56, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 35 + } + ], + "unbinned_contig_num": 23491, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:29:50+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127625_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.tooShort.fa", + "md5_checksum": "27f14f2f1af3ad7d17505a6ddc52d860", + "id": "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", + "file_size_bytes": 128750891 + }, + { + "name": "Gp0127625_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.unbinned.fa", + "md5_checksum": "b66d8fd47536ed5299c280aa873e2130", + "id": "nmdc:b66d8fd47536ed5299c280aa873e2130", + "file_size_bytes": 37223163 + }, + { + "name": "Gp0127625_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127625", + "data_object_type": "CheckM Statistics", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_checkm_qa.out", + "md5_checksum": "dac476e3a7a8cdb2f3be5946ae437906", + "id": "nmdc:dac476e3a7a8cdb2f3be5946ae437906", + "file_size_bytes": 1413 + }, + { + "name": "Gp0127625_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127625", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_hqmq_bin.zip", + "md5_checksum": "1ce4d3dcf2c9cbe245b437ca14a2772f", + "id": "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", + "file_size_bytes": 182 + }, + { + "name": "Gp0127625_metabat2 bins", + "description": "metabat2 bins for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_metabat_bin.zip", + "md5_checksum": "d312dfb56973b50497bab8faf7409db8", + "id": "nmdc:d312dfb56973b50497bab8faf7409db8", + "file_size_bytes": 1729165 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b3c" + }, + "id": "nmdc:omprc-11-s6wqag22", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-mxdygh62" + ], + "has_output": [ + "jgi:574fde7b7ded5e3df1ee1418" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127626" + ], + 
"downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86c" + }, + "has_input": [ + "nmdc:8bee270fc5b3a39f7e7609b60e191766" + ], + "part_of": [ + "nmdc:mga04xnj45" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:07499ad2f2b80f42bd7109732b1eef90", + "nmdc:9089d07fdee5ed03e901c1656206af02" + ], + "was_informed_by": "gold:Gp0127626", + "input_read_count": 24223170, + "output_read_bases": 3405205631, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3657698670, + "name": "Read QC Activity for nmdc:mga04xnj45", + "output_read_count": 22768968, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T20:54:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127626_Filtered Reads", + "description": "Filtered Reads for Gp0127626", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filtered.fastq.gz", + "md5_checksum": "07499ad2f2b80f42bd7109732b1eef90", + "id": "nmdc:07499ad2f2b80f42bd7109732b1eef90", + "file_size_bytes": 1944721961 + }, + { + "name": "Gp0127626_Filtered Stats", + "description": "Filtered Stats for Gp0127626", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filterStats.txt", + "md5_checksum": "9089d07fdee5ed03e901c1656206af02", + "id": "nmdc:9089d07fdee5ed03e901c1656206af02", + "file_size_bytes": 287 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf30" + }, + "has_input": [ + "nmdc:07499ad2f2b80f42bd7109732b1eef90" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "nmdc:dd4023a1488bdfc73b12c422b62b274a", + 
"nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "nmdc:806b27f1fa5a423100b113bb56edc708", + "nmdc:bb3e6793c4f036b9756f075d41846964" + ], + "was_informed_by": "gold:Gp0127626", + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:54:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127626_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", + "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", + "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "file_size_bytes": 2399 + }, + { + "name": "Gp0127626_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", + "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", + "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "file_size_bytes": 743066 + }, + { + "name": "Gp0127626_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127626", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", + "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", + "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "file_size_bytes": 233970 + }, + { + "name": "Gp0127626_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127626", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", + "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", + "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "file_size_bytes": 1673697764 + }, + { + "name": "Gp0127626_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127626", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", + "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", + "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127626_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127626", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", + "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", + "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "file_size_bytes": 2327521 + }, + { + "name": "Gp0127626_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127626", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", + "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", + "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "file_size_bytes": 1343921825 + }, + { + "name": "Gp0127626_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127626", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", + "md5_checksum": "806b27f1fa5a423100b113bb56edc708", + "id": "nmdc:806b27f1fa5a423100b113bb56edc708", + "file_size_bytes": 638478 + }, + { + "name": "Gp0127626_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127626", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", + "md5_checksum": "bb3e6793c4f036b9756f075d41846964", + "id": "nmdc:bb3e6793c4f036b9756f075d41846964", + "file_size_bytes": 3987411 + } + ] + }, + { + "_id": { + "$oid": "61e7195e833bcf838a700602" + }, + "has_input": [ + "nmdc:07499ad2f2b80f42bd7109732b1eef90" + ], + "part_of": [ + "nmdc:mga04xnj45" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "nmdc:806b27f1fa5a423100b113bb56edc708", + "nmdc:bb3e6793c4f036b9756f075d41846964" + ], + "was_informed_by": "gold:Gp0127626", + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:54:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127626_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", + "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", + "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "file_size_bytes": 2399 + }, + { + "name": "Gp0127626_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", + "md5_checksum": 
"a81ddf4e3bc044e8601554117cd887aa", + "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "file_size_bytes": 743066 + }, + { + "name": "Gp0127626_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127626", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", + "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", + "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "file_size_bytes": 233970 + }, + { + "name": "Gp0127626_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127626", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", + "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", + "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "file_size_bytes": 1673697764 + }, + { + "name": "Gp0127626_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127626", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", + "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", + "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127626_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127626", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", + "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", + "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "file_size_bytes": 2327521 + }, + { + "name": "Gp0127626_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127626", + "data_object_type": "Kraken2 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", + "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", + "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "file_size_bytes": 1343921825 + }, + { + "name": "Gp0127626_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127626", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", + "md5_checksum": "806b27f1fa5a423100b113bb56edc708", + "id": "nmdc:806b27f1fa5a423100b113bb56edc708", + "file_size_bytes": 638478 + }, + { + "name": "Gp0127626_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127626", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", + "md5_checksum": "bb3e6793c4f036b9756f075d41846964", + "id": "nmdc:bb3e6793c4f036b9756f075d41846964", + "file_size_bytes": 3987411 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9e" + }, + "has_input": [ + "nmdc:07499ad2f2b80f42bd7109732b1eef90" + ], + "part_of": [ + "nmdc:mga04xnj45" + ], + "ctg_logsum": 63429, + "scaf_logsum": 63657, + "gap_pct": 0.00092, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "nmdc:2d89ade1cc6267bb77b48daa176442f2", + "nmdc:79588f527e08eace069ddc63171f004c", + "nmdc:cc855d3c15387d078c6919d1b19f8c05", + "nmdc:ef722a8ecd2b85d9202560df41eca7ed" + ], + "asm_score": 7.629, + "was_informed_by": "gold:Gp0127626", + "ctg_powsum": 7359.443, + "scaf_max": 30685, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "scaf_powsum": 7386.413, + "execution_resource": "NERSC-Cori", + "contigs": 105397, + "name": "Assembly Activity for nmdc:mga04xnj45", + "ctg_max": 30685, + "gc_std": 0.09232, + 
"gc_avg": 0.60819, + "contig_bp": 43390261, + "started_at_time": "2021-12-01T21:31:29Z", + "scaf_bp": 43390661, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 105366, + "ended_at_time": "2021-12-02T20:54:56+00:00", + "ctg_l50": 368, + "ctg_l90": 284, + "ctg_n50": 34766, + "ctg_n90": 91597, + "scaf_l50": 368, + "scaf_l90": 284, + "scaf_n50": 34749, + "scaf_n90": 91567, + "output_data_objects": [ + { + "name": "Gp0127626_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127626", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_contigs.fna", + "md5_checksum": "6d72d9fb6a282f8872cd3d5b8ce1a29d", + "id": "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "file_size_bytes": 47315336 + }, + { + "name": "Gp0127626_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127626", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_scaffolds.fna", + "md5_checksum": "2d89ade1cc6267bb77b48daa176442f2", + "id": "nmdc:2d89ade1cc6267bb77b48daa176442f2", + "file_size_bytes": 46998743 + }, + { + "name": "Gp0127626_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_covstats.txt", + "md5_checksum": "79588f527e08eace069ddc63171f004c", + "id": "nmdc:79588f527e08eace069ddc63171f004c", + "file_size_bytes": 8270233 + }, + { + "name": "Gp0127626_Assembled AGP file", + "description": "Assembled AGP file for Gp0127626", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_assembly.agp", + "md5_checksum": "cc855d3c15387d078c6919d1b19f8c05", + "id": "nmdc:cc855d3c15387d078c6919d1b19f8c05", + "file_size_bytes": 7690333 + }, + { + "name": "Gp0127626_Metagenome Alignment BAM file", + 
"description": "Metagenome Alignment BAM file for Gp0127626", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_pairedMapped_sorted.bam", + "md5_checksum": "ef722a8ecd2b85d9202560df41eca7ed", + "id": "nmdc:ef722a8ecd2b85d9202560df41eca7ed", + "file_size_bytes": 2083099081 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b1" + }, + "has_input": [ + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d" + ], + "part_of": [ + "nmdc:mga04xnj45" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:26360324fcaed21fd48b54972cce09cb", + "nmdc:d2be135e631726360cf6ac23a3d56629", + "nmdc:b2fdf525bc1ddadb30427cba91c63483", + "nmdc:75ff61c1b51ace76d6e01930ae41c38c", + "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", + "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", + "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", + "nmdc:52e64eec8c715affde1612b871e2490e", + "nmdc:4687e89ae41c98bc49ca81ded0b4c622", + "nmdc:8cc7e6c8e232891c3ac7d952302905b6", + "nmdc:445ce659140104b37475c5c2e3fb7761", + "nmdc:32e15eb7eab763990dbb0ce947321718" + ], + "was_informed_by": "gold:Gp0127626", + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga04xnj45", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T20:54:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127626_Protein FAA", + "description": "Protein FAA for Gp0127626", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_proteins.faa", + "md5_checksum": "26360324fcaed21fd48b54972cce09cb", + "id": "nmdc:26360324fcaed21fd48b54972cce09cb", + "file_size_bytes": 28150597 + }, + { + "name": "Gp0127626_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127626", + 
"data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_structural_annotation.gff", + "md5_checksum": "d2be135e631726360cf6ac23a3d56629", + "id": "nmdc:d2be135e631726360cf6ac23a3d56629", + "file_size_bytes": 2511 + }, + { + "name": "Gp0127626_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127626", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_functional_annotation.gff", + "md5_checksum": "b2fdf525bc1ddadb30427cba91c63483", + "id": "nmdc:b2fdf525bc1ddadb30427cba91c63483", + "file_size_bytes": 33351979 + }, + { + "name": "Gp0127626_KO TSV file", + "description": "KO TSV file for Gp0127626", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko.tsv", + "md5_checksum": "75ff61c1b51ace76d6e01930ae41c38c", + "id": "nmdc:75ff61c1b51ace76d6e01930ae41c38c", + "file_size_bytes": 3842650 + }, + { + "name": "Gp0127626_EC TSV file", + "description": "EC TSV file for Gp0127626", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ec.tsv", + "md5_checksum": "4210daa7b1b0b84a6e5b6591e4e93c55", + "id": "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", + "file_size_bytes": 2561980 + }, + { + "name": "Gp0127626_COG GFF file", + "description": "COG GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cog.gff", + "md5_checksum": "cfd7a714b2e18f136d6dc48b9162e1c0", + "id": "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", + "file_size_bytes": 19108716 + }, + { + "name": "Gp0127626_PFAM GFF file", + "description": "PFAM GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_pfam.gff", + 
"md5_checksum": "80a1ed51631f5fbc43032aa4afbfbf1d", + "id": "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", + "file_size_bytes": 13800768 + }, + { + "name": "Gp0127626_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_tigrfam.gff", + "md5_checksum": "52e64eec8c715affde1612b871e2490e", + "id": "nmdc:52e64eec8c715affde1612b871e2490e", + "file_size_bytes": 1446190 + }, + { + "name": "Gp0127626_SMART GFF file", + "description": "SMART GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_smart.gff", + "md5_checksum": "4687e89ae41c98bc49ca81ded0b4c622", + "id": "nmdc:4687e89ae41c98bc49ca81ded0b4c622", + "file_size_bytes": 4252918 + }, + { + "name": "Gp0127626_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_supfam.gff", + "md5_checksum": "8cc7e6c8e232891c3ac7d952302905b6", + "id": "nmdc:8cc7e6c8e232891c3ac7d952302905b6", + "file_size_bytes": 24007157 + }, + { + "name": "Gp0127626_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cath_funfam.gff", + "md5_checksum": "445ce659140104b37475c5c2e3fb7761", + "id": "nmdc:445ce659140104b37475c5c2e3fb7761", + "file_size_bytes": 17990080 + }, + { + "name": "Gp0127626_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko_ec.gff", + "md5_checksum": "32e15eb7eab763990dbb0ce947321718", + "id": "nmdc:32e15eb7eab763990dbb0ce947321718", + "file_size_bytes": 12235401 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34712" + }, + "has_input": [ + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "nmdc:ef722a8ecd2b85d9202560df41eca7ed", + 
"nmdc:b2fdf525bc1ddadb30427cba91c63483" + ], + "too_short_contig_num": 102702, + "part_of": [ + "nmdc:mga04xnj45" + ], + "binned_contig_num": 230, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:66dea8d60f61c7a150ae4cbc3ce88757", + "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a" + ], + "was_informed_by": "gold:Gp0127626", + "input_contig_num": 105397, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga04xnj45", + "mags_list": [ + { + "number_of_contig": 230, + "completeness": 81.4, + "bin_name": "bins.1", + "gene_count": 2055, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 2.43, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 37 + } + ], + "unbinned_contig_num": 2465, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T20:54:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127626_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127626", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_checkm_qa.out", + "md5_checksum": "66dea8d60f61c7a150ae4cbc3ce88757", + "id": "nmdc:66dea8d60f61c7a150ae4cbc3ce88757", + "file_size_bytes": 765 + }, + { + "name": "Gp0127626_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127626", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_hqmq_bin.zip", + "md5_checksum": "1b0f148dc6a3a6a007482d1b03fe7e6a", + "id": 
"nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a", + "file_size_bytes": 520239 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b3d" + }, + "id": "nmdc:omprc-11-x0es2p18", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-msqbhe76" + ], + "has_output": [ + "jgi:574fde577ded5e3df1ee13fc" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127624" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85b" + }, + "has_input": [ + "nmdc:e24b00c4de7a24629f5933940070e06c" + ], + "part_of": [ + "nmdc:mga0e8jh10" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8585f6896702bddf64b02191be5921f4", + "nmdc:b9b6464ecc746a4cc39b549696c5fe9c" + ], + "was_informed_by": "gold:Gp0127624", + "input_read_count": 25674112, + "output_read_bases": 3361311014, + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3876790912, + "name": "Read QC Activity for nmdc:mga0e8jh10", + "output_read_count": 22503352, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00", + "output_data_objects": [ + { + "name": "Gp0127624_Filtered Reads", + 
"description": "Filtered Reads for Gp0127624", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filtered.fastq.gz", + "md5_checksum": "8585f6896702bddf64b02191be5921f4", + "id": "nmdc:8585f6896702bddf64b02191be5921f4", + "file_size_bytes": 1795382596 + }, + { + "name": "Gp0127624_Filtered Stats", + "description": "Filtered Stats for Gp0127624", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filterStats.txt", + "md5_checksum": "b9b6464ecc746a4cc39b549696c5fe9c", + "id": "nmdc:b9b6464ecc746a4cc39b549696c5fe9c", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf1c" + }, + "has_input": [ + "nmdc:8585f6896702bddf64b02191be5921f4" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "nmdc:6c7fec765f2a225f168ebb1f69961013", + "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "nmdc:77db34862804280185d3b1ce961e5338", + "nmdc:84e3efb84d961d189ece310911ccf475", + "nmdc:b8fd31679921f8b68c80917e14caa260", + "nmdc:715c66c69b621478da7d48481f3cbd1d", + "nmdc:0781e8042688219035efafe7d75858d0", + "nmdc:85547ab860ef9d6877ba7abc8881740a" + ], + "was_informed_by": "gold:Gp0127624", + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:30:59+00:00", + "output_data_objects": [ + { + "name": "Gp0127624_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", + "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", + "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + 
"file_size_bytes": 1500 + }, + { + "name": "Gp0127624_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", + "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", + "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", + "file_size_bytes": 692993 + }, + { + "name": "Gp0127624_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127624", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", + "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", + "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "file_size_bytes": 230779 + }, + { + "name": "Gp0127624_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127624", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", + "md5_checksum": "77db34862804280185d3b1ce961e5338", + "id": "nmdc:77db34862804280185d3b1ce961e5338", + "file_size_bytes": 1645928829 + }, + { + "name": "Gp0127624_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127624", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", + "md5_checksum": "84e3efb84d961d189ece310911ccf475", + "id": "nmdc:84e3efb84d961d189ece310911ccf475", + "file_size_bytes": 254646 + }, + { + "name": "Gp0127624_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127624", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", + 
"md5_checksum": "b8fd31679921f8b68c80917e14caa260", + "id": "nmdc:b8fd31679921f8b68c80917e14caa260", + "file_size_bytes": 2332082 + }, + { + "name": "Gp0127624_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127624", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", + "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", + "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", + "file_size_bytes": 1316771556 + }, + { + "name": "Gp0127624_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127624", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", + "md5_checksum": "0781e8042688219035efafe7d75858d0", + "id": "nmdc:0781e8042688219035efafe7d75858d0", + "file_size_bytes": 626940 + }, + { + "name": "Gp0127624_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127624", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", + "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", + "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", + "file_size_bytes": 3921891 + } + ] + }, + { + "_id": { + "$oid": "61e7191b833bcf838a6ff905" + }, + "has_input": [ + "nmdc:8585f6896702bddf64b02191be5921f4" + ], + "part_of": [ + "nmdc:mga0e8jh10" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "nmdc:6c7fec765f2a225f168ebb1f69961013", + "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "nmdc:77db34862804280185d3b1ce961e5338", + "nmdc:84e3efb84d961d189ece310911ccf475", + "nmdc:b8fd31679921f8b68c80917e14caa260", + "nmdc:715c66c69b621478da7d48481f3cbd1d", + 
"nmdc:0781e8042688219035efafe7d75858d0", + "nmdc:85547ab860ef9d6877ba7abc8881740a" + ], + "was_informed_by": "gold:Gp0127624", + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:30:59+00:00", + "output_data_objects": [ + { + "name": "Gp0127624_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", + "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", + "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "file_size_bytes": 1500 + }, + { + "name": "Gp0127624_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", + "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", + "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", + "file_size_bytes": 692993 + }, + { + "name": "Gp0127624_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127624", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", + "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", + "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "file_size_bytes": 230779 + }, + { + "name": "Gp0127624_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127624", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", + "md5_checksum": "77db34862804280185d3b1ce961e5338", + "id": "nmdc:77db34862804280185d3b1ce961e5338", 
+ "file_size_bytes": 1645928829 + }, + { + "name": "Gp0127624_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127624", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", + "md5_checksum": "84e3efb84d961d189ece310911ccf475", + "id": "nmdc:84e3efb84d961d189ece310911ccf475", + "file_size_bytes": 254646 + }, + { + "name": "Gp0127624_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127624", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", + "md5_checksum": "b8fd31679921f8b68c80917e14caa260", + "id": "nmdc:b8fd31679921f8b68c80917e14caa260", + "file_size_bytes": 2332082 + }, + { + "name": "Gp0127624_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127624", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", + "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", + "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", + "file_size_bytes": 1316771556 + }, + { + "name": "Gp0127624_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127624", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", + "md5_checksum": "0781e8042688219035efafe7d75858d0", + "id": "nmdc:0781e8042688219035efafe7d75858d0", + "file_size_bytes": 626940 + }, + { + "name": "Gp0127624_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127624", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", + "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", + "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", + "file_size_bytes": 3921891 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f8d" + }, + "has_input": [ + "nmdc:8585f6896702bddf64b02191be5921f4" + ], + "part_of": [ + "nmdc:mga0e8jh10" + ], + "ctg_logsum": 174168, + "scaf_logsum": 174680, + "gap_pct": 0.0009, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "nmdc:0a50f88775f36e9238152f3319252853", + "nmdc:f0dc2f598fa06efbe99843bddaf54f60", + "nmdc:a4405d49e8efe2ee124d25e2414de56c", + "nmdc:8c37ab0b3594cc975348041e4841f6ac" + ], + "asm_score": 5.95, + "was_informed_by": "gold:Gp0127624", + "ctg_powsum": 19404, + "scaf_max": 33408, + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "scaf_powsum": 19462, + "execution_resource": "NERSC-Cori", + "contigs": 191010, + "name": "Assembly Activity for nmdc:mga0e8jh10", + "ctg_max": 33408, + "gc_std": 0.09154, + "contig_bp": 88102698, + "gc_avg": 0.62452, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 88103488, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 190940, + "ended_at_time": "2021-10-11T03:30:59+00:00", + "ctg_l50": 434, + "ctg_l90": 288, + "ctg_n50": 56361, + "ctg_n90": 162547, + "scaf_l50": 434, + "scaf_l90": 288, + "scaf_n50": 56334, + "scaf_n90": 162481, + "output_data_objects": [ + { + "name": "Gp0127624_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127624", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", + "md5_checksum": "464a9db7a94e7e0646b1ff8b501d82f3", + "id": "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "file_size_bytes": 95468011 + }, + { + "name": "Gp0127624_Assembled scaffold fasta", + "description": 
"Assembled scaffold fasta for Gp0127624", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", + "md5_checksum": "0a50f88775f36e9238152f3319252853", + "id": "nmdc:0a50f88775f36e9238152f3319252853", + "file_size_bytes": 94893921 + }, + { + "name": "Gp0127624_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", + "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", + "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", + "file_size_bytes": 15112642 + }, + { + "name": "Gp0127624_Assembled AGP file", + "description": "Assembled AGP file for Gp0127624", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_assembly.agp", + "md5_checksum": "a4405d49e8efe2ee124d25e2414de56c", + "id": "nmdc:a4405d49e8efe2ee124d25e2414de56c", + "file_size_bytes": 14126849 + }, + { + "name": "Gp0127624_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127624", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_pairedMapped_sorted.bam", + "md5_checksum": "8c37ab0b3594cc975348041e4841f6ac", + "id": "nmdc:8c37ab0b3594cc975348041e4841f6ac", + "file_size_bytes": 1976821836 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a2" + }, + "has_input": [ + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3" + ], + "part_of": [ + "nmdc:mga0e8jh10" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:40d15cb24063dbb6097fd1626f62db95", + "nmdc:f70325438abce4c6f56e6c82619dd44a", + "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", + "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", + "nmdc:303a5e88a0eae8942082e9e13f9f6eba", + 
"nmdc:d919f65e54a8351324e332a5daa6a831", + "nmdc:764c7c2b5554fc6b860b036cab22e0ef", + "nmdc:d0a86560767836f901bdd2625bea46e3", + "nmdc:2f64111072a2b19a726aed9c9f54bba7", + "nmdc:51a011777869ff58b977991f5c90fc47", + "nmdc:53f57253df5119d338b9813aa81c7c9b", + "nmdc:c4aa03608fa7442a05cd23fdcc29bc21" + ], + "was_informed_by": "gold:Gp0127624", + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0e8jh10", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00", + "output_data_objects": [ + { + "name": "Gp0127624_Protein FAA", + "description": "Protein FAA for Gp0127624", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_proteins.faa", + "md5_checksum": "40d15cb24063dbb6097fd1626f62db95", + "id": "nmdc:40d15cb24063dbb6097fd1626f62db95", + "file_size_bytes": 55458746 + }, + { + "name": "Gp0127624_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127624", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_structural_annotation.gff", + "md5_checksum": "f70325438abce4c6f56e6c82619dd44a", + "id": "nmdc:f70325438abce4c6f56e6c82619dd44a", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127624_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127624", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_functional_annotation.gff", + "md5_checksum": "c5cf33c1f2f68a7c63fef6dd623a97c0", + "id": "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", + "file_size_bytes": 63778960 + }, + { + "name": "Gp0127624_KO TSV file", + "description": "KO TSV file for Gp0127624", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko.tsv", + "md5_checksum": "4aca66fe81c8c056fa5617c7aa77bc7d", + "id": "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", + "file_size_bytes": 7252005 + }, + { + "name": "Gp0127624_EC TSV file", + "description": "EC TSV file for Gp0127624", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ec.tsv", + "md5_checksum": "303a5e88a0eae8942082e9e13f9f6eba", + "id": "nmdc:303a5e88a0eae8942082e9e13f9f6eba", + "file_size_bytes": 4835920 + }, + { + "name": "Gp0127624_COG GFF file", + "description": "COG GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cog.gff", + "md5_checksum": "d919f65e54a8351324e332a5daa6a831", + "id": "nmdc:d919f65e54a8351324e332a5daa6a831", + "file_size_bytes": 37494199 + }, + { + "name": "Gp0127624_PFAM GFF file", + "description": "PFAM GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_pfam.gff", + "md5_checksum": "764c7c2b5554fc6b860b036cab22e0ef", + "id": "nmdc:764c7c2b5554fc6b860b036cab22e0ef", + "file_size_bytes": 27739105 + }, + { + "name": "Gp0127624_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_tigrfam.gff", + "md5_checksum": "d0a86560767836f901bdd2625bea46e3", + "id": "nmdc:d0a86560767836f901bdd2625bea46e3", + "file_size_bytes": 3077428 + }, + { + "name": "Gp0127624_SMART GFF file", + "description": "SMART GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_smart.gff", + "md5_checksum": "2f64111072a2b19a726aed9c9f54bba7", + "id": "nmdc:2f64111072a2b19a726aed9c9f54bba7", + "file_size_bytes": 8547849 + }, + { + "name": "Gp0127624_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_supfam.gff", + "md5_checksum": "51a011777869ff58b977991f5c90fc47", + "id": "nmdc:51a011777869ff58b977991f5c90fc47", + "file_size_bytes": 46844460 + }, + { + "name": "Gp0127624_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cath_funfam.gff", + "md5_checksum": "53f57253df5119d338b9813aa81c7c9b", + "id": "nmdc:53f57253df5119d338b9813aa81c7c9b", + "file_size_bytes": 35558659 + }, + { + "name": "Gp0127624_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko_ec.gff", + "md5_checksum": "c4aa03608fa7442a05cd23fdcc29bc21", + "id": "nmdc:c4aa03608fa7442a05cd23fdcc29bc21", + "file_size_bytes": 23055213 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34701" + }, + "has_input": [ + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "nmdc:8c37ab0b3594cc975348041e4841f6ac", + "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0" + ], + "too_short_contig_num": 182057, + "part_of": [ + "nmdc:mga0e8jh10" + ], + "binned_contig_num": 364, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", + "nmdc:822be4fbeadb0c8c24f4a680d646b62f", + "nmdc:6b39bdb404c651428634ad28f8f15e2a", + "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", + "nmdc:2d174febedeca0ce515939dd53d6ccb9" + ], + "was_informed_by": "gold:Gp0127624", + "input_contig_num": 191010, + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0e8jh10", + "mags_list": [ + { + "number_of_contig": 69, + "completeness": 11.21, + "bin_name": "bins.1", + "gene_count": 328, + "bin_quality": 
"LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 194, + "completeness": 75.24, + "bin_name": "bins.2", + "gene_count": 2023, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.78, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 35 + }, + { + "number_of_contig": 101, + "completeness": 19.54, + "bin_name": "bins.3", + "gene_count": 585, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + } + ], + "unbinned_contig_num": 8589, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127624_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.tooShort.fa", + 
"md5_checksum": "73aca2cc587d8a632a730dcc6ff53d3b", + "id": "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", + "file_size_bytes": 79198373 + }, + { + "name": "Gp0127624_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.unbinned.fa", + "md5_checksum": "822be4fbeadb0c8c24f4a680d646b62f", + "id": "nmdc:822be4fbeadb0c8c24f4a680d646b62f", + "file_size_bytes": 13854717 + }, + { + "name": "Gp0127624_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127624", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_checkm_qa.out", + "md5_checksum": "6b39bdb404c651428634ad28f8f15e2a", + "id": "nmdc:6b39bdb404c651428634ad28f8f15e2a", + "file_size_bytes": 1106 + }, + { + "name": "Gp0127624_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127624", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_hqmq_bin.zip", + "md5_checksum": "0bd9d9e5f15087ccd35c38956bb3a210", + "id": "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", + "file_size_bytes": 507790 + }, + { + "name": "Gp0127624_metabat2 bins", + "description": "metabat2 bins for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_metabat_bin.zip", + "md5_checksum": "2d174febedeca0ce515939dd53d6ccb9", + "id": "nmdc:2d174febedeca0ce515939dd53d6ccb9", + "file_size_bytes": 230699 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b3e" + }, + "id": "nmdc:omprc-11-1nvcer55", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", + "description": "Riverbed sediment samples were collected from an area with a 
lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-3sfanv57" + ], + "has_output": [ + "jgi:574fde587ded5e3df1ee13fd" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127629" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c869" + }, + "has_input": [ + "nmdc:22f8150866c51b35726066d2ec13c5ca" + ], + "part_of": [ + "nmdc:mga071r920" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0db98173ae3395106e24d250b2655f06", + "nmdc:bc0874c01bbd31c644cd598e2fdad3c4" + ], + "was_informed_by": "gold:Gp0127629", + "input_read_count": 23886420, + "output_read_bases": 3395256515, + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3606849420, + "name": "Read QC Activity for nmdc:mga071r920", + "output_read_count": 22738452, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:33+00:00", + "output_data_objects": [ + { + "name": "Gp0127629_Filtered Reads", + "description": "Filtered Reads for Gp0127629", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filtered.fastq.gz", + "md5_checksum": "0db98173ae3395106e24d250b2655f06", + "id": "nmdc:0db98173ae3395106e24d250b2655f06", + "file_size_bytes": 1807840952 + }, + { + "name": "Gp0127629_Filtered 
Stats", + "description": "Filtered Stats for Gp0127629", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filterStats.txt", + "md5_checksum": "bc0874c01bbd31c644cd598e2fdad3c4", + "id": "nmdc:bc0874c01bbd31c644cd598e2fdad3c4", + "file_size_bytes": 284 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf2b" + }, + "has_input": [ + "nmdc:0db98173ae3395106e24d250b2655f06" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f4f810491708ff25956cddd005cc9944", + "nmdc:67e3c200d3765733af33d1db1f4bf968", + "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "nmdc:6a216ec913587e26ddc036b703126d76", + "nmdc:ebed7286f886596764a66a0d1dac3e43", + "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "nmdc:81108175d5ef2ca158f516bfc75d3cd9" + ], + "was_informed_by": "gold:Gp0127629", + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga071r920", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:33:33+00:00", + "output_data_objects": [ + { + "name": "Gp0127629_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", + "md5_checksum": "f4f810491708ff25956cddd005cc9944", + "id": "nmdc:f4f810491708ff25956cddd005cc9944", + "file_size_bytes": 1206 + }, + { + "name": "Gp0127629_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", + "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", + "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", + "file_size_bytes": 
662074 + }, + { + "name": "Gp0127629_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127629", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", + "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", + "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "file_size_bytes": 229307 + }, + { + "name": "Gp0127629_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127629", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", + "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", + "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "file_size_bytes": 1667543500 + }, + { + "name": "Gp0127629_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127629", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", + "md5_checksum": "6a216ec913587e26ddc036b703126d76", + "id": "nmdc:6a216ec913587e26ddc036b703126d76", + "file_size_bytes": 253079 + }, + { + "name": "Gp0127629_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127629", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", + "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", + "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", + "file_size_bytes": 2326900 + }, + { + "name": "Gp0127629_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127629", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", + "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", + "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "file_size_bytes": 1328025421 + }, + { + "name": "Gp0127629_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127629", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", + "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", + "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "file_size_bytes": 628969 + }, + { + "name": "Gp0127629_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127629", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", + "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", + "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", + "file_size_bytes": 3933712 + } + ] + }, + { + "_id": { + "$oid": "61e7195d833bcf838a70058b" + }, + "has_input": [ + "nmdc:0db98173ae3395106e24d250b2655f06" + ], + "part_of": [ + "nmdc:mga071r920" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f4f810491708ff25956cddd005cc9944", + "nmdc:67e3c200d3765733af33d1db1f4bf968", + "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "nmdc:6a216ec913587e26ddc036b703126d76", + "nmdc:ebed7286f886596764a66a0d1dac3e43", + "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "nmdc:81108175d5ef2ca158f516bfc75d3cd9" + ], + "was_informed_by": "gold:Gp0127629", + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga071r920", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + 
"ended_at_time": "2021-10-11T03:33:33+00:00", + "output_data_objects": [ + { + "name": "Gp0127629_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", + "md5_checksum": "f4f810491708ff25956cddd005cc9944", + "id": "nmdc:f4f810491708ff25956cddd005cc9944", + "file_size_bytes": 1206 + }, + { + "name": "Gp0127629_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", + "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", + "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", + "file_size_bytes": 662074 + }, + { + "name": "Gp0127629_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127629", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", + "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", + "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "file_size_bytes": 229307 + }, + { + "name": "Gp0127629_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127629", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", + "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", + "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "file_size_bytes": 1667543500 + }, + { + "name": "Gp0127629_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127629", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", + "md5_checksum": 
"6a216ec913587e26ddc036b703126d76", + "id": "nmdc:6a216ec913587e26ddc036b703126d76", + "file_size_bytes": 253079 + }, + { + "name": "Gp0127629_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127629", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", + "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", + "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", + "file_size_bytes": 2326900 + }, + { + "name": "Gp0127629_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127629", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", + "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", + "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "file_size_bytes": 1328025421 + }, + { + "name": "Gp0127629_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127629", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", + "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", + "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "file_size_bytes": 628969 + }, + { + "name": "Gp0127629_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127629", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", + "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", + "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", + "file_size_bytes": 3933712 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9a" + }, + "has_input": [ + "nmdc:0db98173ae3395106e24d250b2655f06" + ], + "part_of": [ + "nmdc:mga071r920" + ], + "ctg_logsum": 
212258, + "scaf_logsum": 212917, + "gap_pct": 0.00151, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "nmdc:89dd3c10791083ae5a5b30c2154deabd", + "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", + "nmdc:0a1f96cd74ec9f1a6668924745689014", + "nmdc:1608f12840c36ac1d882cc6ef4f4627f" + ], + "asm_score": 3.305, + "was_informed_by": "gold:Gp0127629", + "ctg_powsum": 22751, + "scaf_max": 23996, + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "scaf_powsum": 22826, + "execution_resource": "NERSC-Cori", + "contigs": 208553, + "name": "Assembly Activity for nmdc:mga071r920", + "ctg_max": 23996, + "gc_std": 0.1053, + "contig_bp": 101011771, + "gc_avg": 0.62056, + "started_at_time": "2021-10-11T02:23:35Z", + "scaf_bp": 101013301, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 208427, + "ended_at_time": "2021-10-11T03:33:33+00:00", + "ctg_l50": 478, + "ctg_l90": 290, + "ctg_n50": 59884, + "ctg_n90": 174522, + "scaf_l50": 478, + "scaf_l90": 290, + "scaf_n50": 59864, + "scaf_n90": 174416, + "output_data_objects": [ + { + "name": "Gp0127629_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127629", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_contigs.fna", + "md5_checksum": "7badcefc26b24213b514cd4c3c9a87d7", + "id": "nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "file_size_bytes": 109144090 + }, + { + "name": "Gp0127629_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127629", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_scaffolds.fna", + "md5_checksum": "89dd3c10791083ae5a5b30c2154deabd", + "id": "nmdc:89dd3c10791083ae5a5b30c2154deabd", + "file_size_bytes": 108517023 + }, + { + "name": "Gp0127629_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage 
Stats for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_covstats.txt", + "md5_checksum": "5e503e3abe6eb9e94c34a55da5bbafdc", + "id": "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", + "file_size_bytes": 16536925 + }, + { + "name": "Gp0127629_Assembled AGP file", + "description": "Assembled AGP file for Gp0127629", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_assembly.agp", + "md5_checksum": "0a1f96cd74ec9f1a6668924745689014", + "id": "nmdc:0a1f96cd74ec9f1a6668924745689014", + "file_size_bytes": 15454045 + }, + { + "name": "Gp0127629_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127629", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_pairedMapped_sorted.bam", + "md5_checksum": "1608f12840c36ac1d882cc6ef4f4627f", + "id": "nmdc:1608f12840c36ac1d882cc6ef4f4627f", + "file_size_bytes": 2001264626 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9af" + }, + "has_input": [ + "nmdc:7badcefc26b24213b514cd4c3c9a87d7" + ], + "part_of": [ + "nmdc:mga071r920" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ba15f54043fad473edec771b60f5b040", + "nmdc:f6d684abab1c60b2b95ade84644e6a38", + "nmdc:496e0fa5ac1c04849338c972189ee3f6", + "nmdc:311ffbbfc80f28908615a1f18492ae5e", + "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "nmdc:1116328ed7ba951246f0eec1d3f065b4", + "nmdc:325e47bc009aeba79fc767e3b6daeee2", + "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", + "nmdc:96ab6fa258a08490082b4f99269f3e8d", + "nmdc:a2b630c408bd557d693b147f95627fdc", + "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", + "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3" + ], + "was_informed_by": "gold:Gp0127629", + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity 
for nmdc:mga071r920", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:33:33+00:00", + "output_data_objects": [ + { + "name": "Gp0127629_Protein FAA", + "description": "Protein FAA for Gp0127629", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_proteins.faa", + "md5_checksum": "ba15f54043fad473edec771b60f5b040", + "id": "nmdc:ba15f54043fad473edec771b60f5b040", + "file_size_bytes": 62222526 + }, + { + "name": "Gp0127629_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127629", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_structural_annotation.gff", + "md5_checksum": "f6d684abab1c60b2b95ade84644e6a38", + "id": "nmdc:f6d684abab1c60b2b95ade84644e6a38", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127629_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127629", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_functional_annotation.gff", + "md5_checksum": "496e0fa5ac1c04849338c972189ee3f6", + "id": "nmdc:496e0fa5ac1c04849338c972189ee3f6", + "file_size_bytes": 70803412 + }, + { + "name": "Gp0127629_KO TSV file", + "description": "KO TSV file for Gp0127629", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko.tsv", + "md5_checksum": "311ffbbfc80f28908615a1f18492ae5e", + "id": "nmdc:311ffbbfc80f28908615a1f18492ae5e", + "file_size_bytes": 8203743 + }, + { + "name": "Gp0127629_EC TSV file", + "description": "EC TSV file for Gp0127629", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ec.tsv", + "md5_checksum": "7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "id": "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "file_size_bytes": 5508974 + }, + { + "name": "Gp0127629_COG GFF file", + "description": "COG GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cog.gff", + "md5_checksum": "1116328ed7ba951246f0eec1d3f065b4", + "id": "nmdc:1116328ed7ba951246f0eec1d3f065b4", + "file_size_bytes": 42250648 + }, + { + "name": "Gp0127629_PFAM GFF file", + "description": "PFAM GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_pfam.gff", + "md5_checksum": "325e47bc009aeba79fc767e3b6daeee2", + "id": "nmdc:325e47bc009aeba79fc767e3b6daeee2", + "file_size_bytes": 31677996 + }, + { + "name": "Gp0127629_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_tigrfam.gff", + "md5_checksum": "f820db8ce6a1ae7c3e8af40729f5b62b", + "id": "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", + "file_size_bytes": 3472661 + }, + { + "name": "Gp0127629_SMART GFF file", + "description": "SMART GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_smart.gff", + "md5_checksum": "96ab6fa258a08490082b4f99269f3e8d", + "id": "nmdc:96ab6fa258a08490082b4f99269f3e8d", + "file_size_bytes": 9149681 + }, + { + "name": "Gp0127629_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_supfam.gff", + "md5_checksum": "a2b630c408bd557d693b147f95627fdc", + "id": "nmdc:a2b630c408bd557d693b147f95627fdc", + "file_size_bytes": 52308332 + }, + { + "name": "Gp0127629_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127629", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cath_funfam.gff", + "md5_checksum": "ba87cd24242288e0b6d8f32a2bcbbb80", + "id": "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", + "file_size_bytes": 39926818 + }, + { + "name": "Gp0127629_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko_ec.gff", + "md5_checksum": "9c97bd7a5e4978e31ed1e5386c3619f3", + "id": "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3", + "file_size_bytes": 26101397 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3470c" + }, + "has_input": [ + "nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "nmdc:1608f12840c36ac1d882cc6ef4f4627f", + "nmdc:496e0fa5ac1c04849338c972189ee3f6" + ], + "too_short_contig_num": 195955, + "part_of": [ + "nmdc:mga071r920" + ], + "binned_contig_num": 271, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2bbd475ff6a15058b38244e71456024a", + "nmdc:70901a70c06fdcfc71efa2d004e210fd", + "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", + "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", + "nmdc:8f4f5294de942734837fba3d68ffc6b4" + ], + "was_informed_by": "gold:Gp0127629", + "input_contig_num": 208551, + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga071r920", + "mags_list": [ + { + "number_of_contig": 177, + "completeness": 9.71, + "bin_name": "bins.1", + "gene_count": 1122, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 94, + "completeness": 16.81, + "bin_name": "bins.2", + "gene_count": 465, + "bin_quality": "LQ", + "gtdbtk_species": "", + 
"gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.34, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 12325, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:33+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127629_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.tooShort.fa", + "md5_checksum": "2bbd475ff6a15058b38244e71456024a", + "id": "nmdc:2bbd475ff6a15058b38244e71456024a", + "file_size_bytes": 88674437 + }, + { + "name": "Gp0127629_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.unbinned.fa", + "md5_checksum": "70901a70c06fdcfc71efa2d004e210fd", + "id": "nmdc:70901a70c06fdcfc71efa2d004e210fd", + "file_size_bytes": 19226945 + }, + { + "name": "Gp0127629_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127629", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_checkm_qa.out", + "md5_checksum": "d52b4ae6b61161082fee7d42ecf5ee87", + 
"id": "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", + "file_size_bytes": 978 + }, + { + "name": "Gp0127629_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127629", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_hqmq_bin.zip", + "md5_checksum": "58d9cd30ca53424cd0f1ce27d0a8a885", + "id": "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", + "file_size_bytes": 182 + }, + { + "name": "Gp0127629_metabat2 bins", + "description": "metabat2 bins for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_metabat_bin.zip", + "md5_checksum": "8f4f5294de942734837fba3d68ffc6b4", + "id": "nmdc:8f4f5294de942734837fba3d68ffc6b4", + "file_size_bytes": 377953 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b3f" + }, + "id": "nmdc:omprc-11-b051xn44", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jdsasr43" + ], + "has_output": [ + "jgi:574fe09a7ded5e3df1ee1485" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127628" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c863" + }, + "has_input": [ + 
"nmdc:efca984ecf94cc8de2aeabf94e0b87cc" + ], + "part_of": [ + "nmdc:mga0x5c381" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f6f1760721d73fc57919b2115a1d47ec", + "nmdc:2225f9d41343590d818186fa2d66852d" + ], + "was_informed_by": "gold:Gp0127628", + "input_read_count": 31715882, + "output_read_bases": 4516265181, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4789098182, + "name": "Read QC Activity for nmdc:mga0x5c381", + "output_read_count": 30212248, + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:59+00:00", + "output_data_objects": [ + { + "name": "Gp0127628_Filtered Reads", + "description": "Filtered Reads for Gp0127628", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filtered.fastq.gz", + "md5_checksum": "f6f1760721d73fc57919b2115a1d47ec", + "id": "nmdc:f6f1760721d73fc57919b2115a1d47ec", + "file_size_bytes": 2548975208 + }, + { + "name": "Gp0127628_Filtered Stats", + "description": "Filtered Stats for Gp0127628", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filterStats.txt", + "md5_checksum": "2225f9d41343590d818186fa2d66852d", + "id": "nmdc:2225f9d41343590d818186fa2d66852d", + "file_size_bytes": 291 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf24" + }, + "has_input": [ + "nmdc:f6f1760721d73fc57919b2115a1d47ec" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "nmdc:335dbf6f1055de0950988a002f432c0b", + "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "nmdc:224085164a389c6f207967ed03b3e6af", + "nmdc:39ba17263c144761a8bdcc1645c034f5", + "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + 
"nmdc:8f75800abbcf5a94043ad677d7cb975c", + "nmdc:aae9e961d8ed716457616c8a8841037b", + "nmdc:ba83d6ab837403f4bcbc9400a0460457" + ], + "was_informed_by": "gold:Gp0127628", + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:45:59+00:00", + "output_data_objects": [ + { + "name": "Gp0127628_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", + "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", + "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "file_size_bytes": 3472 + }, + { + "name": "Gp0127628_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", + "md5_checksum": "335dbf6f1055de0950988a002f432c0b", + "id": "nmdc:335dbf6f1055de0950988a002f432c0b", + "file_size_bytes": 863867 + }, + { + "name": "Gp0127628_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127628", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", + "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", + "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "file_size_bytes": 234974 + }, + { + "name": "Gp0127628_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127628", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", + "md5_checksum": "224085164a389c6f207967ed03b3e6af", + 
"id": "nmdc:224085164a389c6f207967ed03b3e6af", + "file_size_bytes": 2220789142 + }, + { + "name": "Gp0127628_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127628", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", + "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", + "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", + "file_size_bytes": 257030 + }, + { + "name": "Gp0127628_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127628", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", + "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", + "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "file_size_bytes": 2337568 + }, + { + "name": "Gp0127628_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127628", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", + "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", + "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "file_size_bytes": 1776487262 + }, + { + "name": "Gp0127628_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127628", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", + "md5_checksum": "aae9e961d8ed716457616c8a8841037b", + "id": "nmdc:aae9e961d8ed716457616c8a8841037b", + "file_size_bytes": 664011 + }, + { + "name": "Gp0127628_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127628", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", + "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", + "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", + "file_size_bytes": 4035375 + } + ] + }, + { + "_id": { + "$oid": "61e7193b833bcf838a6fff9c" + }, + "has_input": [ + "nmdc:f6f1760721d73fc57919b2115a1d47ec" + ], + "part_of": [ + "nmdc:mga0x5c381" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "nmdc:335dbf6f1055de0950988a002f432c0b", + "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "nmdc:224085164a389c6f207967ed03b3e6af", + "nmdc:39ba17263c144761a8bdcc1645c034f5", + "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "nmdc:aae9e961d8ed716457616c8a8841037b", + "nmdc:ba83d6ab837403f4bcbc9400a0460457" + ], + "was_informed_by": "gold:Gp0127628", + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:45:59+00:00", + "output_data_objects": [ + { + "name": "Gp0127628_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", + "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", + "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "file_size_bytes": 3472 + }, + { + "name": "Gp0127628_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", + "md5_checksum": "335dbf6f1055de0950988a002f432c0b", + "id": "nmdc:335dbf6f1055de0950988a002f432c0b", + "file_size_bytes": 863867 + }, + { + "name": "Gp0127628_Gottcha2 
Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127628", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", + "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", + "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "file_size_bytes": 234974 + }, + { + "name": "Gp0127628_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127628", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", + "md5_checksum": "224085164a389c6f207967ed03b3e6af", + "id": "nmdc:224085164a389c6f207967ed03b3e6af", + "file_size_bytes": 2220789142 + }, + { + "name": "Gp0127628_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127628", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", + "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", + "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", + "file_size_bytes": 257030 + }, + { + "name": "Gp0127628_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127628", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", + "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", + "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "file_size_bytes": 2337568 + }, + { + "name": "Gp0127628_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127628", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", + "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", + "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "file_size_bytes": 1776487262 + }, + { + "name": "Gp0127628_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127628", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", + "md5_checksum": "aae9e961d8ed716457616c8a8841037b", + "id": "nmdc:aae9e961d8ed716457616c8a8841037b", + "file_size_bytes": 664011 + }, + { + "name": "Gp0127628_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127628", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", + "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", + "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", + "file_size_bytes": 4035375 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fae" + }, + "has_input": [ + "nmdc:f6f1760721d73fc57919b2115a1d47ec" + ], + "part_of": [ + "nmdc:mga0x5c381" + ], + "ctg_logsum": 110768, + "scaf_logsum": 111226, + "gap_pct": 0.00124, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "nmdc:5e79fce62ffa8c4479be5159143797e0", + "nmdc:682fd042d6adcd93f75c3eae2cf32241", + "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", + "nmdc:9163caaba1f60d1af9a551559069ca08" + ], + "asm_score": 4.319, + "was_informed_by": "gold:Gp0127628", + "ctg_powsum": 11962, + "scaf_max": 45540, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "scaf_powsum": 12026, + "execution_resource": "NERSC-Cori", + "contigs": 157859, + "name": "Assembly Activity for nmdc:mga0x5c381", + "ctg_max": 40273, + "gc_std": 0.10673, + "contig_bp": 68288279, + "gc_avg": 
0.61453, + "started_at_time": "2021-10-11T02:25:13Z", + "scaf_bp": 68289129, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 157774, + "ended_at_time": "2021-10-11T04:45:59+00:00", + "ctg_l50": 400, + "ctg_l90": 285, + "ctg_n50": 49248, + "ctg_n90": 135173, + "scaf_l50": 400, + "scaf_l90": 285, + "scaf_n50": 49230, + "scaf_n90": 135095, + "output_data_objects": [ + { + "name": "Gp0127628_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127628", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", + "md5_checksum": "9e550afb3bcd8d66807f861ecfed815b", + "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "file_size_bytes": 74277737 + }, + { + "name": "Gp0127628_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127628", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", + "md5_checksum": "5e79fce62ffa8c4479be5159143797e0", + "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", + "file_size_bytes": 73802989 + }, + { + "name": "Gp0127628_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_covstats.txt", + "md5_checksum": "682fd042d6adcd93f75c3eae2cf32241", + "id": "nmdc:682fd042d6adcd93f75c3eae2cf32241", + "file_size_bytes": 12462125 + }, + { + "name": "Gp0127628_Assembled AGP file", + "description": "Assembled AGP file for Gp0127628", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_assembly.agp", + "md5_checksum": "9d607ebd92ad5bcbaaa405884d4a83a3", + "id": "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", + "file_size_bytes": 11636352 + }, + { + "name": "Gp0127628_Metagenome Alignment BAM file", + "description": "Metagenome Alignment 
BAM file for Gp0127628", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_pairedMapped_sorted.bam", + "md5_checksum": "9163caaba1f60d1af9a551559069ca08", + "id": "nmdc:9163caaba1f60d1af9a551559069ca08", + "file_size_bytes": 2743529039 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9aa" + }, + "has_input": [ + "nmdc:9e550afb3bcd8d66807f861ecfed815b" + ], + "part_of": [ + "nmdc:mga0x5c381" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9c21fbee23b4098d69ac618d32fe44c3", + "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", + "nmdc:cf08b19ebb993d895845588d073c02fe", + "nmdc:e110cecd0dcfbefbde06b88e89047c94", + "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", + "nmdc:c8834a004633752f76b91883416c34b8", + "nmdc:adc813c11b8b32e205aa65ab971d4159", + "nmdc:eecb4098ed258acb0820c17e9e308a9d", + "nmdc:cd2cbf38f357d4c7ec5080072e994861", + "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", + "nmdc:29e9378a37cc56837c1343de85993789", + "nmdc:5faeccd78a03acd094263a777faa5fe2" + ], + "was_informed_by": "gold:Gp0127628", + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0x5c381", + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:45:59+00:00", + "output_data_objects": [ + { + "name": "Gp0127628_Protein FAA", + "description": "Protein FAA for Gp0127628", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_proteins.faa", + "md5_checksum": "9c21fbee23b4098d69ac618d32fe44c3", + "id": "nmdc:9c21fbee23b4098d69ac618d32fe44c3", + "file_size_bytes": 43551850 + }, + { + "name": "Gp0127628_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127628", + "data_object_type": "Structural 
Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_structural_annotation.gff", + "md5_checksum": "c668eaf35e0ebbb7a304271a03dfd3cd", + "id": "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127628_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127628", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_functional_annotation.gff", + "md5_checksum": "cf08b19ebb993d895845588d073c02fe", + "id": "nmdc:cf08b19ebb993d895845588d073c02fe", + "file_size_bytes": 50830515 + }, + { + "name": "Gp0127628_KO TSV file", + "description": "KO TSV file for Gp0127628", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko.tsv", + "md5_checksum": "e110cecd0dcfbefbde06b88e89047c94", + "id": "nmdc:e110cecd0dcfbefbde06b88e89047c94", + "file_size_bytes": 5904167 + }, + { + "name": "Gp0127628_EC TSV file", + "description": "EC TSV file for Gp0127628", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ec.tsv", + "md5_checksum": "5f393bad4aacf75d348d7e7d5fe00a06", + "id": "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", + "file_size_bytes": 3917008 + }, + { + "name": "Gp0127628_COG GFF file", + "description": "COG GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cog.gff", + "md5_checksum": "c8834a004633752f76b91883416c34b8", + "id": "nmdc:c8834a004633752f76b91883416c34b8", + "file_size_bytes": 29634134 + }, + { + "name": "Gp0127628_PFAM GFF file", + "description": "PFAM GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_pfam.gff", + "md5_checksum": 
"adc813c11b8b32e205aa65ab971d4159", + "id": "nmdc:adc813c11b8b32e205aa65ab971d4159", + "file_size_bytes": 21661208 + }, + { + "name": "Gp0127628_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_tigrfam.gff", + "md5_checksum": "eecb4098ed258acb0820c17e9e308a9d", + "id": "nmdc:eecb4098ed258acb0820c17e9e308a9d", + "file_size_bytes": 2198767 + }, + { + "name": "Gp0127628_SMART GFF file", + "description": "SMART GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_smart.gff", + "md5_checksum": "cd2cbf38f357d4c7ec5080072e994861", + "id": "nmdc:cd2cbf38f357d4c7ec5080072e994861", + "file_size_bytes": 6281175 + }, + { + "name": "Gp0127628_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_supfam.gff", + "md5_checksum": "1e7aefe1539f0dbe510f805a8d0a6930", + "id": "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", + "file_size_bytes": 36891824 + }, + { + "name": "Gp0127628_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cath_funfam.gff", + "md5_checksum": "29e9378a37cc56837c1343de85993789", + "id": "nmdc:29e9378a37cc56837c1343de85993789", + "file_size_bytes": 27671574 + }, + { + "name": "Gp0127628_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko_ec.gff", + "md5_checksum": "5faeccd78a03acd094263a777faa5fe2", + "id": "nmdc:5faeccd78a03acd094263a777faa5fe2", + "file_size_bytes": 18790529 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34705" + }, + "has_input": [ + "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "nmdc:9163caaba1f60d1af9a551559069ca08", + 
"nmdc:cf08b19ebb993d895845588d073c02fe" + ], + "too_short_contig_num": 151485, + "part_of": [ + "nmdc:mga0x5c381" + ], + "binned_contig_num": 238, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:13137fa415f537d2874808d8c75c1b3d", + "nmdc:196d2699f8fdab4e38c8a638f92093b1", + "nmdc:b67b26f8f76faa347575352000021faf", + "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", + "nmdc:5ef5ad24cfe3990c0256d420f51f9010" + ], + "was_informed_by": "gold:Gp0127628", + "input_contig_num": 157858, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0x5c381", + "mags_list": [ + { + "number_of_contig": 238, + "completeness": 30.86, + "bin_name": "bins.1", + "gene_count": 1126, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 6135, + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:59+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127628_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127628", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.tooShort.fa", + "md5_checksum": "13137fa415f537d2874808d8c75c1b3d", + "id": "nmdc:13137fa415f537d2874808d8c75c1b3d", + "file_size_bytes": 63661919 + }, + { + "name": "Gp0127628_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.unbinned.fa", + "md5_checksum": "196d2699f8fdab4e38c8a638f92093b1", + "id": "nmdc:196d2699f8fdab4e38c8a638f92093b1", + "file_size_bytes": 9649261 + }, + { + "name": "Gp0127628_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127628", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_checkm_qa.out", + "md5_checksum": "b67b26f8f76faa347575352000021faf", + "id": "nmdc:b67b26f8f76faa347575352000021faf", + "file_size_bytes": 785 + }, + { + "name": "Gp0127628_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127628", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_hqmq_bin.zip", + "md5_checksum": "166c8a0ad2f4d57e9b16cdc699d56c09", + "id": "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", + "file_size_bytes": 182 + }, + { + "name": "Gp0127628_metabat2 bins", + "description": "metabat2 bins for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_metabat_bin.zip", + "md5_checksum": "5ef5ad24cfe3990c0256d420f51f9010", + "id": "nmdc:5ef5ad24cfe3990c0256d420f51f9010", + "file_size_bytes": 279359 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b40" + }, + "id": "nmdc:omprc-11-k8kt2j31", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW 
S1_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-4vqhvw07" + ], + "has_output": [ + "jgi:574fde5b7ded5e3df1ee13ff" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127631" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c862" + }, + "has_input": [ + "nmdc:9c97e4b734b9cac731fe30fb07a32bb7" + ], + "part_of": [ + "nmdc:mga0jx8k09" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", + "nmdc:b280141d234edf10cde8794539700654" + ], + "was_informed_by": "gold:Gp0127631", + "input_read_count": 26419652, + "output_read_bases": 3798930297, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3989367452, + "name": "Read QC Activity for nmdc:mga0jx8k09", + "output_read_count": 25434840, + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00", + "output_data_objects": [ + { + "name": "Gp0127631_Filtered Reads", + "description": "Filtered Reads for Gp0127631", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filtered.fastq.gz", + "md5_checksum": "6969fd7f4b1a5a34fb30d31b92cd6bf8", + "id": 
"nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", + "file_size_bytes": 2030538721 + }, + { + "name": "Gp0127631_Filtered Stats", + "description": "Filtered Stats for Gp0127631", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filterStats.txt", + "md5_checksum": "b280141d234edf10cde8794539700654", + "id": "nmdc:b280141d234edf10cde8794539700654", + "file_size_bytes": 284 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf22" + }, + "has_input": [ + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b78e8246144185beb95c0caf65ef1f1a", + "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "nmdc:0d1729a83798b752f33eeb8d97afe972", + "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "nmdc:ea27c005b1788434c2198ad60939d4bc", + "nmdc:6a46583da876b9d6287302308df0b9fd", + "nmdc:af619dc5a0423509a4beaca26aa61000", + "nmdc:50093825ec73dcabe66aa353de766beb" + ], + "was_informed_by": "gold:Gp0127631", + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:40:31+00:00", + "output_data_objects": [ + { + "name": "Gp0127631_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", + "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", + "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", + "file_size_bytes": 1227 + }, + { + "name": "Gp0127631_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", + 
"md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", + "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "file_size_bytes": 647196 + }, + { + "name": "Gp0127631_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127631", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", + "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", + "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "file_size_bytes": 229312 + }, + { + "name": "Gp0127631_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127631", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", + "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", + "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", + "file_size_bytes": 1861431092 + }, + { + "name": "Gp0127631_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127631", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", + "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", + "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "file_size_bytes": 254665 + }, + { + "name": "Gp0127631_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127631", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", + "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", + "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", + "file_size_bytes": 2334578 + }, + { + "name": "Gp0127631_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127631", + 
"data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", + "md5_checksum": "6a46583da876b9d6287302308df0b9fd", + "id": "nmdc:6a46583da876b9d6287302308df0b9fd", + "file_size_bytes": 1483354621 + }, + { + "name": "Gp0127631_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127631", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", + "md5_checksum": "af619dc5a0423509a4beaca26aa61000", + "id": "nmdc:af619dc5a0423509a4beaca26aa61000", + "file_size_bytes": 640329 + }, + { + "name": "Gp0127631_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127631", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", + "md5_checksum": "50093825ec73dcabe66aa353de766beb", + "id": "nmdc:50093825ec73dcabe66aa353de766beb", + "file_size_bytes": 3993246 + } + ] + }, + { + "_id": { + "$oid": "61e71938833bcf838a6ffe7a" + }, + "has_input": [ + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" + ], + "part_of": [ + "nmdc:mga0jx8k09" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b78e8246144185beb95c0caf65ef1f1a", + "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "nmdc:0d1729a83798b752f33eeb8d97afe972", + "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "nmdc:ea27c005b1788434c2198ad60939d4bc", + "nmdc:6a46583da876b9d6287302308df0b9fd", + "nmdc:af619dc5a0423509a4beaca26aa61000", + "nmdc:50093825ec73dcabe66aa353de766beb" + ], + "was_informed_by": "gold:Gp0127631", + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", + 
"started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:40:31+00:00", + "output_data_objects": [ + { + "name": "Gp0127631_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", + "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", + "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", + "file_size_bytes": 1227 + }, + { + "name": "Gp0127631_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", + "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", + "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "file_size_bytes": 647196 + }, + { + "name": "Gp0127631_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127631", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", + "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", + "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "file_size_bytes": 229312 + }, + { + "name": "Gp0127631_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127631", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", + "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", + "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", + "file_size_bytes": 1861431092 + }, + { + "name": "Gp0127631_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127631", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", + "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", + "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "file_size_bytes": 254665 + }, + { + "name": "Gp0127631_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127631", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", + "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", + "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", + "file_size_bytes": 2334578 + }, + { + "name": "Gp0127631_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127631", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", + "md5_checksum": "6a46583da876b9d6287302308df0b9fd", + "id": "nmdc:6a46583da876b9d6287302308df0b9fd", + "file_size_bytes": 1483354621 + }, + { + "name": "Gp0127631_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127631", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", + "md5_checksum": "af619dc5a0423509a4beaca26aa61000", + "id": "nmdc:af619dc5a0423509a4beaca26aa61000", + "file_size_bytes": 640329 + }, + { + "name": "Gp0127631_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127631", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", + "md5_checksum": "50093825ec73dcabe66aa353de766beb", + "id": "nmdc:50093825ec73dcabe66aa353de766beb", + "file_size_bytes": 3993246 + } + ] + }, + { + "_id": { + "$oid": 
"649b005f2ca5ee4adb139f94" + }, + "has_input": [ + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" + ], + "part_of": [ + "nmdc:mga0jx8k09" + ], + "ctg_logsum": 306128, + "scaf_logsum": 307525, + "gap_pct": 0.00196, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "nmdc:992fb303b5ced60489fea0ce6dae71f9", + "nmdc:a0f466071ed249babf1a5653e1c20a02", + "nmdc:5eddebfbfabfd9c0e71c2699bee73870", + "nmdc:0ecd5e99ec93ba17c7b02483560bafdf" + ], + "asm_score": 3.117, + "was_informed_by": "gold:Gp0127631", + "ctg_powsum": 32898, + "scaf_max": 14244, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "scaf_powsum": 33057, + "execution_resource": "NERSC-Cori", + "contigs": 237399, + "name": "Assembly Activity for nmdc:mga0jx8k09", + "ctg_max": 14244, + "gc_std": 0.09594, + "contig_bp": 119367623, + "gc_avg": 0.62364, + "started_at_time": "2021-10-11T02:26:22Z", + "scaf_bp": 119369963, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 237183, + "ended_at_time": "2021-10-11T04:40:31+00:00", + "ctg_l50": 499, + "ctg_l90": 292, + "ctg_n50": 64310, + "ctg_n90": 195626, + "scaf_l50": 500, + "scaf_l90": 292, + "scaf_n50": 64017, + "scaf_n90": 195424, + "output_data_objects": [ + { + "name": "Gp0127631_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127631", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_contigs.fna", + "md5_checksum": "1eb44ff780f2aad1053ca336b53d7b98", + "id": "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "file_size_bytes": 128714098 + }, + { + "name": "Gp0127631_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127631", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_scaffolds.fna", + "md5_checksum": "992fb303b5ced60489fea0ce6dae71f9", + "id": 
"nmdc:992fb303b5ced60489fea0ce6dae71f9", + "file_size_bytes": 127998496 + }, + { + "name": "Gp0127631_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_covstats.txt", + "md5_checksum": "a0f466071ed249babf1a5653e1c20a02", + "id": "nmdc:a0f466071ed249babf1a5653e1c20a02", + "file_size_bytes": 18831462 + }, + { + "name": "Gp0127631_Assembled AGP file", + "description": "Assembled AGP file for Gp0127631", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_assembly.agp", + "md5_checksum": "5eddebfbfabfd9c0e71c2699bee73870", + "id": "nmdc:5eddebfbfabfd9c0e71c2699bee73870", + "file_size_bytes": 17634272 + }, + { + "name": "Gp0127631_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127631", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_pairedMapped_sorted.bam", + "md5_checksum": "0ecd5e99ec93ba17c7b02483560bafdf", + "id": "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", + "file_size_bytes": 2245356551 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a8" + }, + "has_input": [ + "nmdc:1eb44ff780f2aad1053ca336b53d7b98" + ], + "part_of": [ + "nmdc:mga0jx8k09" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8c26f97b6a3196ed09dc4f54857d4972", + "nmdc:c7112633e322d7bc609bd479f7ddddb9", + "nmdc:2f6baf7176d2d904c02ae71875a8d326", + "nmdc:1abb9d211201bef0cb545e70a65de8cf", + "nmdc:985a23612611fb258d2dbaee1e4458f5", + "nmdc:44c3fa82e71af5647b7619b0dd8a0728", + "nmdc:fb70c00e07d0b93b12cacbded87dcea6", + "nmdc:98e1311ba5e96a176baccdb9a95439f9", + "nmdc:0685da4455dde2dec9f221b9356f008c", + "nmdc:14b7f064a3a2fad830fad893ff3257bc", + "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", + 
"nmdc:01769b6920ba82884f19ac3f88428db1" + ], + "was_informed_by": "gold:Gp0127631", + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0jx8k09", + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00", + "output_data_objects": [ + { + "name": "Gp0127631_Protein FAA", + "description": "Protein FAA for Gp0127631", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_proteins.faa", + "md5_checksum": "8c26f97b6a3196ed09dc4f54857d4972", + "id": "nmdc:8c26f97b6a3196ed09dc4f54857d4972", + "file_size_bytes": 72966123 + }, + { + "name": "Gp0127631_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127631", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_structural_annotation.gff", + "md5_checksum": "c7112633e322d7bc609bd479f7ddddb9", + "id": "nmdc:c7112633e322d7bc609bd479f7ddddb9", + "file_size_bytes": 2524 + }, + { + "name": "Gp0127631_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127631", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_functional_annotation.gff", + "md5_checksum": "2f6baf7176d2d904c02ae71875a8d326", + "id": "nmdc:2f6baf7176d2d904c02ae71875a8d326", + "file_size_bytes": 81929295 + }, + { + "name": "Gp0127631_KO TSV file", + "description": "KO TSV file for Gp0127631", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko.tsv", + "md5_checksum": "1abb9d211201bef0cb545e70a65de8cf", + "id": "nmdc:1abb9d211201bef0cb545e70a65de8cf", + "file_size_bytes": 8979915 + }, 
+ { + "name": "Gp0127631_EC TSV file", + "description": "EC TSV file for Gp0127631", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ec.tsv", + "md5_checksum": "985a23612611fb258d2dbaee1e4458f5", + "id": "nmdc:985a23612611fb258d2dbaee1e4458f5", + "file_size_bytes": 5914861 + }, + { + "name": "Gp0127631_COG GFF file", + "description": "COG GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cog.gff", + "md5_checksum": "44c3fa82e71af5647b7619b0dd8a0728", + "id": "nmdc:44c3fa82e71af5647b7619b0dd8a0728", + "file_size_bytes": 47190255 + }, + { + "name": "Gp0127631_PFAM GFF file", + "description": "PFAM GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_pfam.gff", + "md5_checksum": "fb70c00e07d0b93b12cacbded87dcea6", + "id": "nmdc:fb70c00e07d0b93b12cacbded87dcea6", + "file_size_bytes": 35794646 + }, + { + "name": "Gp0127631_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_tigrfam.gff", + "md5_checksum": "98e1311ba5e96a176baccdb9a95439f9", + "id": "nmdc:98e1311ba5e96a176baccdb9a95439f9", + "file_size_bytes": 3856365 + }, + { + "name": "Gp0127631_SMART GFF file", + "description": "SMART GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_smart.gff", + "md5_checksum": "0685da4455dde2dec9f221b9356f008c", + "id": "nmdc:0685da4455dde2dec9f221b9356f008c", + "file_size_bytes": 10561278 + }, + { + "name": "Gp0127631_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_supfam.gff", + "md5_checksum": "14b7f064a3a2fad830fad893ff3257bc", + "id": "nmdc:14b7f064a3a2fad830fad893ff3257bc", + 
"file_size_bytes": 59641133 + }, + { + "name": "Gp0127631_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cath_funfam.gff", + "md5_checksum": "1b8b64c254f88dd9a8e3cd42bde7b7ba", + "id": "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", + "file_size_bytes": 45160077 + }, + { + "name": "Gp0127631_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko_ec.gff", + "md5_checksum": "01769b6920ba82884f19ac3f88428db1", + "id": "nmdc:01769b6920ba82884f19ac3f88428db1", + "file_size_bytes": 28510384 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34706" + }, + "has_input": [ + "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", + "nmdc:2f6baf7176d2d904c02ae71875a8d326" + ], + "too_short_contig_num": 219869, + "part_of": [ + "nmdc:mga0jx8k09" + ], + "binned_contig_num": 506, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:53faea62cf1183292bc6fca374f75ed1", + "nmdc:7a6616d3262630c2aea2923e3c2683d0", + "nmdc:e16dde65e7229d69949c9e2dee7e2413", + "nmdc:58acda197bd8136a80d5047342008cdf", + "nmdc:8d5e2b8a8dede83c2f74182f506f9176" + ], + "was_informed_by": "gold:Gp0127631", + "input_contig_num": 237399, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0jx8k09", + "mags_list": [ + { + "number_of_contig": 151, + "completeness": 11.4, + "bin_name": "bins.1", + "gene_count": 748, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + 
"number_of_contig": 268, + "completeness": 7.47, + "bin_name": "bins.2", + "gene_count": 1304, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 87, + "completeness": 13.32, + "bin_name": "bins.3", + "gene_count": 412, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 17024, + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127631_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.tooShort.fa", + "md5_checksum": "53faea62cf1183292bc6fca374f75ed1", + "id": "nmdc:53faea62cf1183292bc6fca374f75ed1", + "file_size_bytes": 99316833 + }, + { + "name": "Gp0127631_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127631", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.unbinned.fa", + "md5_checksum": "7a6616d3262630c2aea2923e3c2683d0", + "id": "nmdc:7a6616d3262630c2aea2923e3c2683d0", + "file_size_bytes": 27381739 + }, + { + "name": "Gp0127631_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127631", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_checkm_qa.out", + "md5_checksum": "e16dde65e7229d69949c9e2dee7e2413", + "id": "nmdc:e16dde65e7229d69949c9e2dee7e2413", + "file_size_bytes": 1085 + }, + { + "name": "Gp0127631_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127631", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_hqmq_bin.zip", + "md5_checksum": "58acda197bd8136a80d5047342008cdf", + "id": "nmdc:58acda197bd8136a80d5047342008cdf", + "file_size_bytes": 182 + }, + { + "name": "Gp0127631_metabat2 bins", + "description": "metabat2 bins for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_metabat_bin.zip", + "md5_checksum": "8d5e2b8a8dede83c2f74182f506f9176", + "id": "nmdc:8d5e2b8a8dede83c2f74182f506f9176", + "file_size_bytes": 596616 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b41" + }, + "id": "nmdc:omprc-11-9pbab972", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-3yjh4z33" + ], + "has_output": [ + "jgi:574fde7c7ded5e3df1ee1419" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": 
"2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127630" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c865" + }, + "has_input": [ + "nmdc:0e737a8e36535f70bff074004ee1f9c0" + ], + "part_of": [ + "nmdc:mga09n3g47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", + "nmdc:088fd18cb9169097e739289d2e5ebb13" + ], + "was_informed_by": "gold:Gp0127630", + "input_read_count": 28569382, + "output_read_bases": 4016672570, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4313976682, + "name": "Read QC Activity for nmdc:mga09n3g47", + "output_read_count": 26868700, + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:54:22+00:00", + "output_data_objects": [ + { + "name": "Gp0127630_Filtered Reads", + "description": "Filtered Reads for Gp0127630", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filtered.fastq.gz", + "md5_checksum": "eaffb16b5247d85c08f8af73bcb8b65e", + "id": "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", + "file_size_bytes": 2294158265 + }, + { + "name": "Gp0127630_Filtered Stats", + "description": "Filtered Stats for Gp0127630", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filterStats.txt", + "md5_checksum": "088fd18cb9169097e739289d2e5ebb13", + "id": "nmdc:088fd18cb9169097e739289d2e5ebb13", + 
"file_size_bytes": 288 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf27" + }, + "has_input": [ + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "nmdc:2f21fd19f055d1931ab82016ed781a12", + "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" + ], + "was_informed_by": "gold:Gp0127630", + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:54:22+00:00", + "output_data_objects": [ + { + "name": "Gp0127630_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", + "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", + "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "file_size_bytes": 3373 + }, + { + "name": "Gp0127630_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", + "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", + "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "file_size_bytes": 791488 + }, + { + "name": "Gp0127630_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127630", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", + 
"md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", + "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "file_size_bytes": 235803 + }, + { + "name": "Gp0127630_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127630", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", + "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", + "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", + "file_size_bytes": 1974171566 + }, + { + "name": "Gp0127630_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127630", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", + "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", + "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "file_size_bytes": 255012 + }, + { + "name": "Gp0127630_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127630", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", + "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", + "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "file_size_bytes": 2330430 + }, + { + "name": "Gp0127630_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127630", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", + "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", + "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "file_size_bytes": 1584744477 + }, + { + "name": "Gp0127630_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127630", + 
"data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", + "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", + "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "file_size_bytes": 650172 + }, + { + "name": "Gp0127630_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127630", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", + "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", + "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", + "file_size_bytes": 3962195 + } + ] + }, + { + "_id": { + "$oid": "61e71939833bcf838a6fff09" + }, + "has_input": [ + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" + ], + "part_of": [ + "nmdc:mga09n3g47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "nmdc:2f21fd19f055d1931ab82016ed781a12", + "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" + ], + "was_informed_by": "gold:Gp0127630", + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:54:22+00:00", + "output_data_objects": [ + { + "name": "Gp0127630_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", + "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", + "id": 
"nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "file_size_bytes": 3373 + }, + { + "name": "Gp0127630_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", + "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", + "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "file_size_bytes": 791488 + }, + { + "name": "Gp0127630_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127630", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", + "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", + "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "file_size_bytes": 235803 + }, + { + "name": "Gp0127630_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127630", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", + "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", + "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", + "file_size_bytes": 1974171566 + }, + { + "name": "Gp0127630_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127630", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", + "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", + "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "file_size_bytes": 255012 + }, + { + "name": "Gp0127630_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127630", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", + "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", + "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "file_size_bytes": 2330430 + }, + { + "name": "Gp0127630_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127630", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", + "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", + "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "file_size_bytes": 1584744477 + }, + { + "name": "Gp0127630_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127630", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", + "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", + "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "file_size_bytes": 650172 + }, + { + "name": "Gp0127630_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127630", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", + "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", + "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", + "file_size_bytes": 3962195 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f95" + }, + "has_input": [ + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" + ], + "part_of": [ + "nmdc:mga09n3g47" + ], + "ctg_logsum": 77070, + "scaf_logsum": 77428, + "gap_pct": 0.00093, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "nmdc:118dd6190bdaf127d3c105cc73012cc3", + "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", + 
"nmdc:33d86c437a046031ea2b4bed5a2d2d6b", + "nmdc:873f16e03e0f94c9ec28573fb10ad6d8" + ], + "asm_score": 6.312, + "was_informed_by": "gold:Gp0127630", + "ctg_powsum": 8755.579, + "scaf_max": 31136, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "scaf_powsum": 8795.268, + "execution_resource": "NERSC-Cori", + "contigs": 127321, + "name": "Assembly Activity for nmdc:mga09n3g47", + "ctg_max": 31136, + "gc_std": 0.09346, + "contig_bp": 52740992, + "gc_avg": 0.61288, + "started_at_time": "2021-10-11T02:26:53Z", + "scaf_bp": 52741482, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 127272, + "ended_at_time": "2021-10-11T04:54:22+00:00", + "ctg_l50": 372, + "ctg_l90": 284, + "ctg_n50": 41888, + "ctg_n90": 110882, + "scaf_l50": 372, + "scaf_l90": 284, + "scaf_n50": 41856, + "scaf_n90": 110834, + "output_data_objects": [ + { + "name": "Gp0127630_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127630", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", + "md5_checksum": "7b35237c97a75f17ba74be0fe96416c9", + "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "file_size_bytes": 57511432 + }, + { + "name": "Gp0127630_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127630", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", + "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", + "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", + "file_size_bytes": 57128690 + }, + { + "name": "Gp0127630_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", + "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", + "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", + "file_size_bytes": 10020081 + }, + { + "name": 
"Gp0127630_Assembled AGP file", + "description": "Assembled AGP file for Gp0127630", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_assembly.agp", + "md5_checksum": "33d86c437a046031ea2b4bed5a2d2d6b", + "id": "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", + "file_size_bytes": 9337675 + }, + { + "name": "Gp0127630_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127630", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_pairedMapped_sorted.bam", + "md5_checksum": "873f16e03e0f94c9ec28573fb10ad6d8", + "id": "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", + "file_size_bytes": 2461822274 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ad" + }, + "has_input": [ + "nmdc:7b35237c97a75f17ba74be0fe96416c9" + ], + "part_of": [ + "nmdc:mga09n3g47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f7735eb161908954feda34285993f1b9", + "nmdc:c6053080461e8cc0bbadd13e0775e108", + "nmdc:4878e3d5a95e67c0bb81da53e03400be", + "nmdc:dbc4d4e179a86aa95211de3e62219191", + "nmdc:5bdd96be3fbc888969d92c2ed6392846", + "nmdc:78026e2afc7644463828fbbfa4d8d727", + "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", + "nmdc:f949efd8a6b6affb4707a4314980e86e", + "nmdc:2f9f0b8164c35117da1e121e63ad772f", + "nmdc:1e3d433d3cb308d086dec26916b6b1bf", + "nmdc:d467bd6407a5a41798aa84df69a4a31d", + "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1" + ], + "was_informed_by": "gold:Gp0127630", + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga09n3g47", + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:54:22+00:00", + "output_data_objects": [ + { + "name": "Gp0127630_Protein FAA", + "description": "Protein FAA for Gp0127630", + 
"data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_proteins.faa", + "md5_checksum": "f7735eb161908954feda34285993f1b9", + "id": "nmdc:f7735eb161908954feda34285993f1b9", + "file_size_bytes": 34246728 + }, + { + "name": "Gp0127630_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127630", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_structural_annotation.gff", + "md5_checksum": "c6053080461e8cc0bbadd13e0775e108", + "id": "nmdc:c6053080461e8cc0bbadd13e0775e108", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127630_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127630", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_functional_annotation.gff", + "md5_checksum": "4878e3d5a95e67c0bb81da53e03400be", + "id": "nmdc:4878e3d5a95e67c0bb81da53e03400be", + "file_size_bytes": 40345940 + }, + { + "name": "Gp0127630_KO TSV file", + "description": "KO TSV file for Gp0127630", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko.tsv", + "md5_checksum": "dbc4d4e179a86aa95211de3e62219191", + "id": "nmdc:dbc4d4e179a86aa95211de3e62219191", + "file_size_bytes": 4543233 + }, + { + "name": "Gp0127630_EC TSV file", + "description": "EC TSV file for Gp0127630", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ec.tsv", + "md5_checksum": "5bdd96be3fbc888969d92c2ed6392846", + "id": "nmdc:5bdd96be3fbc888969d92c2ed6392846", + "file_size_bytes": 3027431 + }, + { + "name": "Gp0127630_COG GFF file", + "description": "COG GFF file for Gp0127630", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cog.gff", + "md5_checksum": "78026e2afc7644463828fbbfa4d8d727", + "id": "nmdc:78026e2afc7644463828fbbfa4d8d727", + "file_size_bytes": 23085097 + }, + { + "name": "Gp0127630_PFAM GFF file", + "description": "PFAM GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_pfam.gff", + "md5_checksum": "ef99a9afe80e1acc086694ca8ab4cca7", + "id": "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", + "file_size_bytes": 16769237 + }, + { + "name": "Gp0127630_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_tigrfam.gff", + "md5_checksum": "f949efd8a6b6affb4707a4314980e86e", + "id": "nmdc:f949efd8a6b6affb4707a4314980e86e", + "file_size_bytes": 1710760 + }, + { + "name": "Gp0127630_SMART GFF file", + "description": "SMART GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_smart.gff", + "md5_checksum": "2f9f0b8164c35117da1e121e63ad772f", + "id": "nmdc:2f9f0b8164c35117da1e121e63ad772f", + "file_size_bytes": 5166448 + }, + { + "name": "Gp0127630_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_supfam.gff", + "md5_checksum": "1e3d433d3cb308d086dec26916b6b1bf", + "id": "nmdc:1e3d433d3cb308d086dec26916b6b1bf", + "file_size_bytes": 29155547 + }, + { + "name": "Gp0127630_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cath_funfam.gff", + "md5_checksum": "d467bd6407a5a41798aa84df69a4a31d", + "id": "nmdc:d467bd6407a5a41798aa84df69a4a31d", + "file_size_bytes": 21679406 + }, + { + "name": "Gp0127630_KO_EC GFF file", + "description": "KO_EC GFF file for 
Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko_ec.gff", + "md5_checksum": "4cb3db8f0ff98bf805f4750af65eb9d1", + "id": "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1", + "file_size_bytes": 14461252 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34707" + }, + "has_input": [ + "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", + "nmdc:4878e3d5a95e67c0bb81da53e03400be" + ], + "too_short_contig_num": 123771, + "part_of": [ + "nmdc:mga09n3g47" + ], + "binned_contig_num": 313, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", + "nmdc:acd651395108c71dd20eeebf9b177d06", + "nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", + "nmdc:287529453d35eab4acb72032a59994d0", + "nmdc:4ad58f05545a75edc1b933a0b0286d16" + ], + "was_informed_by": "gold:Gp0127630", + "input_contig_num": 127321, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga09n3g47", + "mags_list": [ + { + "number_of_contig": 86, + "completeness": 19.9, + "bin_name": "bins.1", + "gene_count": 422, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 227, + "completeness": 70.23, + "bin_name": "bins.2", + "gene_count": 1932, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.94, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 35 + } + ], + 
"unbinned_contig_num": 3237, + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:54:22+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127630_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.tooShort.fa", + "md5_checksum": "ce09d99bdfdf0379b09a3ae75c65d830", + "id": "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", + "file_size_bytes": 50450286 + }, + { + "name": "Gp0127630_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.unbinned.fa", + "md5_checksum": "acd651395108c71dd20eeebf9b177d06", + "id": "nmdc:acd651395108c71dd20eeebf9b177d06", + "file_size_bytes": 5114463 + }, + { + "name": "Gp0127630_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127630", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_checkm_qa.out", + "md5_checksum": "850a6fbbd2993f4dfeb5a40485e67f8e", + "id": "nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", + "file_size_bytes": 948 + }, + { + "name": "Gp0127630_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127630", + "data_object_type": 
"Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_hqmq_bin.zip", + "md5_checksum": "287529453d35eab4acb72032a59994d0", + "id": "nmdc:287529453d35eab4acb72032a59994d0", + "file_size_bytes": 484667 + }, + { + "name": "Gp0127630_metabat2 bins", + "description": "metabat2 bins for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_metabat_bin.zip", + "md5_checksum": "4ad58f05545a75edc1b933a0b0286d16", + "id": "nmdc:4ad58f05545a75edc1b933a0b0286d16", + "file_size_bytes": 110526 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b42" + }, + "id": "nmdc:omprc-11-0g415160", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-nbgp1x53" + ], + "has_output": [ + "jgi:574fde5e7ded5e3df1ee1401" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127633" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c867" + }, + "has_input": [ + "nmdc:c0b8d6516c48cfe5a0b110abe67ee983" + ], + "part_of": [ + "nmdc:mga05zvf81" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", + 
"nmdc:eccf0501d08f920a88b6598d573a8e3e" + ], + "was_informed_by": "gold:Gp0127633", + "input_read_count": 23291434, + "output_read_bases": 3367024367, + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3517006534, + "name": "Read QC Activity for nmdc:mga05zvf81", + "output_read_count": 22556158, + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:40:06+00:00", + "output_data_objects": [ + { + "name": "Gp0127633_Filtered Reads", + "description": "Filtered Reads for Gp0127633", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filtered.fastq.gz", + "md5_checksum": "7cbd497624d8b60ab2a5e7fdbe4730f2", + "id": "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", + "file_size_bytes": 1727224362 + }, + { + "name": "Gp0127633_Filtered Stats", + "description": "Filtered Stats for Gp0127633", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filterStats.txt", + "md5_checksum": "eccf0501d08f920a88b6598d573a8e3e", + "id": "nmdc:eccf0501d08f920a88b6598d573a8e3e", + "file_size_bytes": 280 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf29" + }, + "has_input": [ + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8bd9eb762acabbac5d079c379c28e381", + "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "nmdc:f445af1a7774572d156f55a898d26f09", + "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "nmdc:28beb8baabdaf346f2066b40f375a152", + "nmdc:1f74a43724c4afed5563499d05601e22", + "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "nmdc:275268a6b5aca33c427d11877bcfa674", + "nmdc:89e810af4915f0e117eaa60550587453" + ], + "was_informed_by": "gold:Gp0127633", + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", 
+ "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:40:06+00:00", + "output_data_objects": [ + { + "name": "Gp0127633_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", + "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", + "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", + "file_size_bytes": 875 + }, + { + "name": "Gp0127633_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", + "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", + "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "file_size_bytes": 578856 + }, + { + "name": "Gp0127633_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127633", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", + "md5_checksum": "f445af1a7774572d156f55a898d26f09", + "id": "nmdc:f445af1a7774572d156f55a898d26f09", + "file_size_bytes": 228067 + }, + { + "name": "Gp0127633_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127633", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", + "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", + "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "file_size_bytes": 1646942155 + }, + { + "name": "Gp0127633_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127633", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", + "md5_checksum": "28beb8baabdaf346f2066b40f375a152", + "id": "nmdc:28beb8baabdaf346f2066b40f375a152", + "file_size_bytes": 252735 + }, + { + "name": "Gp0127633_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127633", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", + "md5_checksum": "1f74a43724c4afed5563499d05601e22", + "id": "nmdc:1f74a43724c4afed5563499d05601e22", + "file_size_bytes": 2329168 + }, + { + "name": "Gp0127633_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127633", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", + "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", + "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "file_size_bytes": 1310443491 + }, + { + "name": "Gp0127633_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127633", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", + "md5_checksum": "275268a6b5aca33c427d11877bcfa674", + "id": "nmdc:275268a6b5aca33c427d11877bcfa674", + "file_size_bytes": 621441 + }, + { + "name": "Gp0127633_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127633", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", + "md5_checksum": "89e810af4915f0e117eaa60550587453", + "id": "nmdc:89e810af4915f0e117eaa60550587453", + "file_size_bytes": 3891844 + } + ] + }, + { + "_id": { + "$oid": 
"61e7193c833bcf838a70001a" + }, + "has_input": [ + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" + ], + "part_of": [ + "nmdc:mga05zvf81" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8bd9eb762acabbac5d079c379c28e381", + "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "nmdc:f445af1a7774572d156f55a898d26f09", + "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "nmdc:28beb8baabdaf346f2066b40f375a152", + "nmdc:1f74a43724c4afed5563499d05601e22", + "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "nmdc:275268a6b5aca33c427d11877bcfa674", + "nmdc:89e810af4915f0e117eaa60550587453" + ], + "was_informed_by": "gold:Gp0127633", + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:40:06+00:00", + "output_data_objects": [ + { + "name": "Gp0127633_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", + "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", + "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", + "file_size_bytes": 875 + }, + { + "name": "Gp0127633_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", + "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", + "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "file_size_bytes": 578856 + }, + { + "name": "Gp0127633_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127633", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", + "md5_checksum": 
"f445af1a7774572d156f55a898d26f09", + "id": "nmdc:f445af1a7774572d156f55a898d26f09", + "file_size_bytes": 228067 + }, + { + "name": "Gp0127633_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127633", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", + "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", + "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "file_size_bytes": 1646942155 + }, + { + "name": "Gp0127633_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127633", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", + "md5_checksum": "28beb8baabdaf346f2066b40f375a152", + "id": "nmdc:28beb8baabdaf346f2066b40f375a152", + "file_size_bytes": 252735 + }, + { + "name": "Gp0127633_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127633", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", + "md5_checksum": "1f74a43724c4afed5563499d05601e22", + "id": "nmdc:1f74a43724c4afed5563499d05601e22", + "file_size_bytes": 2329168 + }, + { + "name": "Gp0127633_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127633", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", + "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", + "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "file_size_bytes": 1310443491 + }, + { + "name": "Gp0127633_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127633", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", + "md5_checksum": "275268a6b5aca33c427d11877bcfa674", + "id": "nmdc:275268a6b5aca33c427d11877bcfa674", + "file_size_bytes": 621441 + }, + { + "name": "Gp0127633_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127633", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", + "md5_checksum": "89e810af4915f0e117eaa60550587453", + "id": "nmdc:89e810af4915f0e117eaa60550587453", + "file_size_bytes": 3891844 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f97" + }, + "has_input": [ + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" + ], + "part_of": [ + "nmdc:mga05zvf81" + ], + "ctg_logsum": 378958, + "scaf_logsum": 380592, + "gap_pct": 0.00189, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "nmdc:327e130872e4c5faac2f1c9f8dea2316", + "nmdc:f61f1e62791a38beae95bd95833a6784", + "nmdc:416254a3bfc685dd16c11d65a222305f", + "nmdc:bc054294600fa310924f104484effd3e" + ], + "asm_score": 4.48, + "was_informed_by": "gold:Gp0127633", + "ctg_powsum": 41464, + "scaf_max": 30530, + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "scaf_powsum": 41655, + "execution_resource": "NERSC-Cori", + "contigs": 272879, + "name": "Assembly Activity for nmdc:mga05zvf81", + "ctg_max": 30530, + "gc_std": 0.08353, + "contig_bp": 141974737, + "gc_avg": 0.63381, + "started_at_time": "2021-10-11T02:24:58Z", + "scaf_bp": 141977427, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 272628, + "ended_at_time": "2021-10-11T03:40:06+00:00", + "ctg_l50": 526, + "ctg_l90": 298, + "ctg_n50": 72824, + "ctg_n90": 224178, + "scaf_l50": 527, + "scaf_l90": 298, + "scaf_n50": 72571, + "scaf_n90": 223970, + "output_data_objects": [ + { + "name": 
"Gp0127633_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127633", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_contigs.fna", + "md5_checksum": "ea5ca9478871b3e2600e1df0d748cbef", + "id": "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "file_size_bytes": 152814586 + }, + { + "name": "Gp0127633_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127633", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_scaffolds.fna", + "md5_checksum": "327e130872e4c5faac2f1c9f8dea2316", + "id": "nmdc:327e130872e4c5faac2f1c9f8dea2316", + "file_size_bytes": 151993436 + }, + { + "name": "Gp0127633_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_covstats.txt", + "md5_checksum": "f61f1e62791a38beae95bd95833a6784", + "id": "nmdc:f61f1e62791a38beae95bd95833a6784", + "file_size_bytes": 21678212 + }, + { + "name": "Gp0127633_Assembled AGP file", + "description": "Assembled AGP file for Gp0127633", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_assembly.agp", + "md5_checksum": "416254a3bfc685dd16c11d65a222305f", + "id": "nmdc:416254a3bfc685dd16c11d65a222305f", + "file_size_bytes": 20304047 + }, + { + "name": "Gp0127633_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127633", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_pairedMapped_sorted.bam", + "md5_checksum": "bc054294600fa310924f104484effd3e", + "id": "nmdc:bc054294600fa310924f104484effd3e", + "file_size_bytes": 1959649749 + } + ] + }, + { + "_id": { + "$oid": 
"649b005bbf2caae0415ef9b0" + }, + "has_input": [ + "nmdc:ea5ca9478871b3e2600e1df0d748cbef" + ], + "part_of": [ + "nmdc:mga05zvf81" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8defcf55f08cd56d8b2560e27f490ca5", + "nmdc:a6031c0a101419dd413a0804937425ca", + "nmdc:43069b1146c84c064b7ff334dc9ff100", + "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", + "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", + "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", + "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", + "nmdc:8f7429420cbefb9e27bcdbe6252e5288", + "nmdc:6d69127dc30609e4861a7b2443b99164", + "nmdc:00243bcaf50313d937a7685380a876bb", + "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", + "nmdc:907439e314b4f4623244e2cec8532098" + ], + "was_informed_by": "gold:Gp0127633", + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga05zvf81", + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:40:06+00:00", + "output_data_objects": [ + { + "name": "Gp0127633_Protein FAA", + "description": "Protein FAA for Gp0127633", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_proteins.faa", + "md5_checksum": "8defcf55f08cd56d8b2560e27f490ca5", + "id": "nmdc:8defcf55f08cd56d8b2560e27f490ca5", + "file_size_bytes": 85918779 + }, + { + "name": "Gp0127633_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127633", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_structural_annotation.gff", + "md5_checksum": "a6031c0a101419dd413a0804937425ca", + "id": "nmdc:a6031c0a101419dd413a0804937425ca", + "file_size_bytes": 2527 + }, + { + "name": "Gp0127633_Functional annotation GFF file", + "description": "Functional annotation 
GFF file for Gp0127633", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_functional_annotation.gff", + "md5_checksum": "43069b1146c84c064b7ff334dc9ff100", + "id": "nmdc:43069b1146c84c064b7ff334dc9ff100", + "file_size_bytes": 95647963 + }, + { + "name": "Gp0127633_KO TSV file", + "description": "KO TSV file for Gp0127633", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko.tsv", + "md5_checksum": "acc5a2c445dc6e00668c9a5d50aecdb8", + "id": "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", + "file_size_bytes": 10638485 + }, + { + "name": "Gp0127633_EC TSV file", + "description": "EC TSV file for Gp0127633", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ec.tsv", + "md5_checksum": "ec91d5d7a8af4fb845e22cbe7ab82bde", + "id": "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", + "file_size_bytes": 6991172 + }, + { + "name": "Gp0127633_COG GFF file", + "description": "COG GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cog.gff", + "md5_checksum": "3cd238ff1bb176b7a159aeb34a7c4683", + "id": "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", + "file_size_bytes": 56525933 + }, + { + "name": "Gp0127633_PFAM GFF file", + "description": "PFAM GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_pfam.gff", + "md5_checksum": "5103ea2a481ea3b82f1aa98ab7a36998", + "id": "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", + "file_size_bytes": 43189711 + }, + { + "name": "Gp0127633_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_tigrfam.gff", + "md5_checksum": "8f7429420cbefb9e27bcdbe6252e5288", + "id": 
"nmdc:8f7429420cbefb9e27bcdbe6252e5288", + "file_size_bytes": 4806086 + }, + { + "name": "Gp0127633_SMART GFF file", + "description": "SMART GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_smart.gff", + "md5_checksum": "6d69127dc30609e4861a7b2443b99164", + "id": "nmdc:6d69127dc30609e4861a7b2443b99164", + "file_size_bytes": 12776467 + }, + { + "name": "Gp0127633_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_supfam.gff", + "md5_checksum": "00243bcaf50313d937a7685380a876bb", + "id": "nmdc:00243bcaf50313d937a7685380a876bb", + "file_size_bytes": 70607320 + }, + { + "name": "Gp0127633_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cath_funfam.gff", + "md5_checksum": "ec6ffd40772dee9d48dbec0beb6b3321", + "id": "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", + "file_size_bytes": 53950895 + }, + { + "name": "Gp0127633_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko_ec.gff", + "md5_checksum": "907439e314b4f4623244e2cec8532098", + "id": "nmdc:907439e314b4f4623244e2cec8532098", + "file_size_bytes": 33781965 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3470e" + }, + "has_input": [ + "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "nmdc:bc054294600fa310924f104484effd3e", + "nmdc:43069b1146c84c064b7ff334dc9ff100" + ], + "too_short_contig_num": 252383, + "part_of": [ + "nmdc:mga05zvf81" + ], + "binned_contig_num": 738, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:00415cf72f9a77f907e3467a08b123c5", + "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", + 
"nmdc:3f435d6da551400a4ba4400fa3608e7f", + "nmdc:c66f93153962f8b80c8f3d6978b6d802", + "nmdc:ce2a364ec51a1d6311a319509751266e" + ], + "was_informed_by": "gold:Gp0127633", + "input_contig_num": 272872, + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga05zvf81", + "mags_list": [ + { + "number_of_contig": 83, + "completeness": 0.0, + "bin_name": "bins.1", + "gene_count": 600, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 142, + "completeness": 43.03, + "bin_name": "bins.2", + "gene_count": 746, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 194, + "completeness": 73.62, + "bin_name": "bins.3", + "gene_count": 1844, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 2.43, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 31 + }, + { + "number_of_contig": 91, + "completeness": 10.82, + "bin_name": "bins.4", + "gene_count": 442, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 82, + "completeness": 10.97, + 
"bin_name": "bins.5", + "gene_count": 385, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 146, + "completeness": 31.6, + "bin_name": "bins.6", + "gene_count": 800, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.6, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 20 + } + ], + "unbinned_contig_num": 19751, + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:40:06+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127633_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.tooShort.fa", + "md5_checksum": "00415cf72f9a77f907e3467a08b123c5", + "id": "nmdc:00415cf72f9a77f907e3467a08b123c5", + "file_size_bytes": 116930318 + }, + { + "name": "Gp0127633_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.unbinned.fa", + "md5_checksum": 
"83064ec7bfc35a79a1ca76fdd8ad75fd", + "id": "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", + "file_size_bytes": 31883888 + }, + { + "name": "Gp0127633_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127633", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_checkm_qa.out", + "md5_checksum": "3f435d6da551400a4ba4400fa3608e7f", + "id": "nmdc:3f435d6da551400a4ba4400fa3608e7f", + "file_size_bytes": 1590 + }, + { + "name": "Gp0127633_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127633", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_hqmq_bin.zip", + "md5_checksum": "c66f93153962f8b80c8f3d6978b6d802", + "id": "nmdc:c66f93153962f8b80c8f3d6978b6d802", + "file_size_bytes": 460412 + }, + { + "name": "Gp0127633_metabat2 bins", + "description": "metabat2 bins for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_metabat_bin.zip", + "md5_checksum": "ce2a364ec51a1d6311a319509751266e", + "id": "nmdc:ce2a364ec51a1d6311a319509751266e", + "file_size_bytes": 753147 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b43" + }, + "id": "nmdc:omprc-11-z5qv0f24", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-v0q5ak63" + ], + "has_output": [ + "jgi:574fe0967ded5e3df1ee1482" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas 
with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127627" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c866" + }, + "has_input": [ + "nmdc:45f15cded08bad75a2ef9d7e4b1f42de" + ], + "part_of": [ + "nmdc:mga0daby71" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ed95796b3fd964c6bedb141d70737ebf", + "nmdc:308ae373809697291bbc7947a1e4ed2d" + ], + "was_informed_by": "gold:Gp0127627", + "input_read_count": 20505370, + "output_read_bases": 2992084693, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3096310870, + "name": "Read QC Activity for nmdc:mga0daby71", + "output_read_count": 19995028, + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T19:08:49+00:00", + "output_data_objects": [ + { + "name": "Gp0127627_Filtered Reads", + "description": "Filtered Reads for Gp0127627", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filtered.fastq.gz", + "md5_checksum": "ed95796b3fd964c6bedb141d70737ebf", + "id": "nmdc:ed95796b3fd964c6bedb141d70737ebf", + "file_size_bytes": 1752924191 + }, + { + "name": "Gp0127627_Filtered Stats", + "description": "Filtered Stats for Gp0127627", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filterStats.txt", + "md5_checksum": "308ae373809697291bbc7947a1e4ed2d", + "id": "nmdc:308ae373809697291bbc7947a1e4ed2d", + "file_size_bytes": 281 + } + ] + }, + { + "_id": { + "$oid": 
"649b009bff710ae353f8cf2a" + }, + "has_input": [ + "nmdc:ed95796b3fd964c6bedb141d70737ebf" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a5ac6665e5d66242b1c885a911236982", + "nmdc:d19478a191693d643157a89c69cc02d1", + "nmdc:679a82699663e88a5e8828ee081fa967", + "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "nmdc:0380e478962be82e0d97a6339f7f3b91", + "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "nmdc:a2a0029691c04851f4a98003a773fe3f", + "nmdc:bab24ab64ad432d115f182df7198d46e" + ], + "was_informed_by": "gold:Gp0127627", + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0daby71", + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T19:08:49+00:00", + "output_data_objects": [ + { + "name": "Gp0127627_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", + "md5_checksum": "a5ac6665e5d66242b1c885a911236982", + "id": "nmdc:a5ac6665e5d66242b1c885a911236982", + "file_size_bytes": 5530 + }, + { + "name": "Gp0127627_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", + "md5_checksum": "d19478a191693d643157a89c69cc02d1", + "id": "nmdc:d19478a191693d643157a89c69cc02d1", + "file_size_bytes": 825047 + }, + { + "name": "Gp0127627_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127627", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", + "md5_checksum": "679a82699663e88a5e8828ee081fa967", + "id": 
"nmdc:679a82699663e88a5e8828ee081fa967", + "file_size_bytes": 241114 + }, + { + "name": "Gp0127627_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127627", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", + "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", + "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "file_size_bytes": 1463660267 + }, + { + "name": "Gp0127627_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127627", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", + "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", + "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", + "file_size_bytes": 254347 + }, + { + "name": "Gp0127627_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127627", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", + "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", + "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "file_size_bytes": 2330603 + }, + { + "name": "Gp0127627_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127627", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", + "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", + "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "file_size_bytes": 1177609473 + }, + { + "name": "Gp0127627_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127627", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", + "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", + "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", + "file_size_bytes": 643281 + }, + { + "name": "Gp0127627_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127627", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", + "md5_checksum": "bab24ab64ad432d115f182df7198d46e", + "id": "nmdc:bab24ab64ad432d115f182df7198d46e", + "file_size_bytes": 3926756 + } + ] + }, + { + "_id": { + "$oid": "61e7195c833bcf838a70049b" + }, + "has_input": [ + "nmdc:ed95796b3fd964c6bedb141d70737ebf" + ], + "part_of": [ + "nmdc:mga0daby71" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a5ac6665e5d66242b1c885a911236982", + "nmdc:d19478a191693d643157a89c69cc02d1", + "nmdc:679a82699663e88a5e8828ee081fa967", + "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "nmdc:0380e478962be82e0d97a6339f7f3b91", + "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "nmdc:a2a0029691c04851f4a98003a773fe3f", + "nmdc:bab24ab64ad432d115f182df7198d46e" + ], + "was_informed_by": "gold:Gp0127627", + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0daby71", + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T19:08:49+00:00", + "output_data_objects": [ + { + "name": "Gp0127627_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", + "md5_checksum": "a5ac6665e5d66242b1c885a911236982", + "id": "nmdc:a5ac6665e5d66242b1c885a911236982", + "file_size_bytes": 5530 
+ }, + { + "name": "Gp0127627_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", + "md5_checksum": "d19478a191693d643157a89c69cc02d1", + "id": "nmdc:d19478a191693d643157a89c69cc02d1", + "file_size_bytes": 825047 + }, + { + "name": "Gp0127627_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127627", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", + "md5_checksum": "679a82699663e88a5e8828ee081fa967", + "id": "nmdc:679a82699663e88a5e8828ee081fa967", + "file_size_bytes": 241114 + }, + { + "name": "Gp0127627_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127627", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", + "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", + "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "file_size_bytes": 1463660267 + }, + { + "name": "Gp0127627_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127627", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", + "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", + "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", + "file_size_bytes": 254347 + }, + { + "name": "Gp0127627_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127627", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", + "md5_checksum": 
"0c1d139abdfa9fa10f26923abb4d6bda", + "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "file_size_bytes": 2330603 + }, + { + "name": "Gp0127627_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127627", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", + "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", + "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "file_size_bytes": 1177609473 + }, + { + "name": "Gp0127627_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127627", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", + "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", + "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", + "file_size_bytes": 643281 + }, + { + "name": "Gp0127627_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127627", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", + "md5_checksum": "bab24ab64ad432d115f182df7198d46e", + "id": "nmdc:bab24ab64ad432d115f182df7198d46e", + "file_size_bytes": 3926756 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f99" + }, + "has_input": [ + "nmdc:ed95796b3fd964c6bedb141d70737ebf" + ], + "part_of": [ + "nmdc:mga0daby71" + ], + "ctg_logsum": 6346.305, + "scaf_logsum": 6368.36, + "gap_pct": 0.00044, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", + "nmdc:0d3200307a90e23525d3fefa7a25f867", + "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", + "nmdc:08f1ba3d3d380a167182c1beb7da304f" + ], + "asm_score": 4.807, + "was_informed_by": 
"gold:Gp0127627", + "ctg_powsum": 681.483, + "scaf_max": 15604, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "scaf_powsum": 683.717, + "execution_resource": "NERSC-Cori", + "contigs": 51188, + "name": "Assembly Activity for nmdc:mga0daby71", + "ctg_max": 15604, + "gc_std": 0.11462, + "gc_avg": 0.57328, + "contig_bp": 18008171, + "started_at_time": "2021-11-13T18:47:34Z", + "scaf_bp": 18008251, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 51180, + "ended_at_time": "2021-11-13T19:08:49+00:00", + "ctg_l50": 321, + "ctg_l90": 282, + "ctg_n50": 20415, + "ctg_n90": 44756, + "scaf_l50": 321, + "scaf_l90": 282, + "scaf_n50": 20413, + "scaf_n90": 44748, + "output_data_objects": [ + { + "name": "Gp0127627_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127627", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", + "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", + "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "file_size_bytes": 19853676 + }, + { + "name": "Gp0127627_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127627", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", + "md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", + "id": "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", + "file_size_bytes": 19699986 + }, + { + "name": "Gp0127627_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", + "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", + "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", + "file_size_bytes": 3997845 + }, + { + "name": "Gp0127627_Assembled AGP file", + "description": "Assembled AGP file for Gp0127627", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_assembly.agp", + "md5_checksum": "e6e7f40bb1f1e333904f20dc3c317e37", + "id": "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", + "file_size_bytes": 3715901 + }, + { + "name": "Gp0127627_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127627", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_pairedMapped_sorted.bam", + "md5_checksum": "08f1ba3d3d380a167182c1beb7da304f", + "id": "nmdc:08f1ba3d3d380a167182c1beb7da304f", + "file_size_bytes": 1854522814 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ab" + }, + "has_input": [ + "nmdc:a7db57faea894bec6603a69abfdfcf7d" + ], + "part_of": [ + "nmdc:mga0daby71" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", + "nmdc:7e5852b8ca5590f81c543ea69398410f", + "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", + "nmdc:9e52b5a16f0eff5df36bd46038702a52", + "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", + "nmdc:4a788566d47b89e8bc79eea6e26f2c42", + "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", + "nmdc:196a8e27999a32a6168d23f30d84f37b", + "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", + "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", + "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", + "nmdc:2c73a261047ff94b898c190418373075" + ], + "was_informed_by": "gold:Gp0127627", + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0daby71", + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-11-13T19:08:49+00:00", + "output_data_objects": [ + { + "name": "Gp0127627_Protein FAA", + "description": "Protein FAA for Gp0127627", + "data_object_type": "Annotation Amino Acid FASTA", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_proteins.faa", + "md5_checksum": "6003e73aa18ac6aa3cc0f7e020c7170e", + "id": "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", + "file_size_bytes": 12141650 + }, + { + "name": "Gp0127627_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127627", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_structural_annotation.gff", + "md5_checksum": "7e5852b8ca5590f81c543ea69398410f", + "id": "nmdc:7e5852b8ca5590f81c543ea69398410f", + "file_size_bytes": 8716031 + }, + { + "name": "Gp0127627_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127627", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_functional_annotation.gff", + "md5_checksum": "cf868630ca2d9037e69e82cfb76a7bd7", + "id": "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", + "file_size_bytes": 14995284 + }, + { + "name": "Gp0127627_KO TSV file", + "description": "KO TSV file for Gp0127627", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko.tsv", + "md5_checksum": "9e52b5a16f0eff5df36bd46038702a52", + "id": "nmdc:9e52b5a16f0eff5df36bd46038702a52", + "file_size_bytes": 1782540 + }, + { + "name": "Gp0127627_EC TSV file", + "description": "EC TSV file for Gp0127627", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ec.tsv", + "md5_checksum": "c44dceb1684f1a4249e7b8e944a2b7cf", + "id": "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", + "file_size_bytes": 1180943 + }, + { + "name": "Gp0127627_COG GFF file", + "description": "COG GFF file for Gp0127627", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cog.gff", + "md5_checksum": "4a788566d47b89e8bc79eea6e26f2c42", + "id": "nmdc:4a788566d47b89e8bc79eea6e26f2c42", + "file_size_bytes": 8144598 + }, + { + "name": "Gp0127627_PFAM GFF file", + "description": "PFAM GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_pfam.gff", + "md5_checksum": "3d01f11a480f59cefdc67e7b6c7f9fc6", + "id": "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", + "file_size_bytes": 5854816 + }, + { + "name": "Gp0127627_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_tigrfam.gff", + "md5_checksum": "196a8e27999a32a6168d23f30d84f37b", + "id": "nmdc:196a8e27999a32a6168d23f30d84f37b", + "file_size_bytes": 549612 + }, + { + "name": "Gp0127627_SMART GFF file", + "description": "SMART GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_smart.gff", + "md5_checksum": "c3040fe67c2c8b2924c6db6c53b268ce", + "id": "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", + "file_size_bytes": 1739035 + }, + { + "name": "Gp0127627_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_supfam.gff", + "md5_checksum": "5594ce118ad4b2f9ec03adc10ebb6267", + "id": "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", + "file_size_bytes": 10326655 + }, + { + "name": "Gp0127627_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cath_funfam.gff", + "md5_checksum": "9d2ac6550f5a1dc3d4b3743e8fe2ceec", + "id": "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", + "file_size_bytes": 7571959 + }, + { + "name": "Gp0127627_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127627", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko_ec.gff", + "md5_checksum": "2c73a261047ff94b898c190418373075", + "id": "nmdc:2c73a261047ff94b898c190418373075", + "file_size_bytes": 5683569 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3470b" + }, + "has_input": [ + "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "nmdc:08f1ba3d3d380a167182c1beb7da304f", + "nmdc:cf868630ca2d9037e69e82cfb76a7bd7" + ], + "too_short_contig_num": 50792, + "part_of": [ + "nmdc:mga0daby71" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ba468a2c4f4810d87ba95ad9e123483d" + ], + "was_informed_by": "gold:Gp0127627", + "input_contig_num": 51188, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0daby71", + "mags_list": [], + "unbinned_contig_num": 396, + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-11-13T19:08:49+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127627_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127627", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/MAGs/nmdc_mga0daby71_hqmq_bin.zip", + "md5_checksum": "ba468a2c4f4810d87ba95ad9e123483d", + "id": "nmdc:ba468a2c4f4810d87ba95ad9e123483d", + "file_size_bytes": 182 + } + ] + } + ] + }, + { + "_id": 
{ + "$oid": "649b009773e8249959349b44" + }, + "id": "nmdc:omprc-11-8qms8262", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-0xprxw22" + ], + "has_output": [ + "jgi:574fde807ded5e3df1ee141b" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127632" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c868" + }, + "has_input": [ + "nmdc:5cbd7ceb39903cbded77b36ae866fe9f" + ], + "part_of": [ + "nmdc:mga0b6cy30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", + "nmdc:919c5aade4fffb76f743a33b035b2839" + ], + "was_informed_by": "gold:Gp0127632", + "input_read_count": 27906294, + "output_read_bases": 3905482172, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4213850394, + "name": "Read QC Activity for nmdc:mga0b6cy30", + "output_read_count": 26116440, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:08:32+00:00", + "output_data_objects": [ + { + "name": "Gp0127632_Filtered Reads", + "description": "Filtered Reads for Gp0127632", + "data_object_type": "Filtered 
Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filtered.fastq.gz", + "md5_checksum": "a43bfb55389206c2fc5ddb53e6aa2bc6", + "id": "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", + "file_size_bytes": 2199178772 + }, + { + "name": "Gp0127632_Filtered Stats", + "description": "Filtered Stats for Gp0127632", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filterStats.txt", + "md5_checksum": "919c5aade4fffb76f743a33b035b2839", + "id": "nmdc:919c5aade4fffb76f743a33b035b2839", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf31" + }, + "has_input": [ + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "nmdc:6c54105711e818c4d8169ab595b05efe", + "nmdc:adb155cdb656648496484998a62fb96f", + "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "nmdc:f345b3a57c37097a860e38d5e83835b8", + "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "nmdc:e350fda9bd0651755171d79b413b8da3" + ], + "was_informed_by": "gold:Gp0127632", + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:08:32+00:00", + "output_data_objects": [ + { + "name": "Gp0127632_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", + "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", + "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "file_size_bytes": 2899 + }, + { + "name": "Gp0127632_Gottcha2 full TSV report", + 
"description": "Gottcha2 full TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", + "md5_checksum": "6c54105711e818c4d8169ab595b05efe", + "id": "nmdc:6c54105711e818c4d8169ab595b05efe", + "file_size_bytes": 769416 + }, + { + "name": "Gp0127632_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127632", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", + "md5_checksum": "adb155cdb656648496484998a62fb96f", + "id": "nmdc:adb155cdb656648496484998a62fb96f", + "file_size_bytes": 235384 + }, + { + "name": "Gp0127632_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127632", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", + "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", + "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "file_size_bytes": 1917130445 + }, + { + "name": "Gp0127632_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127632", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", + "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", + "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", + "file_size_bytes": 255290 + }, + { + "name": "Gp0127632_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127632", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", + "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", + "id": 
"nmdc:c1f4471d943b284720a8becb5a2e32b4", + "file_size_bytes": 2333225 + }, + { + "name": "Gp0127632_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127632", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", + "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", + "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "file_size_bytes": 1537863470 + }, + { + "name": "Gp0127632_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127632", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", + "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", + "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "file_size_bytes": 648597 + }, + { + "name": "Gp0127632_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127632", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", + "md5_checksum": "e350fda9bd0651755171d79b413b8da3", + "id": "nmdc:e350fda9bd0651755171d79b413b8da3", + "file_size_bytes": 3959152 + } + ] + }, + { + "_id": { + "$oid": "61e7195d833bcf838a700521" + }, + "has_input": [ + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + ], + "part_of": [ + "nmdc:mga0b6cy30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "nmdc:6c54105711e818c4d8169ab595b05efe", + "nmdc:adb155cdb656648496484998a62fb96f", + "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "nmdc:f345b3a57c37097a860e38d5e83835b8", + "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "nmdc:e350fda9bd0651755171d79b413b8da3" + ], + 
"was_informed_by": "gold:Gp0127632", + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:08:32+00:00", + "output_data_objects": [ + { + "name": "Gp0127632_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", + "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", + "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "file_size_bytes": 2899 + }, + { + "name": "Gp0127632_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", + "md5_checksum": "6c54105711e818c4d8169ab595b05efe", + "id": "nmdc:6c54105711e818c4d8169ab595b05efe", + "file_size_bytes": 769416 + }, + { + "name": "Gp0127632_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127632", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", + "md5_checksum": "adb155cdb656648496484998a62fb96f", + "id": "nmdc:adb155cdb656648496484998a62fb96f", + "file_size_bytes": 235384 + }, + { + "name": "Gp0127632_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127632", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", + "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", + "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "file_size_bytes": 1917130445 + }, + { + "name": "Gp0127632_Centrifuge TSV report", + 
"description": "Centrifuge TSV report for Gp0127632", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", + "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", + "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", + "file_size_bytes": 255290 + }, + { + "name": "Gp0127632_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127632", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", + "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", + "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "file_size_bytes": 2333225 + }, + { + "name": "Gp0127632_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127632", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", + "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", + "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "file_size_bytes": 1537863470 + }, + { + "name": "Gp0127632_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127632", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", + "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", + "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "file_size_bytes": 648597 + }, + { + "name": "Gp0127632_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127632", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", + "md5_checksum": "e350fda9bd0651755171d79b413b8da3", + "id": 
"nmdc:e350fda9bd0651755171d79b413b8da3", + "file_size_bytes": 3959152 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9b" + }, + "has_input": [ + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + ], + "part_of": [ + "nmdc:mga0b6cy30" + ], + "ctg_logsum": 81568, + "scaf_logsum": 81839, + "gap_pct": 0.00096, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b5094d52c6d48836de0aac261c622868", + "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", + "nmdc:f8fad4cf225943d8fddec3fa3402c53a", + "nmdc:52f130d084757d6e27177ed108e9e5bf", + "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53" + ], + "asm_score": 5.986, + "was_informed_by": "gold:Gp0127632", + "ctg_powsum": 9274.272, + "scaf_max": 23706, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "scaf_powsum": 9304.689, + "execution_resource": "NERSC-Cori", + "contigs": 132499, + "name": "Assembly Activity for nmdc:mga0b6cy30", + "ctg_max": 23706, + "gc_std": 0.09103, + "contig_bp": 54959738, + "gc_avg": 0.61354, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 54960268, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 132455, + "ended_at_time": "2021-10-11T04:08:32+00:00", + "ctg_l50": 372, + "ctg_l90": 285, + "ctg_n50": 43541, + "ctg_n90": 113564, + "scaf_l50": 372, + "scaf_l90": 285, + "scaf_n50": 43524, + "scaf_n90": 113522, + "output_data_objects": [ + { + "name": "Gp0127632_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127632", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_contigs.fna", + "md5_checksum": "b5094d52c6d48836de0aac261c622868", + "id": "nmdc:b5094d52c6d48836de0aac261c622868", + "file_size_bytes": 59930370 + }, + { + "name": "Gp0127632_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127632", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_scaffolds.fna", + "md5_checksum": "4d9d83ac8db218e6d0bd4f29801c3ce3", + "id": "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", + "file_size_bytes": 59532251 + }, + { + "name": "Gp0127632_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_covstats.txt", + "md5_checksum": "f8fad4cf225943d8fddec3fa3402c53a", + "id": "nmdc:f8fad4cf225943d8fddec3fa3402c53a", + "file_size_bytes": 10428676 + }, + { + "name": "Gp0127632_Assembled AGP file", + "description": "Assembled AGP file for Gp0127632", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_assembly.agp", + "md5_checksum": "52f130d084757d6e27177ed108e9e5bf", + "id": "nmdc:52f130d084757d6e27177ed108e9e5bf", + "file_size_bytes": 9725931 + }, + { + "name": "Gp0127632_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127632", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_pairedMapped_sorted.bam", + "md5_checksum": "9e5deaa9e7ac3f5f90d79b6520d39d53", + "id": "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", + "file_size_bytes": 2363431165 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ae" + }, + "has_input": [ + "nmdc:b5094d52c6d48836de0aac261c622868" + ], + "part_of": [ + "nmdc:mga0b6cy30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:42989e75458691fbd17e537582c56d5e", + "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", + "nmdc:c595237698baaf882fdeeac92f1b02be", + "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", + "nmdc:57053d5594bb80495014664df22b0bb0", + "nmdc:3c82ee6a19674bd5abd4072cb137d96f", + "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", + 
"nmdc:2475726e21bd8369f76d529f55f21a3f", + "nmdc:5698830d572ddc4e35a5f6642da7981a", + "nmdc:18cdb0f987a2d417d0a39a685e435729", + "nmdc:b34e4d1823bd5cd88aa42832d10b3431", + "nmdc:dc544f4796d49c520372e1872c5aea49" + ], + "was_informed_by": "gold:Gp0127632", + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0b6cy30", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:08:32+00:00", + "output_data_objects": [ + { + "name": "Gp0127632_Protein FAA", + "description": "Protein FAA for Gp0127632", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_proteins.faa", + "md5_checksum": "42989e75458691fbd17e537582c56d5e", + "id": "nmdc:42989e75458691fbd17e537582c56d5e", + "file_size_bytes": 35685584 + }, + { + "name": "Gp0127632_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127632", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_structural_annotation.gff", + "md5_checksum": "09240a6d1afc5f8b965a80a64aa96ef4", + "id": "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", + "file_size_bytes": 2512 + }, + { + "name": "Gp0127632_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127632", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_functional_annotation.gff", + "md5_checksum": "c595237698baaf882fdeeac92f1b02be", + "id": "nmdc:c595237698baaf882fdeeac92f1b02be", + "file_size_bytes": 41979225 + }, + { + "name": "Gp0127632_KO TSV file", + "description": "KO TSV file for Gp0127632", + "data_object_type": "Annotation KEGG Orthology", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko.tsv", + "md5_checksum": "cd87df7a80ed03eef7d9923b9e9621e4", + "id": "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", + "file_size_bytes": 4726366 + }, + { + "name": "Gp0127632_EC TSV file", + "description": "EC TSV file for Gp0127632", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ec.tsv", + "md5_checksum": "57053d5594bb80495014664df22b0bb0", + "id": "nmdc:57053d5594bb80495014664df22b0bb0", + "file_size_bytes": 3155078 + }, + { + "name": "Gp0127632_COG GFF file", + "description": "COG GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cog.gff", + "md5_checksum": "3c82ee6a19674bd5abd4072cb137d96f", + "id": "nmdc:3c82ee6a19674bd5abd4072cb137d96f", + "file_size_bytes": 23956687 + }, + { + "name": "Gp0127632_PFAM GFF file", + "description": "PFAM GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_pfam.gff", + "md5_checksum": "c9bf48d6c88b3db0f431a08d93873c4a", + "id": "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", + "file_size_bytes": 17333907 + }, + { + "name": "Gp0127632_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_tigrfam.gff", + "md5_checksum": "2475726e21bd8369f76d529f55f21a3f", + "id": "nmdc:2475726e21bd8369f76d529f55f21a3f", + "file_size_bytes": 1771706 + }, + { + "name": "Gp0127632_SMART GFF file", + "description": "SMART GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_smart.gff", + "md5_checksum": "5698830d572ddc4e35a5f6642da7981a", + "id": "nmdc:5698830d572ddc4e35a5f6642da7981a", + "file_size_bytes": 5383998 + }, + { + "name": "Gp0127632_SuperFam GFF file", + "description": "SuperFam GFF 
file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_supfam.gff", + "md5_checksum": "18cdb0f987a2d417d0a39a685e435729", + "id": "nmdc:18cdb0f987a2d417d0a39a685e435729", + "file_size_bytes": 30162479 + }, + { + "name": "Gp0127632_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cath_funfam.gff", + "md5_checksum": "b34e4d1823bd5cd88aa42832d10b3431", + "id": "nmdc:b34e4d1823bd5cd88aa42832d10b3431", + "file_size_bytes": 22459777 + }, + { + "name": "Gp0127632_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko_ec.gff", + "md5_checksum": "dc544f4796d49c520372e1872c5aea49", + "id": "nmdc:dc544f4796d49c520372e1872c5aea49", + "file_size_bytes": 15047897 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3470d" + }, + "has_input": [ + "nmdc:b5094d52c6d48836de0aac261c622868", + "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", + "nmdc:c595237698baaf882fdeeac92f1b02be" + ], + "too_short_contig_num": 128818, + "part_of": [ + "nmdc:mga0b6cy30" + ], + "binned_contig_num": 313, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2941988fcfb708d20ad1e44682c78e22", + "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "nmdc:2914266e7ac7a8668c6f8d8722466c69", + "nmdc:0fd97ca0ce01d42361ce817d3753a65e", + "nmdc:8e7832cac0ae99e2b63dfdfa34c24927" + ], + "was_informed_by": "gold:Gp0127632", + "input_contig_num": 132499, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0b6cy30", + "mags_list": [ + { + "number_of_contig": 84, + "completeness": 27.81, + "bin_name": "bins.1", + "gene_count": 437, + "bin_quality": "LQ", + "gtdbtk_species": "", + 
"gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.71, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 229, + "completeness": 71.45, + "bin_name": "bins.2", + "gene_count": 1997, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 36 + } + ], + "unbinned_contig_num": 3368, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:08:32+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127632_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.tooShort.fa", + "md5_checksum": "2941988fcfb708d20ad1e44682c78e22", + "id": "nmdc:2941988fcfb708d20ad1e44682c78e22", + "file_size_bytes": 52475207 + }, + { + "name": "Gp0127632_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.unbinned.fa", + "md5_checksum": 
"a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "id": "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "file_size_bytes": 5473493 + }, + { + "name": "Gp0127632_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127632", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_checkm_qa.out", + "md5_checksum": "2914266e7ac7a8668c6f8d8722466c69", + "id": "nmdc:2914266e7ac7a8668c6f8d8722466c69", + "file_size_bytes": 948 + }, + { + "name": "Gp0127632_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127632", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_hqmq_bin.zip", + "md5_checksum": "0fd97ca0ce01d42361ce817d3753a65e", + "id": "nmdc:0fd97ca0ce01d42361ce817d3753a65e", + "file_size_bytes": 497493 + }, + { + "name": "Gp0127632_metabat2 bins", + "description": "metabat2 bins for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_metabat_bin.zip", + "md5_checksum": "8e7832cac0ae99e2b63dfdfa34c24927", + "id": "nmdc:8e7832cac0ae99e2b63dfdfa34c24927", + "file_size_bytes": 108323 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b45" + }, + "id": "nmdc:omprc-11-k675bw84", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-rtf54942" + ], + "has_output": [ + "jgi:574fe09f7ded5e3df1ee1489" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in 
Columbia River, Washington, USA - GW-RW N2_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127636" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c864" + }, + "has_input": [ + "nmdc:341830a5735c34968da2304bc27edd2a" + ], + "part_of": [ + "nmdc:mga02tph39" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e4f5675c728fd1896682eb669656b5d6", + "nmdc:64f455185b1bc610a8d74a84ed12683f" + ], + "was_informed_by": "gold:Gp0127636", + "input_read_count": 31642056, + "output_read_bases": 4354491393, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4777950456, + "name": "Read QC Activity for nmdc:mga02tph39", + "output_read_count": 29115818, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T18:49:37+00:00", + "output_data_objects": [ + { + "name": "Gp0127636_Filtered Reads", + "description": "Filtered Reads for Gp0127636", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filtered.fastq.gz", + "md5_checksum": "e4f5675c728fd1896682eb669656b5d6", + "id": "nmdc:e4f5675c728fd1896682eb669656b5d6", + "file_size_bytes": 2463342132 + }, + { + "name": "Gp0127636_Filtered Stats", + "description": "Filtered Stats for Gp0127636", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filterStats.txt", + "md5_checksum": "64f455185b1bc610a8d74a84ed12683f", + "id": "nmdc:64f455185b1bc610a8d74a84ed12683f", + "file_size_bytes": 293 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf26" + }, + "has_input": [ + 
"nmdc:e4f5675c728fd1896682eb669656b5d6" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "nmdc:827ad863c875ea14473c9903d192fa73", + "nmdc:957074ca49765b22348e27b0133d8ba0", + "nmdc:9253645582296696cb33b11754832574", + "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "nmdc:75180fce38f38a6307231b47a8d2b23b", + "nmdc:b4524a34937893768dbd3752068dee0c", + "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" + ], + "was_informed_by": "gold:Gp0127636", + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga02tph39", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:49:37+00:00", + "output_data_objects": [ + { + "name": "Gp0127636_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", + "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", + "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "file_size_bytes": 5547 + }, + { + "name": "Gp0127636_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", + "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", + "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "file_size_bytes": 965042 + }, + { + "name": "Gp0127636_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127636", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", + "md5_checksum": "827ad863c875ea14473c9903d192fa73", + "id": "nmdc:827ad863c875ea14473c9903d192fa73", + "file_size_bytes": 
242495 + }, + { + "name": "Gp0127636_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127636", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", + "md5_checksum": "957074ca49765b22348e27b0133d8ba0", + "id": "nmdc:957074ca49765b22348e27b0133d8ba0", + "file_size_bytes": 2151939041 + }, + { + "name": "Gp0127636_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127636", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", + "md5_checksum": "9253645582296696cb33b11754832574", + "id": "nmdc:9253645582296696cb33b11754832574", + "file_size_bytes": 257932 + }, + { + "name": "Gp0127636_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127636", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", + "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", + "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "file_size_bytes": 2335219 + }, + { + "name": "Gp0127636_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127636", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", + "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", + "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", + "file_size_bytes": 1746049273 + }, + { + "name": "Gp0127636_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127636", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", + "md5_checksum": "b4524a34937893768dbd3752068dee0c", + "id": "nmdc:b4524a34937893768dbd3752068dee0c", + "file_size_bytes": 660975 + }, + { + "name": "Gp0127636_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127636", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", + "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", + "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", + "file_size_bytes": 4020978 + } + ] + }, + { + "_id": { + "$oid": "61e71959833bcf838a70040a" + }, + "has_input": [ + "nmdc:e4f5675c728fd1896682eb669656b5d6" + ], + "part_of": [ + "nmdc:mga02tph39" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "nmdc:827ad863c875ea14473c9903d192fa73", + "nmdc:957074ca49765b22348e27b0133d8ba0", + "nmdc:9253645582296696cb33b11754832574", + "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "nmdc:75180fce38f38a6307231b47a8d2b23b", + "nmdc:b4524a34937893768dbd3752068dee0c", + "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" + ], + "was_informed_by": "gold:Gp0127636", + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga02tph39", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:49:37+00:00", + "output_data_objects": [ + { + "name": "Gp0127636_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", + "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", + "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "file_size_bytes": 5547 
+ }, + { + "name": "Gp0127636_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", + "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", + "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "file_size_bytes": 965042 + }, + { + "name": "Gp0127636_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127636", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", + "md5_checksum": "827ad863c875ea14473c9903d192fa73", + "id": "nmdc:827ad863c875ea14473c9903d192fa73", + "file_size_bytes": 242495 + }, + { + "name": "Gp0127636_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127636", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", + "md5_checksum": "957074ca49765b22348e27b0133d8ba0", + "id": "nmdc:957074ca49765b22348e27b0133d8ba0", + "file_size_bytes": 2151939041 + }, + { + "name": "Gp0127636_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127636", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", + "md5_checksum": "9253645582296696cb33b11754832574", + "id": "nmdc:9253645582296696cb33b11754832574", + "file_size_bytes": 257932 + }, + { + "name": "Gp0127636_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127636", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", + "md5_checksum": 
"9aef1d9e04acfe0b7fb1b9dc3b842912", + "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "file_size_bytes": 2335219 + }, + { + "name": "Gp0127636_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127636", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", + "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", + "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", + "file_size_bytes": 1746049273 + }, + { + "name": "Gp0127636_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127636", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", + "md5_checksum": "b4524a34937893768dbd3752068dee0c", + "id": "nmdc:b4524a34937893768dbd3752068dee0c", + "file_size_bytes": 660975 + }, + { + "name": "Gp0127636_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127636", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", + "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", + "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", + "file_size_bytes": 4020978 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f91" + }, + "has_input": [ + "nmdc:e4f5675c728fd1896682eb669656b5d6" + ], + "part_of": [ + "nmdc:mga02tph39" + ], + "ctg_logsum": 36469, + "scaf_logsum": 36615, + "gap_pct": 0.00062, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", + "nmdc:9830a711accd3a5ed899a2e616d0f4bf", + "nmdc:481fbd8cdeacd71e54a45c78d5decb36", + "nmdc:a24edc9ffd773c30cea8ea709988307a" + ], + "asm_score": 3.618, + "was_informed_by": "gold:Gp0127636", 
+ "ctg_powsum": 3976.058, + "scaf_max": 23067, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "scaf_powsum": 3993.143, + "execution_resource": "NERSC-Cori", + "contigs": 95606, + "name": "Assembly Activity for nmdc:mga02tph39", + "ctg_max": 23067, + "gc_std": 0.11099, + "gc_avg": 0.57474, + "contig_bp": 35573088, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 35573308, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 95584, + "ended_at_time": "2021-11-13T18:49:37+00:00", + "ctg_l50": 329, + "ctg_l90": 282, + "ctg_n50": 35238, + "ctg_n90": 83377, + "scaf_l50": 329, + "scaf_l90": 282, + "scaf_n50": 35220, + "scaf_n90": 83355, + "output_data_objects": [ + { + "name": "Gp0127636_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127636", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_contigs.fna", + "md5_checksum": "36692b7b93756aaabd7f1f6259753c4e", + "id": "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "file_size_bytes": 39062008 + }, + { + "name": "Gp0127636_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127636", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_scaffolds.fna", + "md5_checksum": "8d02adf1319d5b95c2abc6ed5b5c1683", + "id": "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", + "file_size_bytes": 38774844 + }, + { + "name": "Gp0127636_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_covstats.txt", + "md5_checksum": "9830a711accd3a5ed899a2e616d0f4bf", + "id": "nmdc:9830a711accd3a5ed899a2e616d0f4bf", + "file_size_bytes": 7495949 + }, + { + "name": "Gp0127636_Assembled AGP file", + "description": "Assembled AGP file for Gp0127636", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_assembly.agp", + "md5_checksum": "481fbd8cdeacd71e54a45c78d5decb36", + "id": "nmdc:481fbd8cdeacd71e54a45c78d5decb36", + "file_size_bytes": 6962527 + }, + { + "name": "Gp0127636_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127636", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_pairedMapped_sorted.bam", + "md5_checksum": "a24edc9ffd773c30cea8ea709988307a", + "id": "nmdc:a24edc9ffd773c30cea8ea709988307a", + "file_size_bytes": 2624769069 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ac" + }, + "has_input": [ + "nmdc:36692b7b93756aaabd7f1f6259753c4e" + ], + "part_of": [ + "nmdc:mga02tph39" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", + "nmdc:2b791fb3e2964d7808388b32086e0de2", + "nmdc:f61ed86592491b2d83b5893749e12406", + "nmdc:e983789bdc08364b00a000684062ed16", + "nmdc:3cd47d66b6e9006ff683a2eda168285f", + "nmdc:e056ee666e8001bdb6f790efb3394093", + "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", + "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", + "nmdc:dd24a8b0f774555ac91e663416745428", + "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", + "nmdc:2f297176cd51b2ede33c313f713b40b1", + "nmdc:678a7af05a89d9d4f5f5d598dc2e3013" + ], + "was_informed_by": "gold:Gp0127636", + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga02tph39", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-11-13T18:49:37+00:00", + "output_data_objects": [ + { + "name": "Gp0127636_Protein FAA", + "description": "Protein FAA for Gp0127636", + "data_object_type": "Annotation Amino Acid FASTA", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_proteins.faa", + "md5_checksum": "a5d97f323fe7117cb38a2eea1f2246d2", + "id": "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", + "file_size_bytes": 23469553 + }, + { + "name": "Gp0127636_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127636", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_structural_annotation.gff", + "md5_checksum": "2b791fb3e2964d7808388b32086e0de2", + "id": "nmdc:2b791fb3e2964d7808388b32086e0de2", + "file_size_bytes": 16532352 + }, + { + "name": "Gp0127636_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127636", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_functional_annotation.gff", + "md5_checksum": "f61ed86592491b2d83b5893749e12406", + "id": "nmdc:f61ed86592491b2d83b5893749e12406", + "file_size_bytes": 28432426 + }, + { + "name": "Gp0127636_KO TSV file", + "description": "KO TSV file for Gp0127636", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko.tsv", + "md5_checksum": "e983789bdc08364b00a000684062ed16", + "id": "nmdc:e983789bdc08364b00a000684062ed16", + "file_size_bytes": 3189682 + }, + { + "name": "Gp0127636_EC TSV file", + "description": "EC TSV file for Gp0127636", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ec.tsv", + "md5_checksum": "3cd47d66b6e9006ff683a2eda168285f", + "id": "nmdc:3cd47d66b6e9006ff683a2eda168285f", + "file_size_bytes": 2100535 + }, + { + "name": "Gp0127636_COG GFF file", + "description": "COG GFF file for Gp0127636", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cog.gff", + "md5_checksum": "e056ee666e8001bdb6f790efb3394093", + "id": "nmdc:e056ee666e8001bdb6f790efb3394093", + "file_size_bytes": 15585690 + }, + { + "name": "Gp0127636_PFAM GFF file", + "description": "PFAM GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_pfam.gff", + "md5_checksum": "2b90fcb7628c3ffa9e7a14a32612b7af", + "id": "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", + "file_size_bytes": 11182350 + }, + { + "name": "Gp0127636_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_tigrfam.gff", + "md5_checksum": "4e2f1d4b2d20bfb0209a320a60c4aeac", + "id": "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", + "file_size_bytes": 995758 + }, + { + "name": "Gp0127636_SMART GFF file", + "description": "SMART GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_smart.gff", + "md5_checksum": "dd24a8b0f774555ac91e663416745428", + "id": "nmdc:dd24a8b0f774555ac91e663416745428", + "file_size_bytes": 3256325 + }, + { + "name": "Gp0127636_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_supfam.gff", + "md5_checksum": "2e76b71475b854e2bf2d0aa15a53dd7d", + "id": "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", + "file_size_bytes": 19666317 + }, + { + "name": "Gp0127636_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cath_funfam.gff", + "md5_checksum": "2f297176cd51b2ede33c313f713b40b1", + "id": "nmdc:2f297176cd51b2ede33c313f713b40b1", + "file_size_bytes": 14458019 + }, + { + "name": "Gp0127636_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127636", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko_ec.gff", + "md5_checksum": "678a7af05a89d9d4f5f5d598dc2e3013", + "id": "nmdc:678a7af05a89d9d4f5f5d598dc2e3013", + "file_size_bytes": 10187098 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34709" + }, + "has_input": [ + "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "nmdc:a24edc9ffd773c30cea8ea709988307a", + "nmdc:f61ed86592491b2d83b5893749e12406" + ], + "too_short_contig_num": 93687, + "part_of": [ + "nmdc:mga02tph39" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2d1e318b8b815a8a5487f23315d0fe02" + ], + "was_informed_by": "gold:Gp0127636", + "input_contig_num": 95606, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga02tph39", + "mags_list": [], + "unbinned_contig_num": 1919, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-11-13T18:49:37+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127636_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127636", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/MAGs/nmdc_mga02tph39_hqmq_bin.zip", + "md5_checksum": "2d1e318b8b815a8a5487f23315d0fe02", + "id": "nmdc:2d1e318b8b815a8a5487f23315d0fe02", + "file_size_bytes": 182 + } + ] + } + ] + }, + { + 
"_id": { + "$oid": "649b009773e8249959349b46" + }, + "id": "nmdc:omprc-11-mbv2jc69", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jdgzjq31" + ], + "has_output": [ + "jgi:574fe09c7ded5e3df1ee1487" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127634" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86a" + }, + "has_input": [ + "nmdc:2b7712d32a159eca66fc50936de000a5" + ], + "part_of": [ + "nmdc:mga0r0vf18" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac889627d813c8e34cfbf79a4264c590", + "nmdc:0dfd55be1779ae7922d80aa22034c9a1" + ], + "was_informed_by": "gold:Gp0127634", + "input_read_count": 29872658, + "output_read_bases": 4172764161, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4510771358, + "name": "Read QC Activity for nmdc:mga0r0vf18", + "output_read_count": 27896694, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:49:55+00:00", + "output_data_objects": [ + { + "name": "Gp0127634_Filtered Reads", + "description": "Filtered Reads for Gp0127634", + "data_object_type": "Filtered Sequencing Reads", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", + "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", + "id": "nmdc:ac889627d813c8e34cfbf79a4264c590", + "file_size_bytes": 2316462404 + }, + { + "name": "Gp0127634_Filtered Stats", + "description": "Filtered Stats for Gp0127634", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", + "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", + "id": "nmdc:0dfd55be1779ae7922d80aa22034c9a1", + "file_size_bytes": 291 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf2d" + }, + "has_input": [ + "nmdc:ac889627d813c8e34cfbf79a4264c590" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "nmdc:0ca043b630ba304cb80603e8332c78cf", + "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "nmdc:678e7c401a6971629f7d3ada83b307ab" + ], + "was_informed_by": "gold:Gp0127634", + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:49:55+00:00", + "output_data_objects": [ + { + "name": "Gp0127634_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", + "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", + "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "file_size_bytes": 4224 + }, + { + "name": "Gp0127634_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV 
report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", + "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", + "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "file_size_bytes": 875501 + }, + { + "name": "Gp0127634_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127634", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", + "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", + "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "file_size_bytes": 238755 + }, + { + "name": "Gp0127634_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127634", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", + "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", + "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "file_size_bytes": 2051793471 + }, + { + "name": "Gp0127634_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127634", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", + "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", + "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", + "file_size_bytes": 256560 + }, + { + "name": "Gp0127634_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127634", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", + "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", + "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "file_size_bytes": 
2334325 + }, + { + "name": "Gp0127634_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127634", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", + "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", + "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "file_size_bytes": 1649071235 + }, + { + "name": "Gp0127634_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127634", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", + "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "file_size_bytes": 654782 + }, + { + "name": "Gp0127634_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127634", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", + "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", + "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", + "file_size_bytes": 3988988 + } + ] + }, + { + "_id": { + "$oid": "61e71979833bcf838a700840" + }, + "has_input": [ + "nmdc:ac889627d813c8e34cfbf79a4264c590" + ], + "part_of": [ + "nmdc:mga0r0vf18" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "nmdc:0ca043b630ba304cb80603e8332c78cf", + "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "nmdc:678e7c401a6971629f7d3ada83b307ab" + ], + "was_informed_by": "gold:Gp0127634", + "id": 
"nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:49:55+00:00", + "output_data_objects": [ + { + "name": "Gp0127634_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", + "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", + "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "file_size_bytes": 4224 + }, + { + "name": "Gp0127634_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", + "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", + "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "file_size_bytes": 875501 + }, + { + "name": "Gp0127634_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127634", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", + "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", + "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "file_size_bytes": 238755 + }, + { + "name": "Gp0127634_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127634", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", + "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", + "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "file_size_bytes": 2051793471 + }, + { + "name": "Gp0127634_Centrifuge TSV report", + "description": "Centrifuge TSV report for 
Gp0127634", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", + "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", + "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", + "file_size_bytes": 256560 + }, + { + "name": "Gp0127634_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127634", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", + "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", + "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "file_size_bytes": 2334325 + }, + { + "name": "Gp0127634_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127634", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", + "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", + "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "file_size_bytes": 1649071235 + }, + { + "name": "Gp0127634_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127634", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", + "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "file_size_bytes": 654782 + }, + { + "name": "Gp0127634_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127634", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", + "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", + "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", + 
"file_size_bytes": 3988988 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9c" + }, + "has_input": [ + "nmdc:ac889627d813c8e34cfbf79a4264c590" + ], + "part_of": [ + "nmdc:mga0r0vf18" + ], + "ctg_logsum": 142091, + "scaf_logsum": 142614, + "gap_pct": 0.00138, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2a30cf44cc596923301befc34edf6c0a", + "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", + "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", + "nmdc:825969095ff134b195b06a40fcc6089a", + "nmdc:356d9ca409747590849dd894998166ee" + ], + "asm_score": 5.751, + "was_informed_by": "gold:Gp0127634", + "ctg_powsum": 15837, + "scaf_max": 33833, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "scaf_powsum": 15897, + "execution_resource": "NERSC-Cori", + "contigs": 175824, + "name": "Assembly Activity for nmdc:mga0r0vf18", + "ctg_max": 33833, + "gc_std": 0.09424, + "contig_bp": 78219291, + "gc_avg": 0.62214, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 78220371, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 175734, + "ended_at_time": "2021-10-11T04:49:55+00:00", + "ctg_l50": 412, + "ctg_l90": 286, + "ctg_n50": 53340, + "ctg_n90": 150131, + "scaf_l50": 412, + "scaf_l90": 286, + "scaf_n50": 53321, + "scaf_n90": 150048, + "output_data_objects": [ + { + "name": "Gp0127634_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127634", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_contigs.fna", + "md5_checksum": "2a30cf44cc596923301befc34edf6c0a", + "id": "nmdc:2a30cf44cc596923301befc34edf6c0a", + "file_size_bytes": 84939887 + }, + { + "name": "Gp0127634_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127634", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_scaffolds.fna", + "md5_checksum": 
"f147264a5a4a7eec4d68f05ab52ecc1d", + "id": "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", + "file_size_bytes": 84411544 + }, + { + "name": "Gp0127634_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_covstats.txt", + "md5_checksum": "9bd1b25df71c0a6f9ca408ddc045ffed", + "id": "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", + "file_size_bytes": 13895509 + }, + { + "name": "Gp0127634_Assembled AGP file", + "description": "Assembled AGP file for Gp0127634", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_assembly.agp", + "md5_checksum": "825969095ff134b195b06a40fcc6089a", + "id": "nmdc:825969095ff134b195b06a40fcc6089a", + "file_size_bytes": 12985962 + }, + { + "name": "Gp0127634_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127634", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_pairedMapped_sorted.bam", + "md5_checksum": "356d9ca409747590849dd894998166ee", + "id": "nmdc:356d9ca409747590849dd894998166ee", + "file_size_bytes": 2516463401 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b3" + }, + "has_input": [ + "nmdc:2a30cf44cc596923301befc34edf6c0a" + ], + "part_of": [ + "nmdc:mga0r0vf18" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ca16203099dc1d6bbce00320bb753974", + "nmdc:fffbb7b52a4886755df429e22a152427", + "nmdc:f63b43e7797845fa94dc6f552ba1ea39", + "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", + "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", + "nmdc:763d16c5dbadbeba61ceee91ed5209f3", + "nmdc:52cba722f402eea06fda75ec1e5a5103", + "nmdc:ad358ce4b479febc34a2acdd9f249517", + "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", + "nmdc:d0e8459e010015e726c31f0f8c18d359", + 
"nmdc:41d7ca149efb4c12bce48e5a19649a84", + "nmdc:9da1883e60979e17665b0211198c35f0" + ], + "was_informed_by": "gold:Gp0127634", + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0r0vf18", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:49:55+00:00", + "output_data_objects": [ + { + "name": "Gp0127634_Protein FAA", + "description": "Protein FAA for Gp0127634", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_proteins.faa", + "md5_checksum": "ca16203099dc1d6bbce00320bb753974", + "id": "nmdc:ca16203099dc1d6bbce00320bb753974", + "file_size_bytes": 49630516 + }, + { + "name": "Gp0127634_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127634", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_structural_annotation.gff", + "md5_checksum": "fffbb7b52a4886755df429e22a152427", + "id": "nmdc:fffbb7b52a4886755df429e22a152427", + "file_size_bytes": 2519 + }, + { + "name": "Gp0127634_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127634", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_functional_annotation.gff", + "md5_checksum": "f63b43e7797845fa94dc6f552ba1ea39", + "id": "nmdc:f63b43e7797845fa94dc6f552ba1ea39", + "file_size_bytes": 57589694 + }, + { + "name": "Gp0127634_KO TSV file", + "description": "KO TSV file for Gp0127634", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko.tsv", + "md5_checksum": "8ab8f39bfc76267daa4ce5a34811bff1", + "id": 
"nmdc:8ab8f39bfc76267daa4ce5a34811bff1", + "file_size_bytes": 6602379 + }, + { + "name": "Gp0127634_EC TSV file", + "description": "EC TSV file for Gp0127634", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ec.tsv", + "md5_checksum": "d6ff8f2f0d5c77495b2b43a7020e5730", + "id": "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", + "file_size_bytes": 4399755 + }, + { + "name": "Gp0127634_COG GFF file", + "description": "COG GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cog.gff", + "md5_checksum": "763d16c5dbadbeba61ceee91ed5209f3", + "id": "nmdc:763d16c5dbadbeba61ceee91ed5209f3", + "file_size_bytes": 33737036 + }, + { + "name": "Gp0127634_PFAM GFF file", + "description": "PFAM GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_pfam.gff", + "md5_checksum": "52cba722f402eea06fda75ec1e5a5103", + "id": "nmdc:52cba722f402eea06fda75ec1e5a5103", + "file_size_bytes": 24757263 + }, + { + "name": "Gp0127634_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_tigrfam.gff", + "md5_checksum": "ad358ce4b479febc34a2acdd9f249517", + "id": "nmdc:ad358ce4b479febc34a2acdd9f249517", + "file_size_bytes": 2661782 + }, + { + "name": "Gp0127634_SMART GFF file", + "description": "SMART GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_smart.gff", + "md5_checksum": "10a0ca82cf662ac4d9b465f05ed1fb2b", + "id": "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", + "file_size_bytes": 7506881 + }, + { + "name": "Gp0127634_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_supfam.gff", + "md5_checksum": 
"d0e8459e010015e726c31f0f8c18d359", + "id": "nmdc:d0e8459e010015e726c31f0f8c18d359", + "file_size_bytes": 42013513 + }, + { + "name": "Gp0127634_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cath_funfam.gff", + "md5_checksum": "41d7ca149efb4c12bce48e5a19649a84", + "id": "nmdc:41d7ca149efb4c12bce48e5a19649a84", + "file_size_bytes": 31747110 + }, + { + "name": "Gp0127634_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko_ec.gff", + "md5_checksum": "9da1883e60979e17665b0211198c35f0", + "id": "nmdc:9da1883e60979e17665b0211198c35f0", + "file_size_bytes": 20999001 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34713" + }, + "has_input": [ + "nmdc:2a30cf44cc596923301befc34edf6c0a", + "nmdc:356d9ca409747590849dd894998166ee", + "nmdc:f63b43e7797845fa94dc6f552ba1ea39" + ], + "too_short_contig_num": 168596, + "part_of": [ + "nmdc:mga0r0vf18" + ], + "binned_contig_num": 278, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:3c8eadbcf4f583090d8f378ea6758799", + "nmdc:1be647dc835ee8fe666fe9893266bd21", + "nmdc:6cc278c455cafc691333c0a74fe6c540", + "nmdc:de4d0180489bdaa5526977508a489b99", + "nmdc:16a08c4a3a6e9c70a5d47209177d0e60" + ], + "was_informed_by": "gold:Gp0127634", + "input_contig_num": 175822, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0r0vf18", + "mags_list": [ + { + "number_of_contig": 235, + "completeness": 68.28, + "bin_name": "bins.1", + "gene_count": 2056, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 2.91, 
+ "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 34 + }, + { + "number_of_contig": 43, + "completeness": 10.69, + "bin_name": "bins.2", + "gene_count": 247, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 6948, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:49:55+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127634_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.tooShort.fa", + "md5_checksum": "3c8eadbcf4f583090d8f378ea6758799", + "id": "nmdc:3c8eadbcf4f583090d8f378ea6758799", + "file_size_bytes": 71683990 + }, + { + "name": "Gp0127634_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.unbinned.fa", + "md5_checksum": "1be647dc835ee8fe666fe9893266bd21", + "id": "nmdc:1be647dc835ee8fe666fe9893266bd21", + "file_size_bytes": 11353478 + }, + { + "name": "Gp0127634_metabat2 bin checkm quality 
assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127634", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_checkm_qa.out", + "md5_checksum": "6cc278c455cafc691333c0a74fe6c540", + "id": "nmdc:6cc278c455cafc691333c0a74fe6c540", + "file_size_bytes": 936 + }, + { + "name": "Gp0127634_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127634", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_hqmq_bin.zip", + "md5_checksum": "de4d0180489bdaa5526977508a489b99", + "id": "nmdc:de4d0180489bdaa5526977508a489b99", + "file_size_bytes": 518340 + }, + { + "name": "Gp0127634_metabat2 bins", + "description": "metabat2 bins for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_metabat_bin.zip", + "md5_checksum": "16a08c4a3a6e9c70a5d47209177d0e60", + "id": "nmdc:16a08c4a3a6e9c70a5d47209177d0e60", + "file_size_bytes": 63768 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b47" + }, + "id": "nmdc:omprc-11-kc23zq65", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-59xteq78" + ], + "has_output": [ + "jgi:574fde607ded5e3df1ee1403" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + 
"processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127635" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c860" + }, + "has_input": [ + "nmdc:1a16fdf096087338922b288165a924b8" + ], + "part_of": [ + "nmdc:mga0ak4p20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", + "nmdc:fbc260443529d6e8067efdac3b58a8c1" + ], + "was_informed_by": "gold:Gp0127635", + "input_read_count": 25320866, + "output_read_bases": 3673182178, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3823450766, + "name": "Read QC Activity for nmdc:mga0ak4p20", + "output_read_count": 24600396, + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00", + "output_data_objects": [ + { + "name": "Gp0127635_Filtered Reads", + "description": "Filtered Reads for Gp0127635", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filtered.fastq.gz", + "md5_checksum": "f8bc16e232f7ba0f6d6b5ca35a708c36", + "id": "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", + "file_size_bytes": 1951049105 + }, + { + "name": "Gp0127635_Filtered Stats", + "description": "Filtered Stats for Gp0127635", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filterStats.txt", + "md5_checksum": "fbc260443529d6e8067efdac3b58a8c1", + "id": "nmdc:fbc260443529d6e8067efdac3b58a8c1", + "file_size_bytes": 280 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf25" + }, + "has_input": [ + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:d8a410c52c8f6cf0097b674492cc3926", + "nmdc:ddec46781153da60da815c65871f5413", + "nmdc:e626ec18dba4885613240927cbb99d8b", + "nmdc:f8486e4ee029038a452a3484db10cabc", + "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "nmdc:59807dae5216b11c96df5593a26d9a88", + "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "nmdc:6748020214a3d68ad588e3548107208e" + ], + "was_informed_by": "gold:Gp0127635", + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:11:48+00:00", + "output_data_objects": [ + { + "name": "Gp0127635_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", + "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", + "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "file_size_bytes": 3696 + }, + { + "name": "Gp0127635_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", + "md5_checksum": "ddec46781153da60da815c65871f5413", + "id": "nmdc:ddec46781153da60da815c65871f5413", + "file_size_bytes": 677459 + }, + { + "name": "Gp0127635_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127635", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", + "md5_checksum": "e626ec18dba4885613240927cbb99d8b", + "id": "nmdc:e626ec18dba4885613240927cbb99d8b", + "file_size_bytes": 236164 + }, + { + "name": "Gp0127635_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for 
Gp0127635", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", + "md5_checksum": "f8486e4ee029038a452a3484db10cabc", + "id": "nmdc:f8486e4ee029038a452a3484db10cabc", + "file_size_bytes": 1796179546 + }, + { + "name": "Gp0127635_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127635", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", + "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", + "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "file_size_bytes": 254661 + }, + { + "name": "Gp0127635_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127635", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", + "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", + "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "file_size_bytes": 2333534 + }, + { + "name": "Gp0127635_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127635", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", + "md5_checksum": "59807dae5216b11c96df5593a26d9a88", + "id": "nmdc:59807dae5216b11c96df5593a26d9a88", + "file_size_bytes": 1432249556 + }, + { + "name": "Gp0127635_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127635", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", + "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", + "id": 
"nmdc:a491f6797bd7294dbc5ba301efb3466e", + "file_size_bytes": 639738 + }, + { + "name": "Gp0127635_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127635", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", + "md5_checksum": "6748020214a3d68ad588e3548107208e", + "id": "nmdc:6748020214a3d68ad588e3548107208e", + "file_size_bytes": 3996293 + } + ] + }, + { + "_id": { + "$oid": "61e71936833bcf838a6ffdfc" + }, + "has_input": [ + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" + ], + "part_of": [ + "nmdc:mga0ak4p20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "nmdc:ddec46781153da60da815c65871f5413", + "nmdc:e626ec18dba4885613240927cbb99d8b", + "nmdc:f8486e4ee029038a452a3484db10cabc", + "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "nmdc:59807dae5216b11c96df5593a26d9a88", + "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "nmdc:6748020214a3d68ad588e3548107208e" + ], + "was_informed_by": "gold:Gp0127635", + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:11:48+00:00", + "output_data_objects": [ + { + "name": "Gp0127635_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", + "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", + "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "file_size_bytes": 3696 + }, + { + "name": "Gp0127635_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127635", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", + "md5_checksum": "ddec46781153da60da815c65871f5413", + "id": "nmdc:ddec46781153da60da815c65871f5413", + "file_size_bytes": 677459 + }, + { + "name": "Gp0127635_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127635", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", + "md5_checksum": "e626ec18dba4885613240927cbb99d8b", + "id": "nmdc:e626ec18dba4885613240927cbb99d8b", + "file_size_bytes": 236164 + }, + { + "name": "Gp0127635_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127635", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", + "md5_checksum": "f8486e4ee029038a452a3484db10cabc", + "id": "nmdc:f8486e4ee029038a452a3484db10cabc", + "file_size_bytes": 1796179546 + }, + { + "name": "Gp0127635_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127635", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", + "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", + "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "file_size_bytes": 254661 + }, + { + "name": "Gp0127635_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127635", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", + "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", + "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "file_size_bytes": 2333534 + }, + { + "name": 
"Gp0127635_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127635", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", + "md5_checksum": "59807dae5216b11c96df5593a26d9a88", + "id": "nmdc:59807dae5216b11c96df5593a26d9a88", + "file_size_bytes": 1432249556 + }, + { + "name": "Gp0127635_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127635", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", + "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", + "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "file_size_bytes": 639738 + }, + { + "name": "Gp0127635_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127635", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", + "md5_checksum": "6748020214a3d68ad588e3548107208e", + "id": "nmdc:6748020214a3d68ad588e3548107208e", + "file_size_bytes": 3996293 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f90" + }, + "has_input": [ + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" + ], + "part_of": [ + "nmdc:mga0ak4p20" + ], + "ctg_logsum": 269360, + "scaf_logsum": 270403, + "gap_pct": 0.00195, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "nmdc:4d4497f63f95f7d2f8986178dab3ae52", + "nmdc:ac98d3d128ec5b045a9ef019a5653b99", + "nmdc:1d0302bec371a73f040d052f4b66277c", + "nmdc:2d8cca230f439e38f1e628666e40e013" + ], + "asm_score": 3.934, + "was_informed_by": "gold:Gp0127635", + "ctg_powsum": 29422, + "scaf_max": 23775, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "scaf_powsum": 29544, + 
"execution_resource": "NERSC-Cori", + "contigs": 206757, + "name": "Assembly Activity for nmdc:mga0ak4p20", + "ctg_max": 23775, + "gc_std": 0.10033, + "contig_bp": 103842002, + "gc_avg": 0.61621, + "started_at_time": "2021-10-11T02:26:59Z", + "scaf_bp": 103844032, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 206599, + "ended_at_time": "2021-10-11T04:11:48+00:00", + "ctg_l50": 496, + "ctg_l90": 290, + "ctg_n50": 55322, + "ctg_n90": 171862, + "scaf_l50": 497, + "scaf_l90": 290, + "scaf_n50": 55067, + "scaf_n90": 171721, + "output_data_objects": [ + { + "name": "Gp0127635_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127635", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_contigs.fna", + "md5_checksum": "3d1b5043e0c49ac6062aeba4ebbba910", + "id": "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "file_size_bytes": 111964628 + }, + { + "name": "Gp0127635_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127635", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_scaffolds.fna", + "md5_checksum": "4d4497f63f95f7d2f8986178dab3ae52", + "id": "nmdc:4d4497f63f95f7d2f8986178dab3ae52", + "file_size_bytes": 111342667 + }, + { + "name": "Gp0127635_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_covstats.txt", + "md5_checksum": "ac98d3d128ec5b045a9ef019a5653b99", + "id": "nmdc:ac98d3d128ec5b045a9ef019a5653b99", + "file_size_bytes": 16397988 + }, + { + "name": "Gp0127635_Assembled AGP file", + "description": "Assembled AGP file for Gp0127635", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_assembly.agp", + "md5_checksum": 
"1d0302bec371a73f040d052f4b66277c", + "id": "nmdc:1d0302bec371a73f040d052f4b66277c", + "file_size_bytes": 15325341 + }, + { + "name": "Gp0127635_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127635", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_pairedMapped_sorted.bam", + "md5_checksum": "2d8cca230f439e38f1e628666e40e013", + "id": "nmdc:2d8cca230f439e38f1e628666e40e013", + "file_size_bytes": 2159251548 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a7" + }, + "has_input": [ + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910" + ], + "part_of": [ + "nmdc:mga0ak4p20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bb7eae2b3dbc58168b9122098f078bb5", + "nmdc:2af7f6c008858f2f0d47c00fa9758129", + "nmdc:dd3668477e39a65243179dfb9e4bf26e", + "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", + "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", + "nmdc:6960907313875913a789e1fda46ed34e", + "nmdc:033da43cdca9f81ed2270a9094fdb065", + "nmdc:e9603ffd918db8a21df1310b890315ff", + "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", + "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", + "nmdc:f0a96fb57947358a42053e9fb7134e70", + "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9" + ], + "was_informed_by": "gold:Gp0127635", + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0ak4p20", + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00", + "output_data_objects": [ + { + "name": "Gp0127635_Protein FAA", + "description": "Protein FAA for Gp0127635", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_proteins.faa", + "md5_checksum": "bb7eae2b3dbc58168b9122098f078bb5", + "id": 
"nmdc:bb7eae2b3dbc58168b9122098f078bb5", + "file_size_bytes": 63157189 + }, + { + "name": "Gp0127635_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127635", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_structural_annotation.gff", + "md5_checksum": "2af7f6c008858f2f0d47c00fa9758129", + "id": "nmdc:2af7f6c008858f2f0d47c00fa9758129", + "file_size_bytes": 2526 + }, + { + "name": "Gp0127635_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127635", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_functional_annotation.gff", + "md5_checksum": "dd3668477e39a65243179dfb9e4bf26e", + "id": "nmdc:dd3668477e39a65243179dfb9e4bf26e", + "file_size_bytes": 71092075 + }, + { + "name": "Gp0127635_KO TSV file", + "description": "KO TSV file for Gp0127635", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko.tsv", + "md5_checksum": "be0e9a5999ddfd46bf5daac56aa96b86", + "id": "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", + "file_size_bytes": 8023056 + }, + { + "name": "Gp0127635_EC TSV file", + "description": "EC TSV file for Gp0127635", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ec.tsv", + "md5_checksum": "95a6a1f91bf18bc1a781a8890d2e1bc5", + "id": "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", + "file_size_bytes": 5303502 + }, + { + "name": "Gp0127635_COG GFF file", + "description": "COG GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cog.gff", + "md5_checksum": "6960907313875913a789e1fda46ed34e", + "id": "nmdc:6960907313875913a789e1fda46ed34e", + "file_size_bytes": 42106254 
+ }, + { + "name": "Gp0127635_PFAM GFF file", + "description": "PFAM GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_pfam.gff", + "md5_checksum": "033da43cdca9f81ed2270a9094fdb065", + "id": "nmdc:033da43cdca9f81ed2270a9094fdb065", + "file_size_bytes": 31806020 + }, + { + "name": "Gp0127635_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_tigrfam.gff", + "md5_checksum": "e9603ffd918db8a21df1310b890315ff", + "id": "nmdc:e9603ffd918db8a21df1310b890315ff", + "file_size_bytes": 3500524 + }, + { + "name": "Gp0127635_SMART GFF file", + "description": "SMART GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_smart.gff", + "md5_checksum": "fd98e0cfe1f4ca7b9e4af833c5ef199c", + "id": "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", + "file_size_bytes": 9346082 + }, + { + "name": "Gp0127635_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_supfam.gff", + "md5_checksum": "03481d99958ae1c9dcccb8fd91c0bbf7", + "id": "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", + "file_size_bytes": 52582333 + }, + { + "name": "Gp0127635_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cath_funfam.gff", + "md5_checksum": "f0a96fb57947358a42053e9fb7134e70", + "id": "nmdc:f0a96fb57947358a42053e9fb7134e70", + "file_size_bytes": 40179818 + }, + { + "name": "Gp0127635_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko_ec.gff", + "md5_checksum": "9737b61f2e6e923ac662e0a1c4f6aaa9", + "id": "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9", + 
"file_size_bytes": 25482964 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34708" + }, + "has_input": [ + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "nmdc:2d8cca230f439e38f1e628666e40e013", + "nmdc:dd3668477e39a65243179dfb9e4bf26e" + ], + "too_short_contig_num": 192406, + "part_of": [ + "nmdc:mga0ak4p20" + ], + "binned_contig_num": 502, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:daed5e3af5201fe510e780f155f90bc3", + "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", + "nmdc:b5ae13756638f09d74fdbe03183b231f", + "nmdc:1dc5796596177362849da19fc4e50b13", + "nmdc:fba0bfa144e9ef179edb10b5a941c259" + ], + "was_informed_by": "gold:Gp0127635", + "input_contig_num": 206754, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0ak4p20", + "mags_list": [ + { + "number_of_contig": 203, + "completeness": 41.91, + "bin_name": "bins.1", + "gene_count": 1456, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.88, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 24 + }, + { + "number_of_contig": 171, + "completeness": 8.33, + "bin_name": "bins.2", + "gene_count": 880, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 55, + "completeness": 14.66, + "bin_name": "bins.3", + "gene_count": 269, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 
0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + "number_of_contig": 73, + "completeness": 0.0, + "bin_name": "bins.4", + "gene_count": 475, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + } + ], + "unbinned_contig_num": 13846, + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127635_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.tooShort.fa", + "md5_checksum": "daed5e3af5201fe510e780f155f90bc3", + "id": "nmdc:daed5e3af5201fe510e780f155f90bc3", + "file_size_bytes": 86476884 + }, + { + "name": "Gp0127635_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.unbinned.fa", + "md5_checksum": "7cdb1c384c8bc63b3c127e5bc434ac6b", + "id": "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", + "file_size_bytes": 22898396 + }, + { + "name": "Gp0127635_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for 
Gp0127635", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_checkm_qa.out", + "md5_checksum": "b5ae13756638f09d74fdbe03183b231f", + "id": "nmdc:b5ae13756638f09d74fdbe03183b231f", + "file_size_bytes": 1240 + }, + { + "name": "Gp0127635_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127635", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_hqmq_bin.zip", + "md5_checksum": "1dc5796596177362849da19fc4e50b13", + "id": "nmdc:1dc5796596177362849da19fc4e50b13", + "file_size_bytes": 182 + }, + { + "name": "Gp0127635_metabat2 bins", + "description": "metabat2 bins for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_metabat_bin.zip", + "md5_checksum": "fba0bfa144e9ef179edb10b5a941c259", + "id": "nmdc:fba0bfa144e9ef179edb10b5a941c259", + "file_size_bytes": 795127 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b48" + }, + "id": "nmdc:omprc-11-c8dzx197", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-kwfbp795" + ], + "has_output": [ + "jgi:574fde647ded5e3df1ee1406" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": 
"nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127637" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85f" + }, + "has_input": [ + "nmdc:320ac579913ecc4c218607b6b3b915b3" + ], + "part_of": [ + "nmdc:mga0sb9b30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", + "nmdc:611e67df261e050860b1075c6a6a5ff5" + ], + "was_informed_by": "gold:Gp0127637", + "input_read_count": 24239336, + "output_read_bases": 2975652755, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3660139736, + "name": "Read QC Activity for nmdc:mga0sb9b30", + "output_read_count": 19917090, + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:11:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127637_Filtered Reads", + "description": "Filtered Reads for Gp0127637", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", + "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", + "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", + "file_size_bytes": 1553219358 + }, + { + "name": "Gp0127637_Filtered Stats", + "description": "Filtered Stats for Gp0127637", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", + "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", + "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf23" + }, + "has_input": [ + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + 
"nmdc:37dd1d73ad47979ee5284830d27df535", + "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "nmdc:81a6efbd082e07bc2db174a88d64a272", + "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "nmdc:9a1826f66ee45187d627076d11dc491f", + "nmdc:67adb9cc2c75251f556a90b1a959ea72" + ], + "was_informed_by": "gold:Gp0127637", + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:11:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127637_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", + "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", + "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "file_size_bytes": 660 + }, + { + "name": "Gp0127637_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", + "md5_checksum": "37dd1d73ad47979ee5284830d27df535", + "id": "nmdc:37dd1d73ad47979ee5284830d27df535", + "file_size_bytes": 594054 + }, + { + "name": "Gp0127637_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127637", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", + "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", + "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "file_size_bytes": 227750 + }, + { + "name": "Gp0127637_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127637", + "data_object_type": "Centrifuge 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", + "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", + "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "file_size_bytes": 1457058272 + }, + { + "name": "Gp0127637_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127637", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", + "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", + "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "file_size_bytes": 251867 + }, + { + "name": "Gp0127637_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127637", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", + "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", + "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", + "file_size_bytes": 2325282 + }, + { + "name": "Gp0127637_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127637", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", + "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", + "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "file_size_bytes": 1160106364 + }, + { + "name": "Gp0127637_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127637", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", + "md5_checksum": "9a1826f66ee45187d627076d11dc491f", + "id": "nmdc:9a1826f66ee45187d627076d11dc491f", + "file_size_bytes": 613810 + }, 
+ { + "name": "Gp0127637_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127637", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", + "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", + "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", + "file_size_bytes": 3853908 + } + ] + }, + { + "_id": { + "$oid": "61e7191f833bcf838a6ffa50" + }, + "has_input": [ + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" + ], + "part_of": [ + "nmdc:mga0sb9b30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "nmdc:37dd1d73ad47979ee5284830d27df535", + "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "nmdc:81a6efbd082e07bc2db174a88d64a272", + "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "nmdc:9a1826f66ee45187d627076d11dc491f", + "nmdc:67adb9cc2c75251f556a90b1a959ea72" + ], + "was_informed_by": "gold:Gp0127637", + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:11:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127637_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", + "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", + "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "file_size_bytes": 660 + }, + { + "name": "Gp0127637_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", + 
"md5_checksum": "37dd1d73ad47979ee5284830d27df535", + "id": "nmdc:37dd1d73ad47979ee5284830d27df535", + "file_size_bytes": 594054 + }, + { + "name": "Gp0127637_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127637", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", + "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", + "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "file_size_bytes": 227750 + }, + { + "name": "Gp0127637_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127637", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", + "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", + "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "file_size_bytes": 1457058272 + }, + { + "name": "Gp0127637_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127637", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", + "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", + "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "file_size_bytes": 251867 + }, + { + "name": "Gp0127637_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127637", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", + "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", + "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", + "file_size_bytes": 2325282 + }, + { + "name": "Gp0127637_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127637", + 
"data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", + "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", + "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "file_size_bytes": 1160106364 + }, + { + "name": "Gp0127637_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127637", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", + "md5_checksum": "9a1826f66ee45187d627076d11dc491f", + "id": "nmdc:9a1826f66ee45187d627076d11dc491f", + "file_size_bytes": 613810 + }, + { + "name": "Gp0127637_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127637", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", + "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", + "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", + "file_size_bytes": 3853908 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f93" + }, + "has_input": [ + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" + ], + "part_of": [ + "nmdc:mga0sb9b30" + ], + "ctg_logsum": 271617, + "scaf_logsum": 272416, + "gap_pct": 0.00166, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:aee81646e593045bbb32a0012870b88b", + "nmdc:f1026db242cad285204c9c3d6307c183", + "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", + "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", + "nmdc:dee5fa37f57a24685b65e00380d6e433" + ], + "asm_score": 5.062, + "was_informed_by": "gold:Gp0127637", + "ctg_powsum": 29885, + "scaf_max": 43650, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "scaf_powsum": 29983, + "execution_resource": "NERSC-Cori", + "contigs": 214863, + "name": "Assembly Activity for nmdc:mga0sb9b30", + "ctg_max": 43650, 
+ "gc_std": 0.08814, + "contig_bp": 108739484, + "gc_avg": 0.63266, + "started_at_time": "2021-10-11T02:24:01Z", + "scaf_bp": 108741284, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 214737, + "ended_at_time": "2021-10-11T03:11:56+00:00", + "ctg_l50": 505, + "ctg_l90": 294, + "ctg_n50": 58474, + "ctg_n90": 177521, + "scaf_l50": 505, + "scaf_l90": 294, + "scaf_n50": 58469, + "scaf_n90": 177412, + "output_data_objects": [ + { + "name": "Gp0127637_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127637", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_contigs.fna", + "md5_checksum": "aee81646e593045bbb32a0012870b88b", + "id": "nmdc:aee81646e593045bbb32a0012870b88b", + "file_size_bytes": 117200777 + }, + { + "name": "Gp0127637_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127637", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_scaffolds.fna", + "md5_checksum": "f1026db242cad285204c9c3d6307c183", + "id": "nmdc:f1026db242cad285204c9c3d6307c183", + "file_size_bytes": 116554638 + }, + { + "name": "Gp0127637_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_covstats.txt", + "md5_checksum": "b02b0a0145d14e97a31e6a6f7e4b8dc8", + "id": "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", + "file_size_bytes": 17037754 + }, + { + "name": "Gp0127637_Assembled AGP file", + "description": "Assembled AGP file for Gp0127637", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_assembly.agp", + "md5_checksum": "8afcf1e8b7b3f35edaefee7a0c31e19f", + "id": "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", + "file_size_bytes": 15931363 + }, + { + "name": 
"Gp0127637_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127637", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_pairedMapped_sorted.bam", + "md5_checksum": "dee5fa37f57a24685b65e00380d6e433", + "id": "nmdc:dee5fa37f57a24685b65e00380d6e433", + "file_size_bytes": 1739825120 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a6" + }, + "has_input": [ + "nmdc:aee81646e593045bbb32a0012870b88b" + ], + "part_of": [ + "nmdc:mga0sb9b30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:69603434971f93dbd79860c18dd5c61a", + "nmdc:bf8f822c6730b4cc73715ced3d25c262", + "nmdc:b9ec0754ffaa338c899244703bc91386", + "nmdc:22402cc61770feb5a0aaa4f760808366", + "nmdc:8c96f7faa38c361acc247b5a107a6b54", + "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", + "nmdc:89a8657f659710b3927baab155917fdf", + "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", + "nmdc:5cae6736713d02ccbe26543d733875cb", + "nmdc:a64350eb947c199cc1fbfb087191c0c7", + "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", + "nmdc:2471f27b6cf11b6f93c791c273989731" + ], + "was_informed_by": "gold:Gp0127637", + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0sb9b30", + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:11:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127637_Protein FAA", + "description": "Protein FAA for Gp0127637", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_proteins.faa", + "md5_checksum": "69603434971f93dbd79860c18dd5c61a", + "id": "nmdc:69603434971f93dbd79860c18dd5c61a", + "file_size_bytes": 66263123 + }, + { + "name": "Gp0127637_Structural annotation GFF file", + "description": 
"Structural annotation GFF file for Gp0127637", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_structural_annotation.gff", + "md5_checksum": "bf8f822c6730b4cc73715ced3d25c262", + "id": "nmdc:bf8f822c6730b4cc73715ced3d25c262", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127637_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127637", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_functional_annotation.gff", + "md5_checksum": "b9ec0754ffaa338c899244703bc91386", + "id": "nmdc:b9ec0754ffaa338c899244703bc91386", + "file_size_bytes": 74459552 + }, + { + "name": "Gp0127637_KO TSV file", + "description": "KO TSV file for Gp0127637", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko.tsv", + "md5_checksum": "22402cc61770feb5a0aaa4f760808366", + "id": "nmdc:22402cc61770feb5a0aaa4f760808366", + "file_size_bytes": 8394894 + }, + { + "name": "Gp0127637_EC TSV file", + "description": "EC TSV file for Gp0127637", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ec.tsv", + "md5_checksum": "8c96f7faa38c361acc247b5a107a6b54", + "id": "nmdc:8c96f7faa38c361acc247b5a107a6b54", + "file_size_bytes": 5556852 + }, + { + "name": "Gp0127637_COG GFF file", + "description": "COG GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cog.gff", + "md5_checksum": "7a28d1eafd3a3c181e95f61eb3d18bf1", + "id": "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", + "file_size_bytes": 44328195 + }, + { + "name": "Gp0127637_PFAM GFF file", + "description": "PFAM GFF file for Gp0127637", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_pfam.gff", + "md5_checksum": "89a8657f659710b3927baab155917fdf", + "id": "nmdc:89a8657f659710b3927baab155917fdf", + "file_size_bytes": 33562431 + }, + { + "name": "Gp0127637_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_tigrfam.gff", + "md5_checksum": "9b9ecf34f2f6ef6865d4864f5debfbb7", + "id": "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", + "file_size_bytes": 3752251 + }, + { + "name": "Gp0127637_SMART GFF file", + "description": "SMART GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_smart.gff", + "md5_checksum": "5cae6736713d02ccbe26543d733875cb", + "id": "nmdc:5cae6736713d02ccbe26543d733875cb", + "file_size_bytes": 9871224 + }, + { + "name": "Gp0127637_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_supfam.gff", + "md5_checksum": "a64350eb947c199cc1fbfb087191c0c7", + "id": "nmdc:a64350eb947c199cc1fbfb087191c0c7", + "file_size_bytes": 55329770 + }, + { + "name": "Gp0127637_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cath_funfam.gff", + "md5_checksum": "b8492828a1ad078d9c3192bab4d9a3fa", + "id": "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", + "file_size_bytes": 42052238 + }, + { + "name": "Gp0127637_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko_ec.gff", + "md5_checksum": "2471f27b6cf11b6f93c791c273989731", + "id": "nmdc:2471f27b6cf11b6f93c791c273989731", + "file_size_bytes": 26689447 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34702" + }, + "has_input": [ + 
"nmdc:aee81646e593045bbb32a0012870b88b", + "nmdc:dee5fa37f57a24685b65e00380d6e433", + "nmdc:b9ec0754ffaa338c899244703bc91386" + ], + "too_short_contig_num": 200319, + "part_of": [ + "nmdc:mga0sb9b30" + ], + "binned_contig_num": 482, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:7968c6b88e49f066bd24982b4d54965b", + "nmdc:120fbaa7439eb628d9a982de573446a8", + "nmdc:347a7ee18b37674e031cca9046e92623", + "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", + "nmdc:382d00338a5e4829285e58a203de153e" + ], + "was_informed_by": "gold:Gp0127637", + "input_contig_num": 214863, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0sb9b30", + "mags_list": [ + { + "number_of_contig": 59, + "completeness": 8.33, + "bin_name": "bins.1", + "gene_count": 295, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 233, + "completeness": 45.87, + "bin_name": "bins.2", + "gene_count": 1342, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.28, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 18 + }, + { + "number_of_contig": 190, + "completeness": 75.08, + "bin_name": "bins.3", + "gene_count": 1991, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.21, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + 
"num_t_rna": 37 + } + ], + "unbinned_contig_num": 14062, + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:11:56+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127637_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.tooShort.fa", + "md5_checksum": "7968c6b88e49f066bd24982b4d54965b", + "id": "nmdc:7968c6b88e49f066bd24982b4d54965b", + "file_size_bytes": 91577123 + }, + { + "name": "Gp0127637_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.unbinned.fa", + "md5_checksum": "120fbaa7439eb628d9a982de573446a8", + "id": "nmdc:120fbaa7439eb628d9a982de573446a8", + "file_size_bytes": 22556841 + }, + { + "name": "Gp0127637_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127637", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_checkm_qa.out", + "md5_checksum": "347a7ee18b37674e031cca9046e92623", + "id": "nmdc:347a7ee18b37674e031cca9046e92623", + "file_size_bytes": 1092 + }, + { + "name": "Gp0127637_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for 
Gp0127637", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_hqmq_bin.zip", + "md5_checksum": "de1da5ea4bfdf3131a6c510b79b145c2", + "id": "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", + "file_size_bytes": 504932 + }, + { + "name": "Gp0127637_metabat2 bins", + "description": "metabat2 bins for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_metabat_bin.zip", + "md5_checksum": "382d00338a5e4829285e58a203de153e", + "id": "nmdc:382d00338a5e4829285e58a203de153e", + "file_size_bytes": 432910 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b49" + }, + "id": "nmdc:omprc-11-tgxmb243", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-pq3zmp51" + ], + "has_output": [ + "jgi:574fde837ded5e3df1ee141d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127638" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c873" + }, + "has_input": [ + "nmdc:56b2d94789953adf1b4ed35f09f0edd4" + ], + "part_of": [ + "nmdc:mga0hjgc20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:56ba2416c050decd6c16c618c1e4a752", + 
"nmdc:5c9398042e9ff608befa78e86597bdf0" + ], + "was_informed_by": "gold:Gp0127638", + "input_read_count": 21721428, + "output_read_bases": 2949961420, + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3279935628, + "name": "Read QC Activity for nmdc:mga0hjgc20", + "output_read_count": 19723416, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T20:49:51+00:00", + "output_data_objects": [ + { + "name": "Gp0127638_Filtered Reads", + "description": "Filtered Reads for Gp0127638", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filtered.fastq.gz", + "md5_checksum": "56ba2416c050decd6c16c618c1e4a752", + "id": "nmdc:56ba2416c050decd6c16c618c1e4a752", + "file_size_bytes": 1649318115 + }, + { + "name": "Gp0127638_Filtered Stats", + "description": "Filtered Stats for Gp0127638", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filterStats.txt", + "md5_checksum": "5c9398042e9ff608befa78e86597bdf0", + "id": "nmdc:5c9398042e9ff608befa78e86597bdf0", + "file_size_bytes": 283 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf35" + }, + "has_input": [ + "nmdc:56ba2416c050decd6c16c618c1e4a752" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "nmdc:b6de56746a284f8226dd86817c8ae04e", + "nmdc:d9572e708af9f0a06e98cfddfb298359", + "nmdc:e9946f36795474182b7759d3d7532b57", + "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "nmdc:997a66f49a232750bd7132639f3387e7", + "nmdc:d3f604a59babf001839d38a617b62931", + "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "nmdc:70c2fc1a2c7c0032528ff91ad1576465" + ], + "was_informed_by": "gold:Gp0127638", + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", 
+ "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:49:51+00:00", + "output_data_objects": [ + { + "name": "Gp0127638_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", + "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", + "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "file_size_bytes": 2025 + }, + { + "name": "Gp0127638_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", + "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", + "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", + "file_size_bytes": 655633 + }, + { + "name": "Gp0127638_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127638", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", + "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", + "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", + "file_size_bytes": 232133 + }, + { + "name": "Gp0127638_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127638", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", + "md5_checksum": "e9946f36795474182b7759d3d7532b57", + "id": "nmdc:e9946f36795474182b7759d3d7532b57", + "file_size_bytes": 1448205544 + }, + { + "name": "Gp0127638_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127638", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", + "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", + "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "file_size_bytes": 253872 + }, + { + "name": "Gp0127638_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127638", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", + "md5_checksum": "997a66f49a232750bd7132639f3387e7", + "id": "nmdc:997a66f49a232750bd7132639f3387e7", + "file_size_bytes": 2331772 + }, + { + "name": "Gp0127638_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127638", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", + "md5_checksum": "d3f604a59babf001839d38a617b62931", + "id": "nmdc:d3f604a59babf001839d38a617b62931", + "file_size_bytes": 1157365410 + }, + { + "name": "Gp0127638_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127638", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", + "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", + "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "file_size_bytes": 621484 + }, + { + "name": "Gp0127638_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127638", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", + "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", + "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", + "file_size_bytes": 3896830 + } + ] + }, + { + "_id": { + "$oid": 
"61e719b5833bcf838a7010e1" + }, + "has_input": [ + "nmdc:56ba2416c050decd6c16c618c1e4a752" + ], + "part_of": [ + "nmdc:mga0hjgc20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "nmdc:b6de56746a284f8226dd86817c8ae04e", + "nmdc:d9572e708af9f0a06e98cfddfb298359", + "nmdc:e9946f36795474182b7759d3d7532b57", + "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "nmdc:997a66f49a232750bd7132639f3387e7", + "nmdc:d3f604a59babf001839d38a617b62931", + "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "nmdc:70c2fc1a2c7c0032528ff91ad1576465" + ], + "was_informed_by": "gold:Gp0127638", + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:49:51+00:00", + "output_data_objects": [ + { + "name": "Gp0127638_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", + "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", + "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "file_size_bytes": 2025 + }, + { + "name": "Gp0127638_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", + "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", + "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", + "file_size_bytes": 655633 + }, + { + "name": "Gp0127638_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127638", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", + "md5_checksum": 
"d9572e708af9f0a06e98cfddfb298359", + "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", + "file_size_bytes": 232133 + }, + { + "name": "Gp0127638_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127638", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", + "md5_checksum": "e9946f36795474182b7759d3d7532b57", + "id": "nmdc:e9946f36795474182b7759d3d7532b57", + "file_size_bytes": 1448205544 + }, + { + "name": "Gp0127638_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127638", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", + "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", + "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "file_size_bytes": 253872 + }, + { + "name": "Gp0127638_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127638", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", + "md5_checksum": "997a66f49a232750bd7132639f3387e7", + "id": "nmdc:997a66f49a232750bd7132639f3387e7", + "file_size_bytes": 2331772 + }, + { + "name": "Gp0127638_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127638", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", + "md5_checksum": "d3f604a59babf001839d38a617b62931", + "id": "nmdc:d3f604a59babf001839d38a617b62931", + "file_size_bytes": 1157365410 + }, + { + "name": "Gp0127638_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127638", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", + "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", + "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "file_size_bytes": 621484 + }, + { + "name": "Gp0127638_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127638", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", + "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", + "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", + "file_size_bytes": 3896830 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fac" + }, + "has_input": [ + "nmdc:56ba2416c050decd6c16c618c1e4a752" + ], + "part_of": [ + "nmdc:mga0hjgc20" + ], + "ctg_logsum": 141543, + "scaf_logsum": 141966, + "gap_pct": 0.00109, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5122503797ac0ed9694a6f4feecab955", + "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", + "nmdc:0944f2c0dd70a751117fb10d9a41fddc", + "nmdc:1917dcbbe1efcc2a57c511648a7f332e", + "nmdc:6420476f7e93425a68aa00b8e09cd6e7" + ], + "asm_score": 6.89, + "was_informed_by": "gold:Gp0127638", + "ctg_powsum": 15753, + "scaf_max": 48487, + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "scaf_powsum": 15801, + "execution_resource": "NERSC-Cori", + "contigs": 169698, + "name": "Assembly Activity for nmdc:mga0hjgc20", + "ctg_max": 48487, + "gc_std": 0.08917, + "gc_avg": 0.63213, + "contig_bp": 77783768, + "started_at_time": "2021-12-01T21:31:29Z", + "scaf_bp": 77784618, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169622, + "ended_at_time": "2021-12-02T20:49:51+00:00", + "ctg_l50": 433, + "ctg_l90": 289, + "ctg_n50": 51455, + "ctg_n90": 144304, + "scaf_l50": 433, + "scaf_l90": 289, + "scaf_n50": 51437, + "scaf_n90": 144234, + "output_data_objects": [ + { + "name": 
"Gp0127638_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127638", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_contigs.fna", + "md5_checksum": "5122503797ac0ed9694a6f4feecab955", + "id": "nmdc:5122503797ac0ed9694a6f4feecab955", + "file_size_bytes": 84307064 + }, + { + "name": "Gp0127638_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127638", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_scaffolds.fna", + "md5_checksum": "d7ee4628101b11bc5fb67d961a4e1a0a", + "id": "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", + "file_size_bytes": 83796938 + }, + { + "name": "Gp0127638_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_covstats.txt", + "md5_checksum": "0944f2c0dd70a751117fb10d9a41fddc", + "id": "nmdc:0944f2c0dd70a751117fb10d9a41fddc", + "file_size_bytes": 13413799 + }, + { + "name": "Gp0127638_Assembled AGP file", + "description": "Assembled AGP file for Gp0127638", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_assembly.agp", + "md5_checksum": "1917dcbbe1efcc2a57c511648a7f332e", + "id": "nmdc:1917dcbbe1efcc2a57c511648a7f332e", + "file_size_bytes": 12526116 + }, + { + "name": "Gp0127638_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127638", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_pairedMapped_sorted.bam", + "md5_checksum": "6420476f7e93425a68aa00b8e09cd6e7", + "id": "nmdc:6420476f7e93425a68aa00b8e09cd6e7", + "file_size_bytes": 1810224630 + } + ] + }, + { + "_id": { + "$oid": 
"649b005bbf2caae0415ef9bc" + }, + "has_input": [ + "nmdc:5122503797ac0ed9694a6f4feecab955" + ], + "part_of": [ + "nmdc:mga0hjgc20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f56690d136c4dafdc1eaa64a21fd9210", + "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", + "nmdc:41453202313c56e06b0cc00b5ee6c375", + "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", + "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", + "nmdc:37cb3fb060da091a84f1baa7ef3743fc", + "nmdc:34680897818585cefbef6e69109e7de4", + "nmdc:a00404838fbe9f846a704e1dbb14f2b2", + "nmdc:700dd121a0ac41e3fa8077d7330adae7", + "nmdc:e429651ae53a18b07d99880d09a19b26", + "nmdc:b22aab3cc1b9231102b23c31b418eff4", + "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef" + ], + "was_informed_by": "gold:Gp0127638", + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0hjgc20", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T20:49:51+00:00", + "output_data_objects": [ + { + "name": "Gp0127638_Protein FAA", + "description": "Protein FAA for Gp0127638", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_proteins.faa", + "md5_checksum": "f56690d136c4dafdc1eaa64a21fd9210", + "id": "nmdc:f56690d136c4dafdc1eaa64a21fd9210", + "file_size_bytes": 49236514 + }, + { + "name": "Gp0127638_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127638", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_structural_annotation.gff", + "md5_checksum": "8be4e8ac2d00bf1d5b4863c36dc3678c", + "id": "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", + "file_size_bytes": 2519 + }, + { + "name": "Gp0127638_Functional annotation GFF file", + "description": "Functional annotation 
GFF file for Gp0127638", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_functional_annotation.gff", + "md5_checksum": "41453202313c56e06b0cc00b5ee6c375", + "id": "nmdc:41453202313c56e06b0cc00b5ee6c375", + "file_size_bytes": 56761027 + }, + { + "name": "Gp0127638_KO TSV file", + "description": "KO TSV file for Gp0127638", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko.tsv", + "md5_checksum": "e06bd74dce2e5b839b35ac1012d93ba4", + "id": "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", + "file_size_bytes": 6728487 + }, + { + "name": "Gp0127638_EC TSV file", + "description": "EC TSV file for Gp0127638", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ec.tsv", + "md5_checksum": "f2786d1f8a17bedd0104b01ec06ebfce", + "id": "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", + "file_size_bytes": 4522678 + }, + { + "name": "Gp0127638_COG GFF file", + "description": "COG GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cog.gff", + "md5_checksum": "37cb3fb060da091a84f1baa7ef3743fc", + "id": "nmdc:37cb3fb060da091a84f1baa7ef3743fc", + "file_size_bytes": 33992392 + }, + { + "name": "Gp0127638_PFAM GFF file", + "description": "PFAM GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_pfam.gff", + "md5_checksum": "34680897818585cefbef6e69109e7de4", + "id": "nmdc:34680897818585cefbef6e69109e7de4", + "file_size_bytes": 25203872 + }, + { + "name": "Gp0127638_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_tigrfam.gff", + "md5_checksum": "a00404838fbe9f846a704e1dbb14f2b2", + "id": 
"nmdc:a00404838fbe9f846a704e1dbb14f2b2", + "file_size_bytes": 2852587 + }, + { + "name": "Gp0127638_SMART GFF file", + "description": "SMART GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_smart.gff", + "md5_checksum": "700dd121a0ac41e3fa8077d7330adae7", + "id": "nmdc:700dd121a0ac41e3fa8077d7330adae7", + "file_size_bytes": 7723231 + }, + { + "name": "Gp0127638_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_supfam.gff", + "md5_checksum": "e429651ae53a18b07d99880d09a19b26", + "id": "nmdc:e429651ae53a18b07d99880d09a19b26", + "file_size_bytes": 42064836 + }, + { + "name": "Gp0127638_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cath_funfam.gff", + "md5_checksum": "b22aab3cc1b9231102b23c31b418eff4", + "id": "nmdc:b22aab3cc1b9231102b23c31b418eff4", + "file_size_bytes": 32005228 + }, + { + "name": "Gp0127638_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko_ec.gff", + "md5_checksum": "ebb5a6a7ad1f14fd8cf2178ec59969ef", + "id": "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef", + "file_size_bytes": 21405596 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3471d" + }, + "has_input": [ + "nmdc:5122503797ac0ed9694a6f4feecab955", + "nmdc:6420476f7e93425a68aa00b8e09cd6e7", + "nmdc:41453202313c56e06b0cc00b5ee6c375" + ], + "too_short_contig_num": 162130, + "part_of": [ + "nmdc:mga0hjgc20" + ], + "binned_contig_num": 189, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", + "nmdc:8ca8e2250dc68643e937163323f2a826" + ], + "was_informed_by": "gold:Gp0127638", + "input_contig_num": 169697, + "id": 
"nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0hjgc20", + "mags_list": [ + { + "number_of_contig": 189, + "completeness": 73.5, + "bin_name": "bins.1", + "gene_count": 2020, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 37 + } + ], + "unbinned_contig_num": 7378, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T20:49:51+00:00", + "output_data_objects": [ + { + "name": "Gp0127638_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127638", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_checkm_qa.out", + "md5_checksum": "dcdd7e33e92d3658fe68056f21b57f5d", + "id": "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", + "file_size_bytes": 760 + }, + { + "name": "Gp0127638_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127638", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_hqmq_bin.zip", + "md5_checksum": "8ca8e2250dc68643e937163323f2a826", + "id": "nmdc:8ca8e2250dc68643e937163323f2a826", + "file_size_bytes": 508443 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b4a" + }, + "id": "nmdc:omprc-11-t0xjjc50", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + 
"nmdc:bsm-11-vg9vy382" + ], + "has_output": [ + "jgi:55d7402a0d8785342fcf7e3b" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115670" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c888" + }, + "has_input": [ + "nmdc:aa477a857eb9da284635b774477f3f54" + ], + "part_of": [ + "nmdc:mga0d7pj22" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7f6b353300583c60d2d668880b4134cd", + "nmdc:a4f65d101293fa4345cd865f86597464" + ], + "was_informed_by": "gold:Gp0115670", + "input_read_count": 36554212, + "output_read_bases": 5044444014, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "input_read_bases": 5519686012, + "name": "Read QC Activity for nmdc:mga0d7pj22", + "output_read_count": 33663942, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:55:52+00:00", + "output_data_objects": [ + { + "name": "Gp0115670_Filtered Reads", + "description": "Filtered Reads for Gp0115670", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", + "md5_checksum": "7f6b353300583c60d2d668880b4134cd", + "id": "nmdc:7f6b353300583c60d2d668880b4134cd", + "file_size_bytes": 3012174785 + }, + { + "name": "Gp0115670_Filtered Stats", + "description": "Filtered Stats for Gp0115670", + "data_object_type": "QC Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", + "md5_checksum": "a4f65d101293fa4345cd865f86597464", + "id": "nmdc:a4f65d101293fa4345cd865f86597464", + "file_size_bytes": 291 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf4d" + }, + "has_input": [ + "nmdc:7f6b353300583c60d2d668880b4134cd" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "nmdc:c065784bed2b2495d512af93d05967de", + "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "nmdc:8a26d8496a70f4777be0e1237092e44c", + "nmdc:694b83f0b6f599948d4248dd48dd9ba9" + ], + "was_informed_by": "gold:Gp0115670", + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:55:52+00:00", + "output_data_objects": [ + { + "name": "Gp0115670_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", + "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", + "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "file_size_bytes": 13758 + }, + { + "name": "Gp0115670_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", + "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", + "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "file_size_bytes": 1116084 + }, + { + "name": "Gp0115670_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML 
report for Gp0115670", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", + "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", + "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "file_size_bytes": 268542 + }, + { + "name": "Gp0115670_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115670", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", + "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", + "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "file_size_bytes": 2458475116 + }, + { + "name": "Gp0115670_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115670", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", + "md5_checksum": "c065784bed2b2495d512af93d05967de", + "id": "nmdc:c065784bed2b2495d512af93d05967de", + "file_size_bytes": 261692 + }, + { + "name": "Gp0115670_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115670", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", + "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", + "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "file_size_bytes": 2343355 + }, + { + "name": "Gp0115670_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115670", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", + "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", + 
"id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "file_size_bytes": 2019980511 + }, + { + "name": "Gp0115670_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115670", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", + "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", + "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", + "file_size_bytes": 694029 + }, + { + "name": "Gp0115670_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115670", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", + "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", + "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", + "file_size_bytes": 4190653 + } + ] + }, + { + "_id": { + "$oid": "61e71a12833bcf838a701ba9" + }, + "has_input": [ + "nmdc:7f6b353300583c60d2d668880b4134cd" + ], + "part_of": [ + "nmdc:mga0d7pj22" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "nmdc:c065784bed2b2495d512af93d05967de", + "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "nmdc:8a26d8496a70f4777be0e1237092e44c", + "nmdc:694b83f0b6f599948d4248dd48dd9ba9" + ], + "was_informed_by": "gold:Gp0115670", + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:55:52+00:00", + "output_data_objects": [ + { + "name": "Gp0115670_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115670", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", + "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", + "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "file_size_bytes": 13758 + }, + { + "name": "Gp0115670_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", + "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", + "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "file_size_bytes": 1116084 + }, + { + "name": "Gp0115670_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115670", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", + "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", + "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "file_size_bytes": 268542 + }, + { + "name": "Gp0115670_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115670", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", + "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", + "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "file_size_bytes": 2458475116 + }, + { + "name": "Gp0115670_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115670", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", + "md5_checksum": "c065784bed2b2495d512af93d05967de", + "id": "nmdc:c065784bed2b2495d512af93d05967de", + "file_size_bytes": 261692 + }, + { + "name": "Gp0115670_Centrifuge Krona HTML report", + "description": 
"Centrifuge Krona HTML report for Gp0115670", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", + "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", + "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "file_size_bytes": 2343355 + }, + { + "name": "Gp0115670_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115670", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", + "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", + "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "file_size_bytes": 2019980511 + }, + { + "name": "Gp0115670_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115670", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", + "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", + "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", + "file_size_bytes": 694029 + }, + { + "name": "Gp0115670_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115670", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", + "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", + "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", + "file_size_bytes": 4190653 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc0" + }, + "has_input": [ + "nmdc:7f6b353300583c60d2d668880b4134cd" + ], + "part_of": [ + "nmdc:mga0d7pj22" + ], + "ctg_logsum": 272574, + "scaf_logsum": 274450, + "gap_pct": 0.00346, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + 
"nmdc:1dfaed4da055c5fd4226abe08bd91db9", + "nmdc:8a749340eefc40901a22a0ef603bc803", + "nmdc:ad027e4c3ca67907154c03feeebbd97b", + "nmdc:c4f2407273babd894282d4d0f20be5d1" + ], + "asm_score": 12.57, + "was_informed_by": "gold:Gp0115670", + "ctg_powsum": 33596, + "scaf_max": 211520, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "scaf_powsum": 33865, + "execution_resource": "NERSC-Cori", + "contigs": 152605, + "name": "Assembly Activity for nmdc:mga0d7pj22", + "ctg_max": 211520, + "gc_std": 0.125, + "contig_bp": 79563543, + "gc_avg": 0.57036, + "started_at_time": "2021-10-11T02:28:43Z", + "scaf_bp": 79566293, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 152330, + "ended_at_time": "2021-10-11T05:55:52+00:00", + "ctg_l50": 492, + "ctg_l90": 290, + "ctg_n50": 35595, + "ctg_n90": 126332, + "scaf_l50": 493, + "scaf_l90": 290, + "scaf_n50": 35340, + "scaf_n90": 126070, + "scaf_l_gt50k": 1744421, + "scaf_n_gt50k": 21, + "scaf_pct_gt50k": 2.192412, + "output_data_objects": [ + { + "name": "Gp0115670_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115670", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_contigs.fna", + "md5_checksum": "975cdb0a18df949be4efb80d1dc4ef0b", + "id": "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + "file_size_bytes": 85578260 + }, + { + "name": "Gp0115670_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115670", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_scaffolds.fna", + "md5_checksum": "1dfaed4da055c5fd4226abe08bd91db9", + "id": "nmdc:1dfaed4da055c5fd4226abe08bd91db9", + "file_size_bytes": 85115954 + }, + { + "name": "Gp0115670_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115670", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_covstats.txt", + "md5_checksum": "8a749340eefc40901a22a0ef603bc803", + "id": "nmdc:8a749340eefc40901a22a0ef603bc803", + "file_size_bytes": 12068883 + }, + { + "name": "Gp0115670_Assembled AGP file", + "description": "Assembled AGP file for Gp0115670", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_assembly.agp", + "md5_checksum": "ad027e4c3ca67907154c03feeebbd97b", + "id": "nmdc:ad027e4c3ca67907154c03feeebbd97b", + "file_size_bytes": 11264235 + }, + { + "name": "Gp0115670_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115670", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_pairedMapped_sorted.bam", + "md5_checksum": "c4f2407273babd894282d4d0f20be5d1", + "id": "nmdc:c4f2407273babd894282d4d0f20be5d1", + "file_size_bytes": 3245960211 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9cf" + }, + "has_input": [ + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b" + ], + "part_of": [ + "nmdc:mga0d7pj22" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:21230aff7bb5b266fb544905f9ac5ce2", + "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", + "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", + "nmdc:811910b7d8c300befddd039e833b0453", + "nmdc:9ed55d9535d1592866a66e9d5cd936a2", + "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", + "nmdc:4b56646de8c37278beaaf9797e4ddf2f", + "nmdc:53a0873376e22fef62f2740f6afead21", + "nmdc:36748318682076112ba81283c8bc767a", + "nmdc:5dd32385b351847f23ec4eac63eb70ff", + "nmdc:95076052a4d5d57e1ed0c7699e4f5472", + "nmdc:6ae89cc4b2fb7d09614c106d3358be27" + ], + "was_informed_by": "gold:Gp0115670", + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0d7pj22", + 
"started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:55:52+00:00", + "output_data_objects": [ + { + "name": "Gp0115670_Protein FAA", + "description": "Protein FAA for Gp0115670", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_proteins.faa", + "md5_checksum": "21230aff7bb5b266fb544905f9ac5ce2", + "id": "nmdc:21230aff7bb5b266fb544905f9ac5ce2", + "file_size_bytes": 46061226 + }, + { + "name": "Gp0115670_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115670", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_structural_annotation.gff", + "md5_checksum": "91c5cc265ef61ab83111a5bc9462e8b2", + "id": "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", + "file_size_bytes": 2769 + }, + { + "name": "Gp0115670_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115670", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_functional_annotation.gff", + "md5_checksum": "0bc4d8b8ef11724c3d7e728b0e8e0ea5", + "id": "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", + "file_size_bytes": 50449176 + }, + { + "name": "Gp0115670_KO TSV file", + "description": "KO TSV file for Gp0115670", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko.tsv", + "md5_checksum": "811910b7d8c300befddd039e833b0453", + "id": "nmdc:811910b7d8c300befddd039e833b0453", + "file_size_bytes": 6653168 + }, + { + "name": "Gp0115670_EC TSV file", + "description": "EC TSV file for Gp0115670", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ec.tsv", + "md5_checksum": "9ed55d9535d1592866a66e9d5cd936a2", + "id": "nmdc:9ed55d9535d1592866a66e9d5cd936a2", + "file_size_bytes": 4232890 + }, + { + "name": "Gp0115670_COG GFF file", + "description": "COG GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cog.gff", + "md5_checksum": "a127efaa423e6dd6d24d7ab67cc2124a", + "id": "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", + "file_size_bytes": 28376544 + }, + { + "name": "Gp0115670_PFAM GFF file", + "description": "PFAM GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_pfam.gff", + "md5_checksum": "4b56646de8c37278beaaf9797e4ddf2f", + "id": "nmdc:4b56646de8c37278beaaf9797e4ddf2f", + "file_size_bytes": 22850790 + }, + { + "name": "Gp0115670_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_tigrfam.gff", + "md5_checksum": "53a0873376e22fef62f2740f6afead21", + "id": "nmdc:53a0873376e22fef62f2740f6afead21", + "file_size_bytes": 3099434 + }, + { + "name": "Gp0115670_SMART GFF file", + "description": "SMART GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_smart.gff", + "md5_checksum": "36748318682076112ba81283c8bc767a", + "id": "nmdc:36748318682076112ba81283c8bc767a", + "file_size_bytes": 6433811 + }, + { + "name": "Gp0115670_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_supfam.gff", + "md5_checksum": "5dd32385b351847f23ec4eac63eb70ff", + "id": "nmdc:5dd32385b351847f23ec4eac63eb70ff", + "file_size_bytes": 36427587 + }, + { + "name": "Gp0115670_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115670", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cath_funfam.gff", + "md5_checksum": "95076052a4d5d57e1ed0c7699e4f5472", + "id": "nmdc:95076052a4d5d57e1ed0c7699e4f5472", + "file_size_bytes": 28909664 + }, + { + "name": "Gp0115670_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko_ec.gff", + "md5_checksum": "6ae89cc4b2fb7d09614c106d3358be27", + "id": "nmdc:6ae89cc4b2fb7d09614c106d3358be27", + "file_size_bytes": 21214802 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3472d" + }, + "has_input": [ + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + "nmdc:c4f2407273babd894282d4d0f20be5d1", + "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5" + ], + "too_short_contig_num": 142606, + "part_of": [ + "nmdc:mga0d7pj22" + ], + "binned_contig_num": 1261, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", + "nmdc:e27b736ee699ef2a8468a684811aaabd", + "nmdc:b0866d1a944aa27e34dc7a140aeaf336", + "nmdc:0875e5107d03a40832d15e5cf80adbbc", + "nmdc:9b60c7c905d34e08427781eafbce9b12" + ], + "was_informed_by": "gold:Gp0115670", + "input_contig_num": 152605, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0d7pj22", + "mags_list": [ + { + "number_of_contig": 118, + "completeness": 23.28, + "bin_name": "bins.1", + "gene_count": 572, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 151, + "completeness": 38.09, + "bin_name": "bins.2", + "gene_count": 725, + "bin_quality": "LQ", + "gtdbtk_species": "", + 
"gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 100, + "completeness": 99.01, + "bin_name": "bins.3", + "gene_count": 3233, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 1, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.38, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 2, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 47 + }, + { + "number_of_contig": 135, + "completeness": 34.24, + "bin_name": "bins.4", + "gene_count": 689, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.91, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 652, + "completeness": 57.14, + "bin_name": "bins.5", + "gene_count": 3635, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.6, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 27 + }, + { + "number_of_contig": 105, + "completeness": 27.22, + "bin_name": "bins.6", + "gene_count": 509, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.19, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + } + ], + "unbinned_contig_num": 8738, + "started_at_time": "2021-10-11T02:28:43Z", + "type": 
"nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:55:52+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115670_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.tooShort.fa", + "md5_checksum": "fd5fe3f1faaaf3cd8a88d9bbfb016827", + "id": "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", + "file_size_bytes": 61828850 + }, + { + "name": "Gp0115670_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.unbinned.fa", + "md5_checksum": "e27b736ee699ef2a8468a684811aaabd", + "id": "nmdc:e27b736ee699ef2a8468a684811aaabd", + "file_size_bytes": 15075820 + }, + { + "name": "Gp0115670_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115670", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_checkm_qa.out", + "md5_checksum": "b0866d1a944aa27e34dc7a140aeaf336", + "id": "nmdc:b0866d1a944aa27e34dc7a140aeaf336", + "file_size_bytes": 1690 + }, + { + "name": "Gp0115670_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115670", + "data_object_type": "Metagenome Bins", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_hqmq_bin.zip", + "md5_checksum": "0875e5107d03a40832d15e5cf80adbbc", + "id": "nmdc:0875e5107d03a40832d15e5cf80adbbc", + "file_size_bytes": 1944800 + }, + { + "name": "Gp0115670_metabat2 bins", + "description": "metabat2 bins for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_metabat_bin.zip", + "md5_checksum": "9b60c7c905d34e08427781eafbce9b12", + "id": "nmdc:9b60c7c905d34e08427781eafbce9b12", + "file_size_bytes": 658258 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b4b" + }, + "id": "nmdc:omprc-11-1avd3d16", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-5xjtzc47" + ], + "has_output": [ + "jgi:55d7402c0d8785342fcf7e3e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115674" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88a" + }, + "has_input": [ + "nmdc:d94c174a22116c2db7ab8c47619e30aa" + ], + "part_of": [ + "nmdc:mga0cf0450" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:538fd5695eb3decd48891e72acebb8ce", + "nmdc:dde2b1748e16380e63476430ee27083a" + ], + "was_informed_by": "gold:Gp0115674", + "input_read_count": 26546332, + "output_read_bases": 3862169938, + 
"id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4008496132, + "name": "Read QC Activity for nmdc:mga0cf0450", + "output_read_count": 25776010, + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:21:41+00:00", + "output_data_objects": [ + { + "name": "Gp0115674_Filtered Reads", + "description": "Filtered Reads for Gp0115674", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filtered.fastq.gz", + "md5_checksum": "538fd5695eb3decd48891e72acebb8ce", + "id": "nmdc:538fd5695eb3decd48891e72acebb8ce", + "file_size_bytes": 2126353222 + }, + { + "name": "Gp0115674_Filtered Stats", + "description": "Filtered Stats for Gp0115674", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filterStats.txt", + "md5_checksum": "dde2b1748e16380e63476430ee27083a", + "id": "nmdc:dde2b1748e16380e63476430ee27083a", + "file_size_bytes": 288 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf50" + }, + "has_input": [ + "nmdc:538fd5695eb3decd48891e72acebb8ce" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "nmdc:f8740b1fadbc29aef50d32706c955199", + "nmdc:80abfcc9b09476af4083b2af1760834f", + "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" + ], + "was_informed_by": "gold:Gp0115674", + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:ReadbasedAnalysis", + 
"ended_at_time": "2021-10-11T05:21:41+00:00", + "output_data_objects": [ + { + "name": "Gp0115674_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", + "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", + "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "file_size_bytes": 13768 + }, + { + "name": "Gp0115674_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", + "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", + "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "file_size_bytes": 1022858 + }, + { + "name": "Gp0115674_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115674", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", + "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", + "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "file_size_bytes": 269166 + }, + { + "name": "Gp0115674_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115674", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", + "md5_checksum": "f8740b1fadbc29aef50d32706c955199", + "id": "nmdc:f8740b1fadbc29aef50d32706c955199", + "file_size_bytes": 1904303690 + }, + { + "name": "Gp0115674_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115674", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", + "md5_checksum": 
"80abfcc9b09476af4083b2af1760834f", + "id": "nmdc:80abfcc9b09476af4083b2af1760834f", + "file_size_bytes": 258748 + }, + { + "name": "Gp0115674_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115674", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", + "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", + "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "file_size_bytes": 2335000 + }, + { + "name": "Gp0115674_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115674", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", + "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", + "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "file_size_bytes": 1574286150 + }, + { + "name": "Gp0115674_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115674", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", + "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", + "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "file_size_bytes": 671800 + }, + { + "name": "Gp0115674_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115674", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", + "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", + "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", + "file_size_bytes": 4070548 + } + ] + }, + { + "_id": { + "$oid": "61e71a31833bcf838a701e57" + }, + "has_input": [ + "nmdc:538fd5695eb3decd48891e72acebb8ce" + ], + "part_of": [ + "nmdc:mga0cf0450" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "nmdc:f8740b1fadbc29aef50d32706c955199", + "nmdc:80abfcc9b09476af4083b2af1760834f", + "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" + ], + "was_informed_by": "gold:Gp0115674", + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:21:41+00:00", + "output_data_objects": [ + { + "name": "Gp0115674_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", + "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", + "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "file_size_bytes": 13768 + }, + { + "name": "Gp0115674_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", + "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", + "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "file_size_bytes": 1022858 + }, + { + "name": "Gp0115674_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115674", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", + "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", + "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "file_size_bytes": 269166 + }, + { + "name": "Gp0115674_Centrifuge 
classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115674", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", + "md5_checksum": "f8740b1fadbc29aef50d32706c955199", + "id": "nmdc:f8740b1fadbc29aef50d32706c955199", + "file_size_bytes": 1904303690 + }, + { + "name": "Gp0115674_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115674", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", + "md5_checksum": "80abfcc9b09476af4083b2af1760834f", + "id": "nmdc:80abfcc9b09476af4083b2af1760834f", + "file_size_bytes": 258748 + }, + { + "name": "Gp0115674_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115674", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", + "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", + "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "file_size_bytes": 2335000 + }, + { + "name": "Gp0115674_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115674", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", + "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", + "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "file_size_bytes": 1574286150 + }, + { + "name": "Gp0115674_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115674", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", + 
"md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", + "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "file_size_bytes": 671800 + }, + { + "name": "Gp0115674_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115674", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", + "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", + "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", + "file_size_bytes": 4070548 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb6" + }, + "has_input": [ + "nmdc:538fd5695eb3decd48891e72acebb8ce" + ], + "part_of": [ + "nmdc:mga0cf0450" + ], + "ctg_logsum": 272042, + "scaf_logsum": 272657, + "gap_pct": 0.00172, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", + "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", + "nmdc:604ed99b7c622082ddf174bb11d2787f", + "nmdc:a0263d8b11653306a05f598395ca603a" + ], + "asm_score": 18.19, + "was_informed_by": "gold:Gp0115674", + "ctg_powsum": 36133, + "scaf_max": 176505, + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "scaf_powsum": 36239, + "execution_resource": "NERSC-Cori", + "contigs": 139326, + "name": "Assembly Activity for nmdc:mga0cf0450", + "ctg_max": 176505, + "gc_std": 0.12397, + "contig_bp": 73195425, + "gc_avg": 0.56886, + "started_at_time": "2021-10-11T02:28:52Z", + "scaf_bp": 73196685, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 139236, + "ended_at_time": "2021-10-11T05:21:41+00:00", + "ctg_l50": 481, + "ctg_l90": 290, + "ctg_n50": 30768, + "ctg_n90": 115008, + "scaf_l50": 482, + "scaf_l90": 290, + "scaf_n50": 30582, + "scaf_n90": 114932, + "scaf_l_gt50k": 2506146, + "scaf_n_gt50k": 32, + "scaf_pct_gt50k": 3.4238515, + "output_data_objects": [ + { + "name": "Gp0115674_Assembled contigs fasta", + "description": "Assembled 
contigs fasta for Gp0115674", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_contigs.fna", + "md5_checksum": "ed2e4b90c8c2947486cc5c3c5828f949", + "id": "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "file_size_bytes": 78686505 + }, + { + "name": "Gp0115674_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115674", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_scaffolds.fna", + "md5_checksum": "e8fa9ae5e04a2969d220d81f1fb752f2", + "id": "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", + "file_size_bytes": 78267725 + }, + { + "name": "Gp0115674_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_covstats.txt", + "md5_checksum": "5f308ea3cb43a331cda55ac9f91c6a53", + "id": "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", + "file_size_bytes": 10980044 + }, + { + "name": "Gp0115674_Assembled AGP file", + "description": "Assembled AGP file for Gp0115674", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_assembly.agp", + "md5_checksum": "604ed99b7c622082ddf174bb11d2787f", + "id": "nmdc:604ed99b7c622082ddf174bb11d2787f", + "file_size_bytes": 10249514 + }, + { + "name": "Gp0115674_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115674", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_pairedMapped_sorted.bam", + "md5_checksum": "a0263d8b11653306a05f598395ca603a", + "id": "nmdc:a0263d8b11653306a05f598395ca603a", + "file_size_bytes": 2304306876 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d4" + }, + "has_input": [ + 
"nmdc:ed2e4b90c8c2947486cc5c3c5828f949" + ], + "part_of": [ + "nmdc:mga0cf0450" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "nmdc:ce90743969776fd717671aeb21d37379", + "nmdc:1a4f5145ccf0838811fe570a93549fdf", + "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", + "nmdc:b5db445feb8edb47022c2a0ee86d828d", + "nmdc:157d24f6f63091fbe9ef98cc3090975d", + "nmdc:afa217feffb94965aa1839041305237e", + "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", + "nmdc:ffcd280a63fab7bcfa5422f34070d87f", + "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", + "nmdc:d5676c01e67f71559a382850f42c3493", + "nmdc:121fab4d5bff0dcbb9d1849738a72347" + ], + "was_informed_by": "gold:Gp0115674", + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0cf0450", + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:21:41+00:00", + "output_data_objects": [ + { + "name": "Gp0115674_Protein FAA", + "description": "Protein FAA for Gp0115674", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_proteins.faa", + "md5_checksum": "9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "id": "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "file_size_bytes": 43650605 + }, + { + "name": "Gp0115674_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115674", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_structural_annotation.gff", + "md5_checksum": "ce90743969776fd717671aeb21d37379", + "id": "nmdc:ce90743969776fd717671aeb21d37379", + "file_size_bytes": 2529 + }, + { + "name": "Gp0115674_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115674", + "data_object_type": 
"Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_functional_annotation.gff", + "md5_checksum": "1a4f5145ccf0838811fe570a93549fdf", + "id": "nmdc:1a4f5145ccf0838811fe570a93549fdf", + "file_size_bytes": 47604509 + }, + { + "name": "Gp0115674_KO TSV file", + "description": "KO TSV file for Gp0115674", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko.tsv", + "md5_checksum": "662dae8ba0ea9dda93637c2ea60c1f4e", + "id": "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", + "file_size_bytes": 6436472 + }, + { + "name": "Gp0115674_EC TSV file", + "description": "EC TSV file for Gp0115674", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ec.tsv", + "md5_checksum": "b5db445feb8edb47022c2a0ee86d828d", + "id": "nmdc:b5db445feb8edb47022c2a0ee86d828d", + "file_size_bytes": 4111562 + }, + { + "name": "Gp0115674_COG GFF file", + "description": "COG GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cog.gff", + "md5_checksum": "157d24f6f63091fbe9ef98cc3090975d", + "id": "nmdc:157d24f6f63091fbe9ef98cc3090975d", + "file_size_bytes": 27373015 + }, + { + "name": "Gp0115674_PFAM GFF file", + "description": "PFAM GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_pfam.gff", + "md5_checksum": "afa217feffb94965aa1839041305237e", + "id": "nmdc:afa217feffb94965aa1839041305237e", + "file_size_bytes": 22153817 + }, + { + "name": "Gp0115674_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_tigrfam.gff", + "md5_checksum": "4a00e0c0bc479b8e6f1139c8de3149d5", + "id": "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", + 
"file_size_bytes": 2995281 + }, + { + "name": "Gp0115674_SMART GFF file", + "description": "SMART GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_smart.gff", + "md5_checksum": "ffcd280a63fab7bcfa5422f34070d87f", + "id": "nmdc:ffcd280a63fab7bcfa5422f34070d87f", + "file_size_bytes": 6393135 + }, + { + "name": "Gp0115674_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_supfam.gff", + "md5_checksum": "9fb334fc9409e6db51aaa1f960b08f4b", + "id": "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", + "file_size_bytes": 35023258 + }, + { + "name": "Gp0115674_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cath_funfam.gff", + "md5_checksum": "d5676c01e67f71559a382850f42c3493", + "id": "nmdc:d5676c01e67f71559a382850f42c3493", + "file_size_bytes": 27788764 + }, + { + "name": "Gp0115674_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko_ec.gff", + "md5_checksum": "121fab4d5bff0dcbb9d1849738a72347", + "id": "nmdc:121fab4d5bff0dcbb9d1849738a72347", + "file_size_bytes": 20542466 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34732" + }, + "has_input": [ + "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "nmdc:a0263d8b11653306a05f598395ca603a", + "nmdc:1a4f5145ccf0838811fe570a93549fdf" + ], + "too_short_contig_num": 131855, + "part_of": [ + "nmdc:mga0cf0450" + ], + "binned_contig_num": 1119, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:6a03eb0156b154ea68ffff9b473e73a5", + "nmdc:33a477987509b67fcfa5096d20c7c40b", + "nmdc:314c92c3a9458e1aa304e3c474209acf", + 
"nmdc:a4f9093efaf84855cab58880b262afd5", + "nmdc:1a29af6f30c21f38b25e4553605f50ef" + ], + "was_informed_by": "gold:Gp0115674", + "input_contig_num": 139324, + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0cf0450", + "mags_list": [ + { + "number_of_contig": 198, + "completeness": 100.0, + "bin_name": "bins.1", + "gene_count": 5608, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.29, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 46 + }, + { + "number_of_contig": 353, + "completeness": 88.62, + "bin_name": "bins.2", + "gene_count": 3146, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.0, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 40 + }, + { + "number_of_contig": 273, + "completeness": 51.61, + "bin_name": "bins.3", + "gene_count": 1397, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 0.8, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 17 + }, + { + "number_of_contig": 295, + "completeness": 49.14, + "bin_name": "bins.4", + "gene_count": 1695, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + 
"num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + } + ], + "unbinned_contig_num": 6350, + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:21:41+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115674_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.tooShort.fa", + "md5_checksum": "6a03eb0156b154ea68ffff9b473e73a5", + "id": "nmdc:6a03eb0156b154ea68ffff9b473e73a5", + "file_size_bytes": 56345518 + }, + { + "name": "Gp0115674_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.unbinned.fa", + "md5_checksum": "33a477987509b67fcfa5096d20c7c40b", + "id": "nmdc:33a477987509b67fcfa5096d20c7c40b", + "file_size_bytes": 10836032 + }, + { + "name": "Gp0115674_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115674", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_checkm_qa.out", + "md5_checksum": "314c92c3a9458e1aa304e3c474209acf", + "id": "nmdc:314c92c3a9458e1aa304e3c474209acf", + "file_size_bytes": 1360 + }, + { + "name": "Gp0115674_high-quality and medium-quality bins", + 
"description": "high-quality and medium-quality bins for Gp0115674", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_hqmq_bin.zip", + "md5_checksum": "a4f9093efaf84855cab58880b262afd5", + "id": "nmdc:a4f9093efaf84855cab58880b262afd5", + "file_size_bytes": 2974639 + }, + { + "name": "Gp0115674_metabat2 bins", + "description": "metabat2 bins for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_metabat_bin.zip", + "md5_checksum": "1a29af6f30c21f38b25e4553605f50ef", + "id": "nmdc:1a29af6f30c21f38b25e4553605f50ef", + "file_size_bytes": 469326 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b4c" + }, + "id": "nmdc:omprc-11-hk1bje46", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-5h7px351" + ], + "has_output": [ + "jgi:55d817f70d8785342fcf8270" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115673" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c876" + }, + "has_input": [ + "nmdc:3783bc4ce3716b6d299533bc3f6591b6" + ], + "part_of": [ + "nmdc:mga0kpja70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:268918f610926421d2af43f175553680", + 
"nmdc:4610980cf3558f5a9830797ead97362a" + ], + "was_informed_by": "gold:Gp0115673", + "input_read_count": 17796788, + "output_read_bases": 2520029380, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 2687314988, + "name": "Read QC Activity for nmdc:mga0kpja70", + "output_read_count": 16817496, + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:32:43+00:00", + "output_data_objects": [ + { + "name": "Gp0115673_Filtered Reads", + "description": "Filtered Reads for Gp0115673", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", + "md5_checksum": "268918f610926421d2af43f175553680", + "id": "nmdc:268918f610926421d2af43f175553680", + "file_size_bytes": 1492820163 + }, + { + "name": "Gp0115673_Filtered Stats", + "description": "Filtered Stats for Gp0115673", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", + "md5_checksum": "4610980cf3558f5a9830797ead97362a", + "id": "nmdc:4610980cf3558f5a9830797ead97362a", + "file_size_bytes": 287 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf44" + }, + "has_input": [ + "nmdc:268918f610926421d2af43f175553680" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c7b24571b61a33018cf118b5424b787f", + "nmdc:e185734176505343bf4c83c16a0a9fe2", + "nmdc:7c6b0ef44450c747580826a2e218844b", + "nmdc:5b98c377f424d7609f1a09e350cfb837", + "nmdc:b5f7a68a94b356001014d1be024231af", + "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "nmdc:35bf579641b2ffb3614098d9811a4968", + "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "nmdc:a7030fa8e9622e3396c2b96448e90c3b" + ], + "was_informed_by": "gold:Gp0115673", + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", 
+ "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:32:43+00:00", + "output_data_objects": [ + { + "name": "Gp0115673_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", + "md5_checksum": "c7b24571b61a33018cf118b5424b787f", + "id": "nmdc:c7b24571b61a33018cf118b5424b787f", + "file_size_bytes": 9782 + }, + { + "name": "Gp0115673_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", + "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", + "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", + "file_size_bytes": 856112 + }, + { + "name": "Gp0115673_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115673", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", + "md5_checksum": "7c6b0ef44450c747580826a2e218844b", + "id": "nmdc:7c6b0ef44450c747580826a2e218844b", + "file_size_bytes": 255142 + }, + { + "name": "Gp0115673_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115673", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", + "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", + "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", + "file_size_bytes": 1218364738 + }, + { + "name": "Gp0115673_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115673", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", + "md5_checksum": "b5f7a68a94b356001014d1be024231af", + "id": "nmdc:b5f7a68a94b356001014d1be024231af", + "file_size_bytes": 254923 + }, + { + "name": "Gp0115673_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115673", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", + "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", + "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "file_size_bytes": 2323219 + }, + { + "name": "Gp0115673_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115673", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", + "md5_checksum": "35bf579641b2ffb3614098d9811a4968", + "id": "nmdc:35bf579641b2ffb3614098d9811a4968", + "file_size_bytes": 1001134031 + }, + { + "name": "Gp0115673_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115673", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", + "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", + "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "file_size_bytes": 640671 + }, + { + "name": "Gp0115673_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115673", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", + "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", + "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", + "file_size_bytes": 3995499 + } + ] + }, + { + "_id": { + "$oid": 
"61e719b7833bcf838a7011dc" + }, + "has_input": [ + "nmdc:268918f610926421d2af43f175553680" + ], + "part_of": [ + "nmdc:mga0kpja70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c7b24571b61a33018cf118b5424b787f", + "nmdc:e185734176505343bf4c83c16a0a9fe2", + "nmdc:7c6b0ef44450c747580826a2e218844b", + "nmdc:5b98c377f424d7609f1a09e350cfb837", + "nmdc:b5f7a68a94b356001014d1be024231af", + "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "nmdc:35bf579641b2ffb3614098d9811a4968", + "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "nmdc:a7030fa8e9622e3396c2b96448e90c3b" + ], + "was_informed_by": "gold:Gp0115673", + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:32:43+00:00", + "output_data_objects": [ + { + "name": "Gp0115673_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", + "md5_checksum": "c7b24571b61a33018cf118b5424b787f", + "id": "nmdc:c7b24571b61a33018cf118b5424b787f", + "file_size_bytes": 9782 + }, + { + "name": "Gp0115673_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", + "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", + "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", + "file_size_bytes": 856112 + }, + { + "name": "Gp0115673_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115673", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", + "md5_checksum": 
"7c6b0ef44450c747580826a2e218844b", + "id": "nmdc:7c6b0ef44450c747580826a2e218844b", + "file_size_bytes": 255142 + }, + { + "name": "Gp0115673_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115673", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", + "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", + "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", + "file_size_bytes": 1218364738 + }, + { + "name": "Gp0115673_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115673", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", + "md5_checksum": "b5f7a68a94b356001014d1be024231af", + "id": "nmdc:b5f7a68a94b356001014d1be024231af", + "file_size_bytes": 254923 + }, + { + "name": "Gp0115673_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115673", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", + "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", + "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "file_size_bytes": 2323219 + }, + { + "name": "Gp0115673_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115673", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", + "md5_checksum": "35bf579641b2ffb3614098d9811a4968", + "id": "nmdc:35bf579641b2ffb3614098d9811a4968", + "file_size_bytes": 1001134031 + }, + { + "name": "Gp0115673_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115673", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", + "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", + "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "file_size_bytes": 640671 + }, + { + "name": "Gp0115673_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115673", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", + "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", + "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", + "file_size_bytes": 3995499 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa8" + }, + "has_input": [ + "nmdc:268918f610926421d2af43f175553680" + ], + "part_of": [ + "nmdc:mga0kpja70" + ], + "ctg_logsum": 160283, + "scaf_logsum": 161291, + "gap_pct": 0.0036, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:06d4964c0822abd6f94ca883c122f7ce", + "nmdc:bad916c69afe839097650b0b9526a841", + "nmdc:a187658f262fa495de43707aabcbf480", + "nmdc:c525c04f90889be615025c667908370c", + "nmdc:2e293158750df042be7422826125bef2" + ], + "asm_score": 6.419, + "was_informed_by": "gold:Gp0115673", + "ctg_powsum": 18694, + "scaf_max": 39252, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "scaf_powsum": 18825, + "execution_resource": "NERSC-Cori", + "contigs": 89808, + "name": "Assembly Activity for nmdc:mga0kpja70", + "ctg_max": 39252, + "gc_std": 0.11246, + "contig_bp": 46120517, + "gc_avg": 0.55483, + "started_at_time": "2021-10-11T02:28:36Z", + "scaf_bp": 46122177, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 89660, + "ended_at_time": "2021-10-11T03:32:43+00:00", + "ctg_l50": 493, + "ctg_l90": 286, + "ctg_n50": 19910, + "ctg_n90": 73487, + "scaf_l50": 494, + "scaf_l90": 286, + "scaf_n50": 19797, + "scaf_n90": 73347, + "output_data_objects": [ + { + "name": 
"Gp0115673_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115673", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_contigs.fna", + "md5_checksum": "06d4964c0822abd6f94ca883c122f7ce", + "id": "nmdc:06d4964c0822abd6f94ca883c122f7ce", + "file_size_bytes": 49610158 + }, + { + "name": "Gp0115673_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115673", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_scaffolds.fna", + "md5_checksum": "bad916c69afe839097650b0b9526a841", + "id": "nmdc:bad916c69afe839097650b0b9526a841", + "file_size_bytes": 49338957 + }, + { + "name": "Gp0115673_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_covstats.txt", + "md5_checksum": "a187658f262fa495de43707aabcbf480", + "id": "nmdc:a187658f262fa495de43707aabcbf480", + "file_size_bytes": 7048516 + }, + { + "name": "Gp0115673_Assembled AGP file", + "description": "Assembled AGP file for Gp0115673", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_assembly.agp", + "md5_checksum": "c525c04f90889be615025c667908370c", + "id": "nmdc:c525c04f90889be615025c667908370c", + "file_size_bytes": 6557406 + }, + { + "name": "Gp0115673_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115673", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_pairedMapped_sorted.bam", + "md5_checksum": "2e293158750df042be7422826125bef2", + "id": "nmdc:2e293158750df042be7422826125bef2", + "file_size_bytes": 1601507411 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c0" 
+ }, + "has_input": [ + "nmdc:06d4964c0822abd6f94ca883c122f7ce" + ], + "part_of": [ + "nmdc:mga0kpja70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:be3b8decbc48f9588daca36ca4c883ab", + "nmdc:106c834bb14367ec6154d1b04f2a1021", + "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", + "nmdc:84e3913c75d155fc45f04bc04810063a", + "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", + "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", + "nmdc:42173701162f4fdb727bc4eded48c2a1", + "nmdc:89b8851da4dca184654a76128048e09a", + "nmdc:e0d0721c6051fb0eebd70635882639c1", + "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", + "nmdc:e627abd2dfaee1fbf695de11211c6971", + "nmdc:a04e32711e814e733114531a666606c6" + ], + "was_informed_by": "gold:Gp0115673", + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0kpja70", + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:32:43+00:00", + "output_data_objects": [ + { + "name": "Gp0115673_Protein FAA", + "description": "Protein FAA for Gp0115673", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_proteins.faa", + "md5_checksum": "be3b8decbc48f9588daca36ca4c883ab", + "id": "nmdc:be3b8decbc48f9588daca36ca4c883ab", + "file_size_bytes": 27487621 + }, + { + "name": "Gp0115673_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115673", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_structural_annotation.gff", + "md5_checksum": "106c834bb14367ec6154d1b04f2a1021", + "id": "nmdc:106c834bb14367ec6154d1b04f2a1021", + "file_size_bytes": 2505 + }, + { + "name": "Gp0115673_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115673", + 
"data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_functional_annotation.gff", + "md5_checksum": "dfe3eed1eee6d6764ae22a2c6b0209e5", + "id": "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", + "file_size_bytes": 30665845 + }, + { + "name": "Gp0115673_KO TSV file", + "description": "KO TSV file for Gp0115673", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko.tsv", + "md5_checksum": "84e3913c75d155fc45f04bc04810063a", + "id": "nmdc:84e3913c75d155fc45f04bc04810063a", + "file_size_bytes": 4142989 + }, + { + "name": "Gp0115673_EC TSV file", + "description": "EC TSV file for Gp0115673", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ec.tsv", + "md5_checksum": "418e74fcbe4b97b8d74cb697a3b3feb4", + "id": "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", + "file_size_bytes": 2665975 + }, + { + "name": "Gp0115673_COG GFF file", + "description": "COG GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cog.gff", + "md5_checksum": "2d57dd06178c83c1f9c4bfaecf34b8b4", + "id": "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", + "file_size_bytes": 17716812 + }, + { + "name": "Gp0115673_PFAM GFF file", + "description": "PFAM GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_pfam.gff", + "md5_checksum": "42173701162f4fdb727bc4eded48c2a1", + "id": "nmdc:42173701162f4fdb727bc4eded48c2a1", + "file_size_bytes": 14043787 + }, + { + "name": "Gp0115673_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_tigrfam.gff", + "md5_checksum": "89b8851da4dca184654a76128048e09a", + "id": 
"nmdc:89b8851da4dca184654a76128048e09a", + "file_size_bytes": 2009579 + }, + { + "name": "Gp0115673_SMART GFF file", + "description": "SMART GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_smart.gff", + "md5_checksum": "e0d0721c6051fb0eebd70635882639c1", + "id": "nmdc:e0d0721c6051fb0eebd70635882639c1", + "file_size_bytes": 3834400 + }, + { + "name": "Gp0115673_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_supfam.gff", + "md5_checksum": "e9b0a3709e78dd9dfdba4eff7103c425", + "id": "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", + "file_size_bytes": 22131290 + }, + { + "name": "Gp0115673_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cath_funfam.gff", + "md5_checksum": "e627abd2dfaee1fbf695de11211c6971", + "id": "nmdc:e627abd2dfaee1fbf695de11211c6971", + "file_size_bytes": 17702997 + }, + { + "name": "Gp0115673_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko_ec.gff", + "md5_checksum": "a04e32711e814e733114531a666606c6", + "id": "nmdc:a04e32711e814e733114531a666606c6", + "file_size_bytes": 13225993 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3471f" + }, + "has_input": [ + "nmdc:06d4964c0822abd6f94ca883c122f7ce", + "nmdc:2e293158750df042be7422826125bef2", + "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5" + ], + "too_short_contig_num": 83787, + "part_of": [ + "nmdc:mga0kpja70" + ], + "binned_contig_num": 890, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:c907101a9eb50d1e522d1fc11b4d3164", + "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", + 
"nmdc:af15089c0cb19ec9bd65f98e59dc94f1", + "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", + "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea" + ], + "was_informed_by": "gold:Gp0115673", + "input_contig_num": 89806, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0kpja70", + "mags_list": [ + { + "number_of_contig": 67, + "completeness": 12.5, + "bin_name": "bins.1", + "gene_count": 318, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 823, + "completeness": 97.81, + "bin_name": "bins.2", + "gene_count": 5828, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 66.19, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 63 + } + ], + "unbinned_contig_num": 5129, + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:32:43+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115673_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115673", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.tooShort.fa", + "md5_checksum": "c907101a9eb50d1e522d1fc11b4d3164", + "id": "nmdc:c907101a9eb50d1e522d1fc11b4d3164", + "file_size_bytes": 35344893 + }, + { + "name": "Gp0115673_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.unbinned.fa", + "md5_checksum": "f80fbdbf31ee0ac76353d59e64b789bc", + "id": "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", + "file_size_bytes": 8810307 + }, + { + "name": "Gp0115673_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115673", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_checkm_qa.out", + "md5_checksum": "af15089c0cb19ec9bd65f98e59dc94f1", + "id": "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", + "file_size_bytes": 942 + }, + { + "name": "Gp0115673_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115673", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_hqmq_bin.zip", + "md5_checksum": "70d3f2afd9f32a2bdaa81a6fc547f6fb", + "id": "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", + "file_size_bytes": 182 + }, + { + "name": "Gp0115673_metabat2 bins", + "description": "metabat2 bins for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_metabat_bin.zip", + "md5_checksum": "f40d84a4fc0c87d76c144777f9e8a8ea", + "id": "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea", + "file_size_bytes": 1658458 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b4d" + }, + "id": "nmdc:omprc-11-qtje8r57", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW 
T4_12-Aug-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-wzdqhh45" + ], + "has_output": [ + "jgi:55d817fa0d8785342fcf8272" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115671" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c891" + }, + "has_input": [ + "nmdc:57d2e9b1a32e13f859c8b6e450ac3402" + ], + "part_of": [ + "nmdc:mga0rw1351" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", + "nmdc:24440b4c5534da30eee650b68eccda84" + ], + "was_informed_by": "gold:Gp0115671", + "input_read_count": 22298982, + "output_read_bases": 3062549086, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3367146282, + "name": "Read QC Activity for nmdc:mga0rw1351", + "output_read_count": 20445042, + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:39:05+00:00", + "output_data_objects": [ + { + "name": "Gp0115671_Filtered Reads", + "description": "Filtered Reads for Gp0115671", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filtered.fastq.gz", + "md5_checksum": "445f37bc3019e9fe3b29a2ac5bcbfc9c", + "id": "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", + "file_size_bytes": 1806996776 + }, + { + "name": 
"Gp0115671_Filtered Stats", + "description": "Filtered Stats for Gp0115671", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filterStats.txt", + "md5_checksum": "24440b4c5534da30eee650b68eccda84", + "id": "nmdc:24440b4c5534da30eee650b68eccda84", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf56" + }, + "has_input": [ + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:358559c32b69eff51758db66ac01021b", + "nmdc:befbd648249c2871bd27999120e50bf7", + "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + "nmdc:e0736ff520260ba2097c02b9e767362c", + "nmdc:a00960655f9e80726fdb0fade1bec958", + "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" + ], + "was_informed_by": "gold:Gp0115671", + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:39:05+00:00", + "output_data_objects": [ + { + "name": "Gp0115671_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", + "md5_checksum": "358559c32b69eff51758db66ac01021b", + "id": "nmdc:358559c32b69eff51758db66ac01021b", + "file_size_bytes": 11833 + }, + { + "name": "Gp0115671_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", + "md5_checksum": "befbd648249c2871bd27999120e50bf7", + "id": "nmdc:befbd648249c2871bd27999120e50bf7", + 
"file_size_bytes": 888177 + }, + { + "name": "Gp0115671_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115671", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", + "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", + "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "file_size_bytes": 261703 + }, + { + "name": "Gp0115671_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115671", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", + "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", + "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "file_size_bytes": 1474970402 + }, + { + "name": "Gp0115671_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115671", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", + "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", + "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + "file_size_bytes": 255777 + }, + { + "name": "Gp0115671_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115671", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", + "md5_checksum": "e0736ff520260ba2097c02b9e767362c", + "id": "nmdc:e0736ff520260ba2097c02b9e767362c", + "file_size_bytes": 2329875 + }, + { + "name": "Gp0115671_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115671", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", + "md5_checksum": "a00960655f9e80726fdb0fade1bec958", + "id": "nmdc:a00960655f9e80726fdb0fade1bec958", + "file_size_bytes": 1213240496 + }, + { + "name": "Gp0115671_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115671", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", + "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", + "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "file_size_bytes": 659715 + }, + { + "name": "Gp0115671_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115671", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", + "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", + "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", + "file_size_bytes": 4010701 + } + ] + }, + { + "_id": { + "$oid": "61e71a4c833bcf838a702155" + }, + "has_input": [ + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + ], + "part_of": [ + "nmdc:mga0rw1351" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:358559c32b69eff51758db66ac01021b", + "nmdc:befbd648249c2871bd27999120e50bf7", + "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + "nmdc:e0736ff520260ba2097c02b9e767362c", + "nmdc:a00960655f9e80726fdb0fade1bec958", + "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" + ], + "was_informed_by": "gold:Gp0115671", + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:ReadbasedAnalysis", + 
"ended_at_time": "2021-10-11T03:39:05+00:00", + "output_data_objects": [ + { + "name": "Gp0115671_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", + "md5_checksum": "358559c32b69eff51758db66ac01021b", + "id": "nmdc:358559c32b69eff51758db66ac01021b", + "file_size_bytes": 11833 + }, + { + "name": "Gp0115671_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", + "md5_checksum": "befbd648249c2871bd27999120e50bf7", + "id": "nmdc:befbd648249c2871bd27999120e50bf7", + "file_size_bytes": 888177 + }, + { + "name": "Gp0115671_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115671", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", + "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", + "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "file_size_bytes": 261703 + }, + { + "name": "Gp0115671_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115671", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", + "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", + "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "file_size_bytes": 1474970402 + }, + { + "name": "Gp0115671_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115671", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", + "md5_checksum": 
"90b77c7118bf6ec1f99836a50d562a7f", + "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + "file_size_bytes": 255777 + }, + { + "name": "Gp0115671_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115671", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", + "md5_checksum": "e0736ff520260ba2097c02b9e767362c", + "id": "nmdc:e0736ff520260ba2097c02b9e767362c", + "file_size_bytes": 2329875 + }, + { + "name": "Gp0115671_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115671", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", + "md5_checksum": "a00960655f9e80726fdb0fade1bec958", + "id": "nmdc:a00960655f9e80726fdb0fade1bec958", + "file_size_bytes": 1213240496 + }, + { + "name": "Gp0115671_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115671", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", + "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", + "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "file_size_bytes": 659715 + }, + { + "name": "Gp0115671_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115671", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", + "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", + "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", + "file_size_bytes": 4010701 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc1" + }, + "has_input": [ + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + ], + "part_of": [ + "nmdc:mga0rw1351" + ], + "ctg_logsum": 
111611, + "scaf_logsum": 112140, + "gap_pct": 0.00155, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "nmdc:be4cab04a701bce0ed99605109bd5d6f", + "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", + "nmdc:473ca208ab97399a644c8e5326e765e5", + "nmdc:69371e513bebd1069a0ed26cc2c914cb" + ], + "asm_score": 3.588, + "was_informed_by": "gold:Gp0115671", + "ctg_powsum": 12152, + "scaf_max": 16504, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "scaf_powsum": 12215, + "execution_resource": "NERSC-Cori", + "contigs": 120326, + "name": "Assembly Activity for nmdc:mga0rw1351", + "ctg_max": 16504, + "gc_std": 0.11331, + "contig_bp": 54171370, + "gc_avg": 0.54451, + "started_at_time": "2021-10-11T02:27:50Z", + "scaf_bp": 54172210, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 120242, + "ended_at_time": "2021-10-11T03:39:05+00:00", + "ctg_l50": 421, + "ctg_l90": 285, + "ctg_n50": 34725, + "ctg_n90": 101428, + "scaf_l50": 421, + "scaf_l90": 285, + "scaf_n50": 34687, + "scaf_n90": 101345, + "output_data_objects": [ + { + "name": "Gp0115671_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115671", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_contigs.fna", + "md5_checksum": "0a1ebd847e3bb8f928ef491497f8355b", + "id": "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "file_size_bytes": 58744710 + }, + { + "name": "Gp0115671_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115671", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_scaffolds.fna", + "md5_checksum": "be4cab04a701bce0ed99605109bd5d6f", + "id": "nmdc:be4cab04a701bce0ed99605109bd5d6f", + "file_size_bytes": 58382380 + }, + { + "name": "Gp0115671_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats 
for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_covstats.txt", + "md5_checksum": "cc4d3160618a82f81518bdc97ce1f5e2", + "id": "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", + "file_size_bytes": 9464710 + }, + { + "name": "Gp0115671_Assembled AGP file", + "description": "Assembled AGP file for Gp0115671", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_assembly.agp", + "md5_checksum": "473ca208ab97399a644c8e5326e765e5", + "id": "nmdc:473ca208ab97399a644c8e5326e765e5", + "file_size_bytes": 8820452 + }, + { + "name": "Gp0115671_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115671", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_pairedMapped_sorted.bam", + "md5_checksum": "69371e513bebd1069a0ed26cc2c914cb", + "id": "nmdc:69371e513bebd1069a0ed26cc2c914cb", + "file_size_bytes": 1938214126 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d3" + }, + "has_input": [ + "nmdc:0a1ebd847e3bb8f928ef491497f8355b" + ], + "part_of": [ + "nmdc:mga0rw1351" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:147b97234576ba123a9f3c63eb249ecf", + "nmdc:3e037f5f744c9f8e4aa355222cc620ae", + "nmdc:10d19849864ecdb722335200d0607bbe", + "nmdc:0ce9fa5958b6445f7be463538e89e9b1", + "nmdc:a3bc059d9350034f835be4e754486c73", + "nmdc:da9866461051130a44f0982b1a65c061", + "nmdc:676fff23fb641ee8af8a2b948fc5b46e", + "nmdc:a4aa56158a292b63078eb029ed1d90a9", + "nmdc:6a28f85e8b5addccb429cc7f8964e496", + "nmdc:d5b21cce7406ab46611c49dc1ab658ed", + "nmdc:8ead1ab881fd48527d853b0d0601b4bc", + "nmdc:ad206c1031a6f0a7805034dee03ff889" + ], + "was_informed_by": "gold:Gp0115671", + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for 
nmdc:mga0rw1351", + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:39:05+00:00", + "output_data_objects": [ + { + "name": "Gp0115671_Protein FAA", + "description": "Protein FAA for Gp0115671", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_proteins.faa", + "md5_checksum": "147b97234576ba123a9f3c63eb249ecf", + "id": "nmdc:147b97234576ba123a9f3c63eb249ecf", + "file_size_bytes": 32911597 + }, + { + "name": "Gp0115671_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115671", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_structural_annotation.gff", + "md5_checksum": "3e037f5f744c9f8e4aa355222cc620ae", + "id": "nmdc:3e037f5f744c9f8e4aa355222cc620ae", + "file_size_bytes": 2516 + }, + { + "name": "Gp0115671_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115671", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_functional_annotation.gff", + "md5_checksum": "10d19849864ecdb722335200d0607bbe", + "id": "nmdc:10d19849864ecdb722335200d0607bbe", + "file_size_bytes": 38009425 + }, + { + "name": "Gp0115671_KO TSV file", + "description": "KO TSV file for Gp0115671", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko.tsv", + "md5_checksum": "0ce9fa5958b6445f7be463538e89e9b1", + "id": "nmdc:0ce9fa5958b6445f7be463538e89e9b1", + "file_size_bytes": 4994549 + }, + { + "name": "Gp0115671_EC TSV file", + "description": "EC TSV file for Gp0115671", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ec.tsv", + "md5_checksum": "a3bc059d9350034f835be4e754486c73", + "id": "nmdc:a3bc059d9350034f835be4e754486c73", + "file_size_bytes": 3207987 + }, + { + "name": "Gp0115671_COG GFF file", + "description": "COG GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cog.gff", + "md5_checksum": "da9866461051130a44f0982b1a65c061", + "id": "nmdc:da9866461051130a44f0982b1a65c061", + "file_size_bytes": 21138081 + }, + { + "name": "Gp0115671_PFAM GFF file", + "description": "PFAM GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_pfam.gff", + "md5_checksum": "676fff23fb641ee8af8a2b948fc5b46e", + "id": "nmdc:676fff23fb641ee8af8a2b948fc5b46e", + "file_size_bytes": 16269399 + }, + { + "name": "Gp0115671_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_tigrfam.gff", + "md5_checksum": "a4aa56158a292b63078eb029ed1d90a9", + "id": "nmdc:a4aa56158a292b63078eb029ed1d90a9", + "file_size_bytes": 2189740 + }, + { + "name": "Gp0115671_SMART GFF file", + "description": "SMART GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_smart.gff", + "md5_checksum": "6a28f85e8b5addccb429cc7f8964e496", + "id": "nmdc:6a28f85e8b5addccb429cc7f8964e496", + "file_size_bytes": 4669463 + }, + { + "name": "Gp0115671_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_supfam.gff", + "md5_checksum": "d5b21cce7406ab46611c49dc1ab658ed", + "id": "nmdc:d5b21cce7406ab46611c49dc1ab658ed", + "file_size_bytes": 26589549 + }, + { + "name": "Gp0115671_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115671", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cath_funfam.gff", + "md5_checksum": "8ead1ab881fd48527d853b0d0601b4bc", + "id": "nmdc:8ead1ab881fd48527d853b0d0601b4bc", + "file_size_bytes": 20889965 + }, + { + "name": "Gp0115671_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko_ec.gff", + "md5_checksum": "ad206c1031a6f0a7805034dee03ff889", + "id": "nmdc:ad206c1031a6f0a7805034dee03ff889", + "file_size_bytes": 15914575 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34733" + }, + "has_input": [ + "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "nmdc:69371e513bebd1069a0ed26cc2c914cb", + "nmdc:10d19849864ecdb722335200d0607bbe" + ], + "too_short_contig_num": 114372, + "part_of": [ + "nmdc:mga0rw1351" + ], + "binned_contig_num": 328, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", + "nmdc:43a900225e93216944b4eec3a01f7db7", + "nmdc:cad0e18a4d2c4067a2724f41e449cb86", + "nmdc:55577aa26faf185b3b3f4c78711e7715", + "nmdc:c484ee1e530a0c9b47069c0288110e47" + ], + "was_informed_by": "gold:Gp0115671", + "input_contig_num": 120326, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0rw1351", + "mags_list": [ + { + "number_of_contig": 173, + "completeness": 26.29, + "bin_name": "bins.1", + "gene_count": 875, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.18, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 155, + "completeness": 24.1, + "bin_name": "bins.2", + "gene_count": 806, + "bin_quality": "LQ", + "gtdbtk_species": "", + 
"gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + } + ], + "unbinned_contig_num": 5626, + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:39:05+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115671_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.tooShort.fa", + "md5_checksum": "57fd559aaca7b976f3b38bb1a3ce362b", + "id": "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", + "file_size_bytes": 48167943 + }, + { + "name": "Gp0115671_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.unbinned.fa", + "md5_checksum": "43a900225e93216944b4eec3a01f7db7", + "id": "nmdc:43a900225e93216944b4eec3a01f7db7", + "file_size_bytes": 9124730 + }, + { + "name": "Gp0115671_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115671", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_checkm_qa.out", + "md5_checksum": "cad0e18a4d2c4067a2724f41e449cb86", + "id": 
"nmdc:cad0e18a4d2c4067a2724f41e449cb86", + "file_size_bytes": 1014 + }, + { + "name": "Gp0115671_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115671", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_hqmq_bin.zip", + "md5_checksum": "55577aa26faf185b3b3f4c78711e7715", + "id": "nmdc:55577aa26faf185b3b3f4c78711e7715", + "file_size_bytes": 182 + }, + { + "name": "Gp0115671_metabat2 bins", + "description": "metabat2 bins for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_metabat_bin.zip", + "md5_checksum": "c484ee1e530a0c9b47069c0288110e47", + "id": "nmdc:c484ee1e530a0c9b47069c0288110e47", + "file_size_bytes": 444082 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b4e" + }, + "id": "nmdc:omprc-11-7ey2jr63", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-pkgtg048" + ], + "has_output": [ + "jgi:55d740220d8785342fcf7e35" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115676" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c883" + }, + "has_input": [ + "nmdc:5672111f6f33b8aff5f65e69ebb41c5e" + ], + "part_of": [ + "nmdc:mga0w3a067" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", + "nmdc:79815495339053b7935b55dbde02b2ff" + ], + "was_informed_by": "gold:Gp0115676", + "input_read_count": 39069214, + "output_read_bases": 5550744725, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "input_read_bases": 5899451314, + "name": "Read QC Activity for nmdc:mga0w3a067", + "output_read_count": 37037822, + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00", + "output_data_objects": [ + { + "name": "Gp0115676_Filtered Reads", + "description": "Filtered Reads for Gp0115676", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", + "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", + "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", + "file_size_bytes": 3113249122 + }, + { + "name": "Gp0115676_Filtered Stats", + "description": "Filtered Stats for Gp0115676", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", + "md5_checksum": "79815495339053b7935b55dbde02b2ff", + "id": "nmdc:79815495339053b7935b55dbde02b2ff", + "file_size_bytes": 292 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf4c" + }, + "has_input": [ + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:13343b2533892633bcc3655a1ebe788f", + "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "nmdc:95a2de8be672fd50bf542215194dc4d4", + "nmdc:6cd0210b345d6908ad8ab683b1a11572", + "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "nmdc:6e1e28773094884d35c04072309e285a", + "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" + ], + 
"was_informed_by": "gold:Gp0115676", + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:40:05+00:00", + "output_data_objects": [ + { + "name": "Gp0115676_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", + "md5_checksum": "13343b2533892633bcc3655a1ebe788f", + "id": "nmdc:13343b2533892633bcc3655a1ebe788f", + "file_size_bytes": 13659 + }, + { + "name": "Gp0115676_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", + "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", + "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "file_size_bytes": 1168924 + }, + { + "name": "Gp0115676_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115676", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", + "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", + "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", + "file_size_bytes": 267660 + }, + { + "name": "Gp0115676_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115676", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", + "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", + "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", + "file_size_bytes": 2721808152 + }, + { + "name": "Gp0115676_Centrifuge TSV report", + 
"description": "Centrifuge TSV report for Gp0115676", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", + "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", + "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "file_size_bytes": 263207 + }, + { + "name": "Gp0115676_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115676", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", + "md5_checksum": "6e1e28773094884d35c04072309e285a", + "id": "nmdc:6e1e28773094884d35c04072309e285a", + "file_size_bytes": 2347912 + }, + { + "name": "Gp0115676_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115676", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", + "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", + "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "file_size_bytes": 2224468607 + }, + { + "name": "Gp0115676_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115676", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", + "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", + "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "file_size_bytes": 701128 + }, + { + "name": "Gp0115676_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115676", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", + "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", + "id": 
"nmdc:e8602b20781cdbbd84e6dcb92c048a6b", + "file_size_bytes": 4217185 + } + ] + }, + { + "_id": { + "$oid": "61e71a12833bcf838a701b3a" + }, + "has_input": [ + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" + ], + "part_of": [ + "nmdc:mga0w3a067" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:13343b2533892633bcc3655a1ebe788f", + "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "nmdc:95a2de8be672fd50bf542215194dc4d4", + "nmdc:6cd0210b345d6908ad8ab683b1a11572", + "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "nmdc:6e1e28773094884d35c04072309e285a", + "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" + ], + "was_informed_by": "gold:Gp0115676", + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:40:05+00:00", + "output_data_objects": [ + { + "name": "Gp0115676_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", + "md5_checksum": "13343b2533892633bcc3655a1ebe788f", + "id": "nmdc:13343b2533892633bcc3655a1ebe788f", + "file_size_bytes": 13659 + }, + { + "name": "Gp0115676_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", + "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", + "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "file_size_bytes": 1168924 + }, + { + "name": "Gp0115676_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115676", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", + "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", + "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", + "file_size_bytes": 267660 + }, + { + "name": "Gp0115676_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115676", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", + "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", + "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", + "file_size_bytes": 2721808152 + }, + { + "name": "Gp0115676_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115676", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", + "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", + "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "file_size_bytes": 263207 + }, + { + "name": "Gp0115676_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115676", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", + "md5_checksum": "6e1e28773094884d35c04072309e285a", + "id": "nmdc:6e1e28773094884d35c04072309e285a", + "file_size_bytes": 2347912 + }, + { + "name": "Gp0115676_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115676", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", + "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", + "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "file_size_bytes": 
2224468607 + }, + { + "name": "Gp0115676_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115676", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", + "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", + "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "file_size_bytes": 701128 + }, + { + "name": "Gp0115676_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115676", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", + "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", + "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", + "file_size_bytes": 4217185 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fbe" + }, + "has_input": [ + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" + ], + "part_of": [ + "nmdc:mga0w3a067" + ], + "ctg_logsum": 335229, + "scaf_logsum": 337025, + "gap_pct": 0.00236, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:19987e32391f846db382edabf14ba43e", + "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", + "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", + "nmdc:1b665fb0fbbf40a13122100c927b398b", + "nmdc:7c1232ff8d861d2e2c111a1dc4a70480" + ], + "asm_score": 10.939, + "was_informed_by": "gold:Gp0115676", + "ctg_powsum": 40696, + "scaf_max": 163197, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "scaf_powsum": 40973, + "execution_resource": "NERSC-Cori", + "contigs": 187125, + "name": "Assembly Activity for nmdc:mga0w3a067", + "ctg_max": 163197, + "gc_std": 0.10616, + "contig_bp": 97611209, + "gc_avg": 0.5929, + "started_at_time": "2021-10-11T02:26:37Z", + "scaf_bp": 97613509, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 186895, + "ended_at_time": "2021-10-11T05:40:05+00:00", + "ctg_l50": 499, + "ctg_l90": 288, + "ctg_n50": 42676, + 
"ctg_n90": 155670, + "scaf_l50": 499, + "scaf_l90": 288, + "scaf_n50": 42593, + "scaf_n90": 155449, + "scaf_l_gt50k": 743033, + "scaf_n_gt50k": 11, + "scaf_pct_gt50k": 0.7611989, + "output_data_objects": [ + { + "name": "Gp0115676_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115676", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_contigs.fna", + "md5_checksum": "19987e32391f846db382edabf14ba43e", + "id": "nmdc:19987e32391f846db382edabf14ba43e", + "file_size_bytes": 105010680 + }, + { + "name": "Gp0115676_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115676", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_scaffolds.fna", + "md5_checksum": "1a4c5ace6c1b54e057d282031e8bc2c6", + "id": "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", + "file_size_bytes": 104445982 + }, + { + "name": "Gp0115676_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_covstats.txt", + "md5_checksum": "af7a38646011c9e6d0ad2b1ebd7f47c9", + "id": "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", + "file_size_bytes": 14811778 + }, + { + "name": "Gp0115676_Assembled AGP file", + "description": "Assembled AGP file for Gp0115676", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_assembly.agp", + "md5_checksum": "1b665fb0fbbf40a13122100c927b398b", + "id": "nmdc:1b665fb0fbbf40a13122100c927b398b", + "file_size_bytes": 13854137 + }, + { + "name": "Gp0115676_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115676", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_pairedMapped_sorted.bam", + "md5_checksum": "7c1232ff8d861d2e2c111a1dc4a70480", + "id": "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", + "file_size_bytes": 3366223347 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9cb" + }, + "has_input": [ + "nmdc:19987e32391f846db382edabf14ba43e" + ], + "part_of": [ + "nmdc:mga0w3a067" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:35adf26b13c97c40147af2f067e0c9be", + "nmdc:3de29d8dede94769e7753f0aaee86691", + "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", + "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", + "nmdc:98b9ea6588dc9ff918298c4a7c567edf", + "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", + "nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", + "nmdc:4d0469ae5b27dd4045d637d2493ccba9", + "nmdc:a893783f6886e31b6bca5b6baede9f66", + "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", + "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", + "nmdc:83647c3e1ed96fda36f7c119a3e98182" + ], + "was_informed_by": "gold:Gp0115676", + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0w3a067", + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00", + "output_data_objects": [ + { + "name": "Gp0115676_Protein FAA", + "description": "Protein FAA for Gp0115676", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_proteins.faa", + "md5_checksum": "35adf26b13c97c40147af2f067e0c9be", + "id": "nmdc:35adf26b13c97c40147af2f067e0c9be", + "file_size_bytes": 59120149 + }, + { + "name": "Gp0115676_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115676", + "data_object_type": "Structural Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_structural_annotation.gff", + "md5_checksum": "3de29d8dede94769e7753f0aaee86691", + "id": "nmdc:3de29d8dede94769e7753f0aaee86691", + "file_size_bytes": 2524 + }, + { + "name": "Gp0115676_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115676", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_functional_annotation.gff", + "md5_checksum": "6fa3d1e5fae636b4199ff57b4776a51c", + "id": "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", + "file_size_bytes": 65284624 + }, + { + "name": "Gp0115676_KO TSV file", + "description": "KO TSV file for Gp0115676", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko.tsv", + "md5_checksum": "b865dcd9976c90dbc8459ec7ccc72d45", + "id": "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", + "file_size_bytes": 9219020 + }, + { + "name": "Gp0115676_EC TSV file", + "description": "EC TSV file for Gp0115676", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ec.tsv", + "md5_checksum": "98b9ea6588dc9ff918298c4a7c567edf", + "id": "nmdc:98b9ea6588dc9ff918298c4a7c567edf", + "file_size_bytes": 5972063 + }, + { + "name": "Gp0115676_COG GFF file", + "description": "COG GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cog.gff", + "md5_checksum": "d8fbe8d24c00eee2ef163e3bb428b718", + "id": "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", + "file_size_bytes": 39290017 + }, + { + "name": "Gp0115676_PFAM GFF file", + "description": "PFAM GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_pfam.gff", + "md5_checksum": "ed68f1e7fd4873f1ea756d0c58a9c550", + "id": 
"nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", + "file_size_bytes": 31343624 + }, + { + "name": "Gp0115676_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_tigrfam.gff", + "md5_checksum": "4d0469ae5b27dd4045d637d2493ccba9", + "id": "nmdc:4d0469ae5b27dd4045d637d2493ccba9", + "file_size_bytes": 4260344 + }, + { + "name": "Gp0115676_SMART GFF file", + "description": "SMART GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_smart.gff", + "md5_checksum": "a893783f6886e31b6bca5b6baede9f66", + "id": "nmdc:a893783f6886e31b6bca5b6baede9f66", + "file_size_bytes": 8240017 + }, + { + "name": "Gp0115676_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_supfam.gff", + "md5_checksum": "2225c723ccf0fd5ea309cfb5ca90d536", + "id": "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", + "file_size_bytes": 48186264 + }, + { + "name": "Gp0115676_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cath_funfam.gff", + "md5_checksum": "1abd69f8096f98174d95d9a3a13c2a3b", + "id": "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", + "file_size_bytes": 38259823 + }, + { + "name": "Gp0115676_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko_ec.gff", + "md5_checksum": "83647c3e1ed96fda36f7c119a3e98182", + "id": "nmdc:83647c3e1ed96fda36f7c119a3e98182", + "file_size_bytes": 29337291 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3472c" + }, + "has_input": [ + "nmdc:19987e32391f846db382edabf14ba43e", + "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", + "nmdc:6fa3d1e5fae636b4199ff57b4776a51c" + ], + 
"too_short_contig_num": 175121, + "part_of": [ + "nmdc:mga0w3a067" + ], + "binned_contig_num": 1550, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:71667f3b8ee0cb5acadc541fa6914022", + "nmdc:0141a64077e0f18adc42cb1915a00fa2", + "nmdc:982b47616dde63a388400fcc57d7c5b0", + "nmdc:313eb61bc7577e272eca6332e923f9c4", + "nmdc:763eb40a8905e9b0d459c45222f1b05e" + ], + "was_informed_by": "gold:Gp0115676", + "input_contig_num": 187123, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0w3a067", + "mags_list": [ + { + "number_of_contig": 457, + "completeness": 95.14, + "bin_name": "bins.1", + "gene_count": 6260, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 76.52, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 85 + }, + { + "number_of_contig": 24, + "completeness": 4.17, + "bin_name": "bins.2", + "gene_count": 246, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 175, + "completeness": 36.21, + "bin_name": "bins.3", + "gene_count": 937, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 485, + "completeness": 43.26, + "bin_name": "bins.4", + "gene_count": 2590, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 
0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.55, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 29 + }, + { + "number_of_contig": 339, + "completeness": 79.0, + "bin_name": "bins.5", + "gene_count": 2464, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 3.71, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 32 + }, + { + "number_of_contig": 70, + "completeness": 0.0, + "bin_name": "bins.6", + "gene_count": 298, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + } + ], + "unbinned_contig_num": 10452, + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115676_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.tooShort.fa", + "md5_checksum": "71667f3b8ee0cb5acadc541fa6914022", + "id": 
"nmdc:71667f3b8ee0cb5acadc541fa6914022", + "file_size_bytes": 75793492 + }, + { + "name": "Gp0115676_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.unbinned.fa", + "md5_checksum": "0141a64077e0f18adc42cb1915a00fa2", + "id": "nmdc:0141a64077e0f18adc42cb1915a00fa2", + "file_size_bytes": 17366889 + }, + { + "name": "Gp0115676_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115676", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_checkm_qa.out", + "md5_checksum": "982b47616dde63a388400fcc57d7c5b0", + "id": "nmdc:982b47616dde63a388400fcc57d7c5b0", + "file_size_bytes": 1700 + }, + { + "name": "Gp0115676_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115676", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_hqmq_bin.zip", + "md5_checksum": "313eb61bc7577e272eca6332e923f9c4", + "id": "nmdc:313eb61bc7577e272eca6332e923f9c4", + "file_size_bytes": 677741 + }, + { + "name": "Gp0115676_metabat2 bins", + "description": "metabat2 bins for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_metabat_bin.zip", + "md5_checksum": "763eb40a8905e9b0d459c45222f1b05e", + "id": "nmdc:763eb40a8905e9b0d459c45222f1b05e", + "file_size_bytes": 2885722 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b4f" + }, + "id": "nmdc:omprc-11-qngh7497", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-8362vs44" 
+ ], + "has_output": [ + "jgi:55a9caff0d87852b2150891e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115677" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87a" + }, + "has_input": [ + "nmdc:80ca2cf2e3edcac29eb62b43f62e25c3" + ], + "part_of": [ + "nmdc:mga0zb0766" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:63c857b3011dec61a08044d518291f23", + "nmdc:2a79d7978caecf9b08fb2029fa42c9b3" + ], + "was_informed_by": "gold:Gp0115677", + "input_read_count": 65434428, + "output_read_bases": 9483843059, + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "input_read_bases": 9880598628, + "name": "Read QC Activity for nmdc:mga0zb0766", + "output_read_count": 64887080, + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:26:42+00:00", + "output_data_objects": [ + { + "name": "Gp0115677_Filtered Reads", + "description": "Filtered Reads for Gp0115677", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filtered.fastq.gz", + "md5_checksum": "63c857b3011dec61a08044d518291f23", + "id": "nmdc:63c857b3011dec61a08044d518291f23", + "file_size_bytes": 5307348388 + }, + { + "name": "Gp0115677_Filtered Stats", + "description": "Filtered Stats for Gp0115677", + "data_object_type": "QC Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filterStats.txt", + "md5_checksum": "2a79d7978caecf9b08fb2029fa42c9b3", + "id": "nmdc:2a79d7978caecf9b08fb2029fa42c9b3", + "file_size_bytes": 279 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf3c" + }, + "has_input": [ + "nmdc:63c857b3011dec61a08044d518291f23" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "nmdc:c1730daf5e6017219fd9fc079e42c132", + "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "nmdc:229017cdb1832bb718d22dc27db44125", + "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "nmdc:bdd701b44e67929ec8bbe279697da937", + "nmdc:d35583a5ed45df5a58bf084fc67bf988" + ], + "was_informed_by": "gold:Gp0115677", + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:26:42+00:00", + "output_data_objects": [ + { + "name": "Gp0115677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", + "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", + "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "file_size_bytes": 17895 + }, + { + "name": "Gp0115677_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", + "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", + "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", + "file_size_bytes": 1182538 + }, + { + "name": "Gp0115677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML 
report for Gp0115677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", + "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", + "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "file_size_bytes": 276802 + }, + { + "name": "Gp0115677_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115677", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", + "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", + "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "file_size_bytes": 4716470614 + }, + { + "name": "Gp0115677_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115677", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", + "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", + "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "file_size_bytes": 267231 + }, + { + "name": "Gp0115677_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115677", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", + "md5_checksum": "229017cdb1832bb718d22dc27db44125", + "id": "nmdc:229017cdb1832bb718d22dc27db44125", + "file_size_bytes": 2356003 + }, + { + "name": "Gp0115677_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115677", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", + "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", + 
"id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "file_size_bytes": 3857487871 + }, + { + "name": "Gp0115677_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115677", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", + "md5_checksum": "bdd701b44e67929ec8bbe279697da937", + "id": "nmdc:bdd701b44e67929ec8bbe279697da937", + "file_size_bytes": 708598 + }, + { + "name": "Gp0115677_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115677", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", + "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", + "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", + "file_size_bytes": 4250180 + } + ] + }, + { + "_id": { + "$oid": "61e719d5833bcf838a70143c" + }, + "has_input": [ + "nmdc:63c857b3011dec61a08044d518291f23" + ], + "part_of": [ + "nmdc:mga0zb0766" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "nmdc:c1730daf5e6017219fd9fc079e42c132", + "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "nmdc:229017cdb1832bb718d22dc27db44125", + "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "nmdc:bdd701b44e67929ec8bbe279697da937", + "nmdc:d35583a5ed45df5a58bf084fc67bf988" + ], + "was_informed_by": "gold:Gp0115677", + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:26:42+00:00", + "output_data_objects": [ + { + "name": "Gp0115677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115677", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", + "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", + "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "file_size_bytes": 17895 + }, + { + "name": "Gp0115677_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", + "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", + "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", + "file_size_bytes": 1182538 + }, + { + "name": "Gp0115677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", + "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", + "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "file_size_bytes": 276802 + }, + { + "name": "Gp0115677_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115677", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", + "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", + "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "file_size_bytes": 4716470614 + }, + { + "name": "Gp0115677_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115677", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", + "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", + "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "file_size_bytes": 267231 + }, + { + "name": "Gp0115677_Centrifuge Krona HTML report", + "description": 
"Centrifuge Krona HTML report for Gp0115677", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", + "md5_checksum": "229017cdb1832bb718d22dc27db44125", + "id": "nmdc:229017cdb1832bb718d22dc27db44125", + "file_size_bytes": 2356003 + }, + { + "name": "Gp0115677_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115677", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", + "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", + "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "file_size_bytes": 3857487871 + }, + { + "name": "Gp0115677_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115677", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", + "md5_checksum": "bdd701b44e67929ec8bbe279697da937", + "id": "nmdc:bdd701b44e67929ec8bbe279697da937", + "file_size_bytes": 708598 + }, + { + "name": "Gp0115677_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115677", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", + "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", + "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", + "file_size_bytes": 4250180 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fad" + }, + "has_input": [ + "nmdc:63c857b3011dec61a08044d518291f23" + ], + "part_of": [ + "nmdc:mga0zb0766" + ], + "ctg_logsum": 407938, + "scaf_logsum": 442802, + "gap_pct": 0.02562, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3d9e14d6f7a854042a7d71def080409b", + 
"nmdc:26d0d64ca7c850f0e04a4c33690bd178", + "nmdc:8f8a0622cfe39054bd20f11116c78402", + "nmdc:623aa370c44897cf30844647c2f5bd94", + "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5" + ], + "asm_score": 13.853, + "was_informed_by": "gold:Gp0115677", + "ctg_powsum": 50872, + "scaf_max": 582605, + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "scaf_powsum": 55815, + "execution_resource": "NERSC-Cori", + "contigs": 548764, + "name": "Assembly Activity for nmdc:mga0zb0766", + "ctg_max": 464697, + "gc_std": 0.11035, + "contig_bp": 229799767, + "gc_avg": 0.55184, + "started_at_time": "2021-10-11T02:24:49Z", + "scaf_bp": 229858665, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 543003, + "ended_at_time": "2021-10-11T06:26:42+00:00", + "ctg_l50": 375, + "ctg_l90": 283, + "ctg_n50": 171281, + "ctg_n90": 471697, + "scaf_l50": 378, + "scaf_l90": 283, + "scaf_n50": 164840, + "scaf_n90": 466121, + "scaf_l_gt50k": 2790937, + "scaf_n_gt50k": 23, + "scaf_pct_gt50k": 1.2141969, + "output_data_objects": [ + { + "name": "Gp0115677_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115677", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_contigs.fna", + "md5_checksum": "3d9e14d6f7a854042a7d71def080409b", + "id": "nmdc:3d9e14d6f7a854042a7d71def080409b", + "file_size_bytes": 250747283 + }, + { + "name": "Gp0115677_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115677", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_scaffolds.fna", + "md5_checksum": "26d0d64ca7c850f0e04a4c33690bd178", + "id": "nmdc:26d0d64ca7c850f0e04a4c33690bd178", + "file_size_bytes": 249006954 + }, + { + "name": "Gp0115677_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115677", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_covstats.txt", + "md5_checksum": "8f8a0622cfe39054bd20f11116c78402", + "id": "nmdc:8f8a0622cfe39054bd20f11116c78402", + "file_size_bytes": 43716675 + }, + { + "name": "Gp0115677_Assembled AGP file", + "description": "Assembled AGP file for Gp0115677", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_assembly.agp", + "md5_checksum": "623aa370c44897cf30844647c2f5bd94", + "id": "nmdc:623aa370c44897cf30844647c2f5bd94", + "file_size_bytes": 41409581 + }, + { + "name": "Gp0115677_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115677", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_pairedMapped_sorted.bam", + "md5_checksum": "f4a1cf24281f14a666a1bfc9afc0aab5", + "id": "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", + "file_size_bytes": 5828772757 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c1" + }, + "has_input": [ + "nmdc:3d9e14d6f7a854042a7d71def080409b" + ], + "part_of": [ + "nmdc:mga0zb0766" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:4f9d82516561ee307b1ab4841255aff0", + "nmdc:a658e9045fde900cdc78d0578446b960", + "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", + "nmdc:9a338a51c6ca2ec4e0da4e15903be407", + "nmdc:0f9e627ace8d9b8420e957bcd033244a", + "nmdc:144a997b22098f5fe748d2fa069cdc71", + "nmdc:82dc44c196f4b6b5552e8360f21f93a0", + "nmdc:9238a5420065e1da9da31c270c90268a", + "nmdc:ce31f29ff8fed6d0a973d61157af7220", + "nmdc:016cbd549e03d896ed746ab91771b21a", + "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", + "nmdc:c935ce264779684a01c9a7777e506d02" + ], + "was_informed_by": "gold:Gp0115677", + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0zb0766", + 
"started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:26:42+00:00", + "output_data_objects": [ + { + "name": "Gp0115677_Protein FAA", + "description": "Protein FAA for Gp0115677", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_proteins.faa", + "md5_checksum": "4f9d82516561ee307b1ab4841255aff0", + "id": "nmdc:4f9d82516561ee307b1ab4841255aff0", + "file_size_bytes": 144603933 + }, + { + "name": "Gp0115677_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115677", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_structural_annotation.gff", + "md5_checksum": "a658e9045fde900cdc78d0578446b960", + "id": "nmdc:a658e9045fde900cdc78d0578446b960", + "file_size_bytes": 2546 + }, + { + "name": "Gp0115677_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115677", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_functional_annotation.gff", + "md5_checksum": "075c3477b8874aa8d6c4dbc1360a2b38", + "id": "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", + "file_size_bytes": 167984752 + }, + { + "name": "Gp0115677_KO TSV file", + "description": "KO TSV file for Gp0115677", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko.tsv", + "md5_checksum": "9a338a51c6ca2ec4e0da4e15903be407", + "id": "nmdc:9a338a51c6ca2ec4e0da4e15903be407", + "file_size_bytes": 19341535 + }, + { + "name": "Gp0115677_EC TSV file", + "description": "EC TSV file for Gp0115677", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ec.tsv", + "md5_checksum": "0f9e627ace8d9b8420e957bcd033244a", + "id": "nmdc:0f9e627ace8d9b8420e957bcd033244a", + "file_size_bytes": 12533246 + }, + { + "name": "Gp0115677_COG GFF file", + "description": "COG GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cog.gff", + "md5_checksum": "144a997b22098f5fe748d2fa069cdc71", + "id": "nmdc:144a997b22098f5fe748d2fa069cdc71", + "file_size_bytes": 85841510 + }, + { + "name": "Gp0115677_PFAM GFF file", + "description": "PFAM GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_pfam.gff", + "md5_checksum": "82dc44c196f4b6b5552e8360f21f93a0", + "id": "nmdc:82dc44c196f4b6b5552e8360f21f93a0", + "file_size_bytes": 64139943 + }, + { + "name": "Gp0115677_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_tigrfam.gff", + "md5_checksum": "9238a5420065e1da9da31c270c90268a", + "id": "nmdc:9238a5420065e1da9da31c270c90268a", + "file_size_bytes": 7585101 + }, + { + "name": "Gp0115677_SMART GFF file", + "description": "SMART GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_smart.gff", + "md5_checksum": "ce31f29ff8fed6d0a973d61157af7220", + "id": "nmdc:ce31f29ff8fed6d0a973d61157af7220", + "file_size_bytes": 18353478 + }, + { + "name": "Gp0115677_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_supfam.gff", + "md5_checksum": "016cbd549e03d896ed746ab91771b21a", + "id": "nmdc:016cbd549e03d896ed746ab91771b21a", + "file_size_bytes": 107179327 + }, + { + "name": "Gp0115677_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115677", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cath_funfam.gff", + "md5_checksum": "7ef0abcd7fba705f6e9e26dcb8b1da8d", + "id": "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", + "file_size_bytes": 85056001 + }, + { + "name": "Gp0115677_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko_ec.gff", + "md5_checksum": "c935ce264779684a01c9a7777e506d02", + "id": "nmdc:c935ce264779684a01c9a7777e506d02", + "file_size_bytes": 61547317 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34724" + }, + "has_input": [ + "nmdc:3d9e14d6f7a854042a7d71def080409b", + "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", + "nmdc:075c3477b8874aa8d6c4dbc1360a2b38" + ], + "too_short_contig_num": 532333, + "part_of": [ + "nmdc:mga0zb0766" + ], + "binned_contig_num": 969, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:603009bd6294d2318d929a57b5d3e5d3", + "nmdc:c5334a4e305f78c294c304c3c0526826", + "nmdc:6adacc1ba06e5e451f3636c394c71ae8", + "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", + "nmdc:42c3fb9a3906f6b413f99e3276bb7550" + ], + "was_informed_by": "gold:Gp0115677", + "input_contig_num": 548756, + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0zb0766", + "mags_list": [ + { + "number_of_contig": 68, + "completeness": 3.17, + "bin_name": "bins.1", + "gene_count": 329, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 282, + "completeness": 59.56, + "bin_name": "bins.2", + "gene_count": 1735, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 
sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 26 + }, + { + "number_of_contig": 3, + "completeness": 54.6, + "bin_name": "bins.3", + "gene_count": 751, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA9983_A", + "num_16s": 1, + "gtdbtk_family": "UBA2163", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Paceibacteria", + "gtdbtk_phylum": "Patescibacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "1-14-0-10-47-16", + "num_t_rna": 22 + }, + { + "number_of_contig": 90, + "completeness": 98.7, + "bin_name": "bins.4", + "gene_count": 3042, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 2, + "num_23s": 2, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 46 + }, + { + "number_of_contig": 325, + "completeness": 73.34, + "bin_name": "bins.5", + "gene_count": 2576, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 1, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.91, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 35 + }, + { + "number_of_contig": 199, + "completeness": 49.14, + "bin_name": "bins.6", + "gene_count": 1046, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 2, + "gtdbtk_genus": "", + 
"num_t_rna": 21 + }, + { + "number_of_contig": 2, + "completeness": 24.32, + "bin_name": "bins.7", + "gene_count": 329, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + } + ], + "unbinned_contig_num": 15454, + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:26:42+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115677_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.tooShort.fa", + "md5_checksum": "603009bd6294d2318d929a57b5d3e5d3", + "id": "nmdc:603009bd6294d2318d929a57b5d3e5d3", + "file_size_bytes": 215021876 + }, + { + "name": "Gp0115677_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.unbinned.fa", + "md5_checksum": "c5334a4e305f78c294c304c3c0526826", + "id": "nmdc:c5334a4e305f78c294c304c3c0526826", + "file_size_bytes": 26658018 + }, + { + "name": "Gp0115677_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115677", + "data_object_type": "CheckM 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_checkm_qa.out", + "md5_checksum": "6adacc1ba06e5e451f3636c394c71ae8", + "id": "nmdc:6adacc1ba06e5e451f3636c394c71ae8", + "file_size_bytes": 1859 + }, + { + "name": "Gp0115677_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115677", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_hqmq_bin.zip", + "md5_checksum": "77d4e2a7f358b9ac1d53b69d7e8c45e1", + "id": "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", + "file_size_bytes": 2309404 + }, + { + "name": "Gp0115677_metabat2 bins", + "description": "metabat2 bins for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_metabat_bin.zip", + "md5_checksum": "42c3fb9a3906f6b413f99e3276bb7550", + "id": "nmdc:42c3fb9a3906f6b413f99e3276bb7550", + "file_size_bytes": 450699 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b50" + }, + "id": "nmdc:omprc-11-jk7zjz92", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-a5d23e19" + ], + "has_output": [ + "jgi:55d817f30d8785342fcf826d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115675" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + 
"$oid": "649b009d6bdd4fd20273c87d" + }, + "has_input": [ + "nmdc:4a9a0183b794a98c57e5b5ce959a3f65" + ], + "part_of": [ + "nmdc:mga0vf2h47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", + "nmdc:2507e3f107100ce0c72c57191d450818" + ], + "was_informed_by": "gold:Gp0115675", + "input_read_count": 18827380, + "output_read_bases": 2508839784, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "input_read_bases": 2842934380, + "name": "Read QC Activity for nmdc:mga0vf2h47", + "output_read_count": 16749572, + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:25:21+00:00", + "output_data_objects": [ + { + "name": "Gp0115675_Filtered Reads", + "description": "Filtered Reads for Gp0115675", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", + "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", + "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", + "file_size_bytes": 1533239347 + }, + { + "name": "Gp0115675_Filtered Stats", + "description": "Filtered Stats for Gp0115675", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", + "md5_checksum": "2507e3f107100ce0c72c57191d450818", + "id": "nmdc:2507e3f107100ce0c72c57191d450818", + "file_size_bytes": 287 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf41" + }, + "has_input": [ + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:60d673988c4f4447feb5985e8501e914", + "nmdc:a8f93ed13033eb949109b4e83980a893", + "nmdc:31dd6eb616f1e9815778453ab1601195", + "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "nmdc:0aaac507db0e29827e1c87df47324932", + 
"nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "nmdc:1ec0247d86889fcef13f39a58a92b066", + "nmdc:242a1c60f6cb14ba8430375171fda436" + ], + "was_informed_by": "gold:Gp0115675", + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:25:21+00:00", + "output_data_objects": [ + { + "name": "Gp0115675_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", + "md5_checksum": "60d673988c4f4447feb5985e8501e914", + "id": "nmdc:60d673988c4f4447feb5985e8501e914", + "file_size_bytes": 8921 + }, + { + "name": "Gp0115675_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", + "md5_checksum": "a8f93ed13033eb949109b4e83980a893", + "id": "nmdc:a8f93ed13033eb949109b4e83980a893", + "file_size_bytes": 871109 + }, + { + "name": "Gp0115675_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115675", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", + "md5_checksum": "31dd6eb616f1e9815778453ab1601195", + "id": "nmdc:31dd6eb616f1e9815778453ab1601195", + "file_size_bytes": 252578 + }, + { + "name": "Gp0115675_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115675", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", + "md5_checksum": 
"6d7a930d79f220b06cde8fbf8339e744", + "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "file_size_bytes": 1218767711 + }, + { + "name": "Gp0115675_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115675", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", + "md5_checksum": "0aaac507db0e29827e1c87df47324932", + "id": "nmdc:0aaac507db0e29827e1c87df47324932", + "file_size_bytes": 254260 + }, + { + "name": "Gp0115675_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115675", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", + "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", + "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "file_size_bytes": 2324387 + }, + { + "name": "Gp0115675_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115675", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", + "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", + "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "file_size_bytes": 1001846607 + }, + { + "name": "Gp0115675_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115675", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", + "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", + "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", + "file_size_bytes": 635541 + }, + { + "name": "Gp0115675_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115675", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", + "md5_checksum": "242a1c60f6cb14ba8430375171fda436", + "id": "nmdc:242a1c60f6cb14ba8430375171fda436", + "file_size_bytes": 3968420 + } + ] + }, + { + "_id": { + "$oid": "61e719dc833bcf838a7014d6" + }, + "has_input": [ + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + ], + "part_of": [ + "nmdc:mga0vf2h47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:60d673988c4f4447feb5985e8501e914", + "nmdc:a8f93ed13033eb949109b4e83980a893", + "nmdc:31dd6eb616f1e9815778453ab1601195", + "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "nmdc:0aaac507db0e29827e1c87df47324932", + "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "nmdc:1ec0247d86889fcef13f39a58a92b066", + "nmdc:242a1c60f6cb14ba8430375171fda436" + ], + "was_informed_by": "gold:Gp0115675", + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:25:21+00:00", + "output_data_objects": [ + { + "name": "Gp0115675_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", + "md5_checksum": "60d673988c4f4447feb5985e8501e914", + "id": "nmdc:60d673988c4f4447feb5985e8501e914", + "file_size_bytes": 8921 + }, + { + "name": "Gp0115675_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", + "md5_checksum": "a8f93ed13033eb949109b4e83980a893", + "id": "nmdc:a8f93ed13033eb949109b4e83980a893", + "file_size_bytes": 871109 + }, + { + "name": "Gp0115675_Gottcha2 
Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115675", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", + "md5_checksum": "31dd6eb616f1e9815778453ab1601195", + "id": "nmdc:31dd6eb616f1e9815778453ab1601195", + "file_size_bytes": 252578 + }, + { + "name": "Gp0115675_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115675", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", + "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", + "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "file_size_bytes": 1218767711 + }, + { + "name": "Gp0115675_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115675", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", + "md5_checksum": "0aaac507db0e29827e1c87df47324932", + "id": "nmdc:0aaac507db0e29827e1c87df47324932", + "file_size_bytes": 254260 + }, + { + "name": "Gp0115675_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115675", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", + "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", + "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "file_size_bytes": 2324387 + }, + { + "name": "Gp0115675_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115675", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", + "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", + "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "file_size_bytes": 1001846607 + }, + { + "name": "Gp0115675_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115675", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", + "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", + "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", + "file_size_bytes": 635541 + }, + { + "name": "Gp0115675_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115675", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", + "md5_checksum": "242a1c60f6cb14ba8430375171fda436", + "id": "nmdc:242a1c60f6cb14ba8430375171fda436", + "file_size_bytes": 3968420 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139faf" + }, + "has_input": [ + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + ], + "part_of": [ + "nmdc:mga0vf2h47" + ], + "ctg_logsum": 115425, + "scaf_logsum": 116377, + "gap_pct": 0.00425, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "nmdc:6d02084941141ac9a1876c621a50aef0", + "nmdc:cc8faed3494579d793c08ede54cb5b3a", + "nmdc:8891e46c9766f2b84d45fd6e46078a64", + "nmdc:80470769e7531b46c709d12c65487ffe" + ], + "asm_score": 4.718, + "was_informed_by": "gold:Gp0115675", + "ctg_powsum": 13174, + "scaf_max": 25635, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "scaf_powsum": 13311, + "execution_resource": "NERSC-Cori", + "contigs": 80858, + "name": "Assembly Activity for nmdc:mga0vf2h47", + "ctg_max": 25635, + "gc_std": 0.10716, + "contig_bp": 38571486, + "gc_avg": 
0.56103, + "started_at_time": "2021-10-11T02:28:05Z", + "scaf_bp": 38573126, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 80703, + "ended_at_time": "2021-10-11T03:25:21+00:00", + "ctg_l50": 435, + "ctg_l90": 284, + "ctg_n50": 19932, + "ctg_n90": 68422, + "scaf_l50": 436, + "scaf_l90": 284, + "scaf_n50": 19754, + "scaf_n90": 68272, + "output_data_objects": [ + { + "name": "Gp0115675_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115675", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_contigs.fna", + "md5_checksum": "dd5cad9348fc41cb18ac989185fed0b5", + "id": "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "file_size_bytes": 41662357 + }, + { + "name": "Gp0115675_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115675", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_scaffolds.fna", + "md5_checksum": "6d02084941141ac9a1876c621a50aef0", + "id": "nmdc:6d02084941141ac9a1876c621a50aef0", + "file_size_bytes": 41417652 + }, + { + "name": "Gp0115675_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_covstats.txt", + "md5_checksum": "cc8faed3494579d793c08ede54cb5b3a", + "id": "nmdc:cc8faed3494579d793c08ede54cb5b3a", + "file_size_bytes": 6338871 + }, + { + "name": "Gp0115675_Assembled AGP file", + "description": "Assembled AGP file for Gp0115675", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_assembly.agp", + "md5_checksum": "8891e46c9766f2b84d45fd6e46078a64", + "id": "nmdc:8891e46c9766f2b84d45fd6e46078a64", + "file_size_bytes": 5901316 + }, + { + "name": "Gp0115675_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM 
file for Gp0115675", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_pairedMapped_sorted.bam", + "md5_checksum": "80470769e7531b46c709d12c65487ffe", + "id": "nmdc:80470769e7531b46c709d12c65487ffe", + "file_size_bytes": 1635169657 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c2" + }, + "has_input": [ + "nmdc:dd5cad9348fc41cb18ac989185fed0b5" + ], + "part_of": [ + "nmdc:mga0vf2h47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:93ea50ce57263b498b781240c04dbf46", + "nmdc:71195b9bc697bf29cd865718a689eb1b", + "nmdc:d8cccd9c5cd237c238e5ba443c477db5", + "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", + "nmdc:17e386be26f52833c463a89733ef2e34", + "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", + "nmdc:b11e36753299e36fa92670cf75165698", + "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", + "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", + "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", + "nmdc:b080e9d168c0c1330fda64814afe335b", + "nmdc:4ea799de0bc051409b7231801eea0129" + ], + "was_informed_by": "gold:Gp0115675", + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0vf2h47", + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:25:21+00:00", + "output_data_objects": [ + { + "name": "Gp0115675_Protein FAA", + "description": "Protein FAA for Gp0115675", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_proteins.faa", + "md5_checksum": "93ea50ce57263b498b781240c04dbf46", + "id": "nmdc:93ea50ce57263b498b781240c04dbf46", + "file_size_bytes": 23383485 + }, + { + "name": "Gp0115675_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115675", + "data_object_type": "Structural Annotation 
GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_structural_annotation.gff", + "md5_checksum": "71195b9bc697bf29cd865718a689eb1b", + "id": "nmdc:71195b9bc697bf29cd865718a689eb1b", + "file_size_bytes": 2508 + }, + { + "name": "Gp0115675_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115675", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_functional_annotation.gff", + "md5_checksum": "d8cccd9c5cd237c238e5ba443c477db5", + "id": "nmdc:d8cccd9c5cd237c238e5ba443c477db5", + "file_size_bytes": 26575202 + }, + { + "name": "Gp0115675_KO TSV file", + "description": "KO TSV file for Gp0115675", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko.tsv", + "md5_checksum": "1cb17c4c7681345f53a7f4ef5c319fba", + "id": "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", + "file_size_bytes": 3577030 + }, + { + "name": "Gp0115675_EC TSV file", + "description": "EC TSV file for Gp0115675", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ec.tsv", + "md5_checksum": "17e386be26f52833c463a89733ef2e34", + "id": "nmdc:17e386be26f52833c463a89733ef2e34", + "file_size_bytes": 2294485 + }, + { + "name": "Gp0115675_COG GFF file", + "description": "COG GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cog.gff", + "md5_checksum": "3e9b2fd11f2f5c16f9f25560e3b6fc55", + "id": "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", + "file_size_bytes": 15181628 + }, + { + "name": "Gp0115675_PFAM GFF file", + "description": "PFAM GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_pfam.gff", + "md5_checksum": 
"b11e36753299e36fa92670cf75165698", + "id": "nmdc:b11e36753299e36fa92670cf75165698", + "file_size_bytes": 11905020 + }, + { + "name": "Gp0115675_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_tigrfam.gff", + "md5_checksum": "70ac1de5fbc6cc835d5a0d1855f7a28a", + "id": "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", + "file_size_bytes": 1629352 + }, + { + "name": "Gp0115675_SMART GFF file", + "description": "SMART GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_smart.gff", + "md5_checksum": "b9e3eb74fa7fee0fac886f8a436b9ecf", + "id": "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", + "file_size_bytes": 3360419 + }, + { + "name": "Gp0115675_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_supfam.gff", + "md5_checksum": "faa27c2be6dc56e66f739dbffcbb6bef", + "id": "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", + "file_size_bytes": 19134944 + }, + { + "name": "Gp0115675_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cath_funfam.gff", + "md5_checksum": "b080e9d168c0c1330fda64814afe335b", + "id": "nmdc:b080e9d168c0c1330fda64814afe335b", + "file_size_bytes": 15037016 + }, + { + "name": "Gp0115675_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko_ec.gff", + "md5_checksum": "4ea799de0bc051409b7231801eea0129", + "id": "nmdc:4ea799de0bc051409b7231801eea0129", + "file_size_bytes": 11398449 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34722" + }, + "has_input": [ + "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "nmdc:80470769e7531b46c709d12c65487ffe", + 
"nmdc:d8cccd9c5cd237c238e5ba443c477db5" + ], + "too_short_contig_num": 76352, + "part_of": [ + "nmdc:mga0vf2h47" + ], + "binned_contig_num": 846, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:826503b4204b77c319c0bb353d69818e", + "nmdc:9a02c2954014bb8dcd62800609dd3ec5", + "nmdc:d15ed915946e095d045d73f4b4de019d", + "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", + "nmdc:55f66520d821205e80dcd303cc2793bc" + ], + "was_informed_by": "gold:Gp0115675", + "input_contig_num": 80857, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0vf2h47", + "mags_list": [ + { + "number_of_contig": 579, + "completeness": 73.87, + "bin_name": "bins.1", + "gene_count": 3274, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 25.78, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 37 + }, + { + "number_of_contig": 199, + "completeness": 36.21, + "bin_name": "bins.2", + "gene_count": 1070, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 68, + "completeness": 4.17, + "bin_name": "bins.3", + "gene_count": 480, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 4.17, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 3659, + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": 
"2021-10-11T03:25:21+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115675_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.tooShort.fa", + "md5_checksum": "826503b4204b77c319c0bb353d69818e", + "id": "nmdc:826503b4204b77c319c0bb353d69818e", + "file_size_bytes": 31246547 + }, + { + "name": "Gp0115675_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.unbinned.fa", + "md5_checksum": "9a02c2954014bb8dcd62800609dd3ec5", + "id": "nmdc:9a02c2954014bb8dcd62800609dd3ec5", + "file_size_bytes": 6258719 + }, + { + "name": "Gp0115675_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115675", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_checkm_qa.out", + "md5_checksum": "d15ed915946e095d045d73f4b4de019d", + "id": "nmdc:d15ed915946e095d045d73f4b4de019d", + "file_size_bytes": 1092 + }, + { + "name": "Gp0115675_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115675", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_hqmq_bin.zip", + 
"md5_checksum": "8de4404b1a6601bae7d7d5fd51bd131a", + "id": "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", + "file_size_bytes": 182 + }, + { + "name": "Gp0115675_metabat2 bins", + "description": "metabat2 bins for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_metabat_bin.zip", + "md5_checksum": "55f66520d821205e80dcd303cc2793bc", + "id": "nmdc:55f66520d821205e80dcd303cc2793bc", + "file_size_bytes": 1259160 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b51" + }, + "id": "nmdc:omprc-11-2jt0jk84", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qjtgh002" + ], + "has_output": [ + "jgi:55f23d820d8785306f964980" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115665" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88e" + }, + "has_input": [ + "nmdc:0d39aafcd16496457fbb3be0f785b67f" + ], + "part_of": [ + "nmdc:mga06n7k74" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b0462e18cf9dafc9d2207a58bf085530", + "nmdc:f0e1b9004b0e9aafb06c444444a522c7" + ], + "was_informed_by": "gold:Gp0115665", + "input_read_count": 50719572, + "output_read_bases": 7175148255, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + 
"input_read_bases": 7658655372, + "name": "Read QC Activity for nmdc:mga06n7k74", + "output_read_count": 47896142, + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:19:29+00:00", + "output_data_objects": [ + { + "name": "Gp0115665_Filtered Reads", + "description": "Filtered Reads for Gp0115665", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filtered.fastq.gz", + "md5_checksum": "b0462e18cf9dafc9d2207a58bf085530", + "id": "nmdc:b0462e18cf9dafc9d2207a58bf085530", + "file_size_bytes": 4096192298 + }, + { + "name": "Gp0115665_Filtered Stats", + "description": "Filtered Stats for Gp0115665", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filterStats.txt", + "md5_checksum": "f0e1b9004b0e9aafb06c444444a522c7", + "id": "nmdc:f0e1b9004b0e9aafb06c444444a522c7", + "file_size_bytes": 291 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf57" + }, + "has_input": [ + "nmdc:b0462e18cf9dafc9d2207a58bf085530" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:432fedddcbacb4e69c0350354ab44080", + "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "nmdc:e3d7339ba5c7677be13854f391462474", + "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" + ], + "was_informed_by": "gold:Gp0115665", + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:19:29+00:00", + "output_data_objects": [ + { + "name": 
"Gp0115665_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", + "md5_checksum": "432fedddcbacb4e69c0350354ab44080", + "id": "nmdc:432fedddcbacb4e69c0350354ab44080", + "file_size_bytes": 18015 + }, + { + "name": "Gp0115665_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", + "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", + "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "file_size_bytes": 1283220 + }, + { + "name": "Gp0115665_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115665", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", + "md5_checksum": "e3d7339ba5c7677be13854f391462474", + "id": "nmdc:e3d7339ba5c7677be13854f391462474", + "file_size_bytes": 281366 + }, + { + "name": "Gp0115665_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115665", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", + "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", + "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "file_size_bytes": 3481369185 + }, + { + "name": "Gp0115665_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115665", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", + "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", + "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + 
"file_size_bytes": 263480 + }, + { + "name": "Gp0115665_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115665", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", + "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", + "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "file_size_bytes": 2347079 + }, + { + "name": "Gp0115665_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115665", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", + "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", + "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "file_size_bytes": 2866138771 + }, + { + "name": "Gp0115665_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115665", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", + "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", + "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "file_size_bytes": 728030 + }, + { + "name": "Gp0115665_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115665", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", + "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", + "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", + "file_size_bytes": 4374689 + } + ] + }, + { + "_id": { + "$oid": "61e71a36833bcf838a702021" + }, + "has_input": [ + "nmdc:b0462e18cf9dafc9d2207a58bf085530" + ], + "part_of": [ + "nmdc:mga06n7k74" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:432fedddcbacb4e69c0350354ab44080", + "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "nmdc:e3d7339ba5c7677be13854f391462474", + "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" + ], + "was_informed_by": "gold:Gp0115665", + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:19:29+00:00", + "output_data_objects": [ + { + "name": "Gp0115665_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", + "md5_checksum": "432fedddcbacb4e69c0350354ab44080", + "id": "nmdc:432fedddcbacb4e69c0350354ab44080", + "file_size_bytes": 18015 + }, + { + "name": "Gp0115665_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", + "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", + "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "file_size_bytes": 1283220 + }, + { + "name": "Gp0115665_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115665", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", + "md5_checksum": "e3d7339ba5c7677be13854f391462474", + "id": "nmdc:e3d7339ba5c7677be13854f391462474", + "file_size_bytes": 281366 + }, + { + "name": "Gp0115665_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for 
Gp0115665", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", + "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", + "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "file_size_bytes": 3481369185 + }, + { + "name": "Gp0115665_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115665", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", + "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", + "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + "file_size_bytes": 263480 + }, + { + "name": "Gp0115665_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115665", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", + "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", + "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "file_size_bytes": 2347079 + }, + { + "name": "Gp0115665_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115665", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", + "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", + "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "file_size_bytes": 2866138771 + }, + { + "name": "Gp0115665_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115665", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", + "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", + "id": 
"nmdc:64459bec7843953a70f8ea2b09a7e9de", + "file_size_bytes": 728030 + }, + { + "name": "Gp0115665_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115665", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", + "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", + "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", + "file_size_bytes": 4374689 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc2" + }, + "has_input": [ + "nmdc:b0462e18cf9dafc9d2207a58bf085530" + ], + "part_of": [ + "nmdc:mga06n7k74" + ], + "ctg_logsum": 427633, + "scaf_logsum": 429769, + "gap_pct": 0.00206, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "nmdc:2674db4e7e6171864fa47f0b3b5a9603", + "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", + "nmdc:5a1240fa0a6bf92c95e852c0352e5839", + "nmdc:e28c85b50e0b654626e655755165aff5" + ], + "asm_score": 5.768, + "was_informed_by": "gold:Gp0115665", + "ctg_powsum": 48025, + "scaf_max": 44931, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "scaf_powsum": 48321, + "execution_resource": "NERSC-Cori", + "contigs": 352055, + "name": "Assembly Activity for nmdc:mga06n7k74", + "ctg_max": 44931, + "gc_std": 0.13027, + "contig_bp": 172051088, + "gc_avg": 0.51918, + "started_at_time": "2021-10-11T02:28:54Z", + "scaf_bp": 172054628, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 351728, + "ended_at_time": "2021-10-11T06:19:29+00:00", + "ctg_l50": 468, + "ctg_l90": 289, + "ctg_n50": 95561, + "ctg_n90": 294969, + "scaf_l50": 468, + "scaf_l90": 289, + "scaf_n50": 95446, + "scaf_n90": 294658, + "output_data_objects": [ + { + "name": "Gp0115665_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115665", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_contigs.fna", + "md5_checksum": "9704e757dc537a7f06c6f83fc633cf64", + "id": "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "file_size_bytes": 185880663 + }, + { + "name": "Gp0115665_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115665", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_scaffolds.fna", + "md5_checksum": "2674db4e7e6171864fa47f0b3b5a9603", + "id": "nmdc:2674db4e7e6171864fa47f0b3b5a9603", + "file_size_bytes": 184819604 + }, + { + "name": "Gp0115665_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_covstats.txt", + "md5_checksum": "ab6c496a5e3ab895fee3812fd992e1e7", + "id": "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", + "file_size_bytes": 27961807 + }, + { + "name": "Gp0115665_Assembled AGP file", + "description": "Assembled AGP file for Gp0115665", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_assembly.agp", + "md5_checksum": "5a1240fa0a6bf92c95e852c0352e5839", + "id": "nmdc:5a1240fa0a6bf92c95e852c0352e5839", + "file_size_bytes": 26248242 + }, + { + "name": "Gp0115665_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115665", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_pairedMapped_sorted.bam", + "md5_checksum": "e28c85b50e0b654626e655755165aff5", + "id": "nmdc:e28c85b50e0b654626e655755165aff5", + "file_size_bytes": 4460978045 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d2" + }, + "has_input": [ + "nmdc:9704e757dc537a7f06c6f83fc633cf64" + ], + "part_of": [ + "nmdc:mga06n7k74" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", + "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", + "nmdc:b782707ae2cf5676596ca99800deea26", + "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", + "nmdc:f5d79b4c69825e0b66153e7582cb489b", + "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", + "nmdc:26cc0a40aab6bfc64d24afa760b43102", + "nmdc:83785a6e8f7658dc2354b9bad1b86d01", + "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", + "nmdc:4876eed2bee3b3b7b2ac827857410be6", + "nmdc:bb5b62735a896d189c9a274c6e091bab" + ], + "was_informed_by": "gold:Gp0115665", + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga06n7k74", + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:19:29+00:00", + "output_data_objects": [ + { + "name": "Gp0115665_Protein FAA", + "description": "Protein FAA for Gp0115665", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_proteins.faa", + "md5_checksum": "2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "id": "nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "file_size_bytes": 100719814 + }, + { + "name": "Gp0115665_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115665", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_structural_annotation.gff", + "md5_checksum": "6c55ce2e0d6e74d217d850b273c4f0c4", + "id": "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", + "file_size_bytes": 2534 + }, + { + "name": "Gp0115665_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115665", + "data_object_type": "Functional Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_functional_annotation.gff", + "md5_checksum": "b3add25cdb76a537e70617ac6a1d1fc5", + "id": "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", + "file_size_bytes": 110405026 + }, + { + "name": "Gp0115665_KO TSV file", + "description": "KO TSV file for Gp0115665", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko.tsv", + "md5_checksum": "b782707ae2cf5676596ca99800deea26", + "id": "nmdc:b782707ae2cf5676596ca99800deea26", + "file_size_bytes": 12963636 + }, + { + "name": "Gp0115665_EC TSV file", + "description": "EC TSV file for Gp0115665", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ec.tsv", + "md5_checksum": "6a8565bf52f70efa03c755a9f0b82d7d", + "id": "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", + "file_size_bytes": 8371381 + }, + { + "name": "Gp0115665_COG GFF file", + "description": "COG GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cog.gff", + "md5_checksum": "f5d79b4c69825e0b66153e7582cb489b", + "id": "nmdc:f5d79b4c69825e0b66153e7582cb489b", + "file_size_bytes": 56948501 + }, + { + "name": "Gp0115665_PFAM GFF file", + "description": "PFAM GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_pfam.gff", + "md5_checksum": "f66a0eaa9432ef5a2dd390214f47eed5", + "id": "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", + "file_size_bytes": 45618277 + }, + { + "name": "Gp0115665_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_tigrfam.gff", + "md5_checksum": "26cc0a40aab6bfc64d24afa760b43102", + "id": "nmdc:26cc0a40aab6bfc64d24afa760b43102", + "file_size_bytes": 5245489 + }, + { + "name": 
"Gp0115665_SMART GFF file", + "description": "SMART GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_smart.gff", + "md5_checksum": "83785a6e8f7658dc2354b9bad1b86d01", + "id": "nmdc:83785a6e8f7658dc2354b9bad1b86d01", + "file_size_bytes": 15993417 + }, + { + "name": "Gp0115665_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_supfam.gff", + "md5_checksum": "0f03207aa38d1aec8afdbf2bec1e4990", + "id": "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", + "file_size_bytes": 76926960 + }, + { + "name": "Gp0115665_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cath_funfam.gff", + "md5_checksum": "4876eed2bee3b3b7b2ac827857410be6", + "id": "nmdc:4876eed2bee3b3b7b2ac827857410be6", + "file_size_bytes": 61571084 + }, + { + "name": "Gp0115665_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko_ec.gff", + "md5_checksum": "bb5b62735a896d189c9a274c6e091bab", + "id": "nmdc:bb5b62735a896d189c9a274c6e091bab", + "file_size_bytes": 41244685 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34738" + }, + "has_input": [ + "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "nmdc:e28c85b50e0b654626e655755165aff5", + "nmdc:b3add25cdb76a537e70617ac6a1d1fc5" + ], + "too_short_contig_num": 331533, + "part_of": [ + "nmdc:mga06n7k74" + ], + "binned_contig_num": 1636, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:79794b0497c1a4a292778ddb94504f7a", + "nmdc:e26dc245e491a521a94fbb9ab1b4293d", + "nmdc:45cb473694eb3cfa8abc7768e87ef303", + "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", + 
"nmdc:1098bd9921c6ab8f52aca786e3b7bf1d" + ], + "was_informed_by": "gold:Gp0115665", + "input_contig_num": 352053, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga06n7k74", + "mags_list": [ + { + "number_of_contig": 211, + "completeness": 44.36, + "bin_name": "bins.1", + "gene_count": 1029, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.49, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 564, + "completeness": 79.11, + "bin_name": "bins.2", + "gene_count": 4164, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 3.11, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Aquabacterium", + "num_t_rna": 33 + }, + { + "number_of_contig": 646, + "completeness": 72.48, + "bin_name": "bins.3", + "gene_count": 4108, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.62, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 28 + }, + { + "number_of_contig": 67, + "completeness": 1.97, + "bin_name": "bins.4", + "gene_count": 257, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 64, + 
"completeness": 7.47, + "bin_name": "bins.5", + "gene_count": 259, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 84, + "completeness": 3.88, + "bin_name": "bins.6", + "gene_count": 313, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + } + ], + "unbinned_contig_num": 18884, + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:19:29+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115665_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.tooShort.fa", + "md5_checksum": "79794b0497c1a4a292778ddb94504f7a", + "id": "nmdc:79794b0497c1a4a292778ddb94504f7a", + "file_size_bytes": 146322768 + }, + { + "name": "Gp0115665_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.unbinned.fa", + 
"md5_checksum": "e26dc245e491a521a94fbb9ab1b4293d", + "id": "nmdc:e26dc245e491a521a94fbb9ab1b4293d", + "file_size_bytes": 30116585 + }, + { + "name": "Gp0115665_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115665", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_checkm_qa.out", + "md5_checksum": "45cb473694eb3cfa8abc7768e87ef303", + "id": "nmdc:45cb473694eb3cfa8abc7768e87ef303", + "file_size_bytes": 1700 + }, + { + "name": "Gp0115665_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115665", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_hqmq_bin.zip", + "md5_checksum": "e344d87dbac42a645fd3c7d5b9d0a1a5", + "id": "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", + "file_size_bytes": 2294379 + }, + { + "name": "Gp0115665_metabat2 bins", + "description": "metabat2 bins for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_metabat_bin.zip", + "md5_checksum": "1098bd9921c6ab8f52aca786e3b7bf1d", + "id": "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d", + "file_size_bytes": 534425 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b52" + }, + "id": "nmdc:omprc-11-hqmmwn16", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-47nxfg85" + ], + "has_output": [ + "jgi:55d817fe0d8785342fcf8276" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", + 
"omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115669" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88d" + }, + "has_input": [ + "nmdc:f18b96b7d225d2f64f7b29015150113f" + ], + "part_of": [ + "nmdc:mga0k85x37" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6eef104db92b99c9741b26c667d75cd9", + "nmdc:58fde3e96dbb28af9133bede850a2653" + ], + "was_informed_by": "gold:Gp0115669", + "input_read_count": 20957834, + "output_read_bases": 3065138996, + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3164632934, + "name": "Read QC Activity for nmdc:mga0k85x37", + "output_read_count": 20454422, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:20:07+00:00", + "output_data_objects": [ + { + "name": "Gp0115669_Filtered Reads", + "description": "Filtered Reads for Gp0115669", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", + "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", + "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", + "file_size_bytes": 1806935637 + }, + { + "name": "Gp0115669_Filtered Stats", + "description": "Filtered Stats for Gp0115669", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", + "md5_checksum": "58fde3e96dbb28af9133bede850a2653", + "id": "nmdc:58fde3e96dbb28af9133bede850a2653", + "file_size_bytes": 286 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf51" + }, + "has_input": [ + "nmdc:6eef104db92b99c9741b26c667d75cd9" + ], + 
"git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05933784d02331b60b2531e2025cd3b7", + "nmdc:50fc279637cb7048aaaeec9b223d0286", + "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "nmdc:de45d70cc01749e9b5691dc24674545d", + "nmdc:534f97f3792b74385c4da305196a1b1d", + "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "nmdc:07b6457a094fab96563168ed287dc59f", + "nmdc:164a1bc50e8d6509446ae2877be8231c" + ], + "was_informed_by": "gold:Gp0115669", + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:20:07+00:00", + "output_data_objects": [ + { + "name": "Gp0115669_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", + "md5_checksum": "05933784d02331b60b2531e2025cd3b7", + "id": "nmdc:05933784d02331b60b2531e2025cd3b7", + "file_size_bytes": 11362 + }, + { + "name": "Gp0115669_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", + "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", + "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", + "file_size_bytes": 909325 + }, + { + "name": "Gp0115669_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115669", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", + "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", + "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "file_size_bytes": 261412 + }, + { + "name": "Gp0115669_Centrifuge 
classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115669", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", + "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", + "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "file_size_bytes": 1481087410 + }, + { + "name": "Gp0115669_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115669", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", + "md5_checksum": "de45d70cc01749e9b5691dc24674545d", + "id": "nmdc:de45d70cc01749e9b5691dc24674545d", + "file_size_bytes": 256139 + }, + { + "name": "Gp0115669_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115669", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", + "md5_checksum": "534f97f3792b74385c4da305196a1b1d", + "id": "nmdc:534f97f3792b74385c4da305196a1b1d", + "file_size_bytes": 2323658 + }, + { + "name": "Gp0115669_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115669", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", + "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", + "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "file_size_bytes": 1220980345 + }, + { + "name": "Gp0115669_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115669", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", + 
"md5_checksum": "07b6457a094fab96563168ed287dc59f", + "id": "nmdc:07b6457a094fab96563168ed287dc59f", + "file_size_bytes": 651795 + }, + { + "name": "Gp0115669_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115669", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", + "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", + "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", + "file_size_bytes": 3963303 + } + ] + }, + { + "_id": { + "$oid": "61e71a34833bcf838a701fb0" + }, + "has_input": [ + "nmdc:6eef104db92b99c9741b26c667d75cd9" + ], + "part_of": [ + "nmdc:mga0k85x37" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05933784d02331b60b2531e2025cd3b7", + "nmdc:50fc279637cb7048aaaeec9b223d0286", + "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "nmdc:de45d70cc01749e9b5691dc24674545d", + "nmdc:534f97f3792b74385c4da305196a1b1d", + "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "nmdc:07b6457a094fab96563168ed287dc59f", + "nmdc:164a1bc50e8d6509446ae2877be8231c" + ], + "was_informed_by": "gold:Gp0115669", + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:20:07+00:00", + "output_data_objects": [ + { + "name": "Gp0115669_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", + "md5_checksum": "05933784d02331b60b2531e2025cd3b7", + "id": "nmdc:05933784d02331b60b2531e2025cd3b7", + "file_size_bytes": 11362 + }, + { + "name": "Gp0115669_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for 
Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", + "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", + "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", + "file_size_bytes": 909325 + }, + { + "name": "Gp0115669_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115669", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", + "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", + "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "file_size_bytes": 261412 + }, + { + "name": "Gp0115669_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115669", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", + "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", + "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "file_size_bytes": 1481087410 + }, + { + "name": "Gp0115669_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115669", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", + "md5_checksum": "de45d70cc01749e9b5691dc24674545d", + "id": "nmdc:de45d70cc01749e9b5691dc24674545d", + "file_size_bytes": 256139 + }, + { + "name": "Gp0115669_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115669", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", + "md5_checksum": "534f97f3792b74385c4da305196a1b1d", + "id": "nmdc:534f97f3792b74385c4da305196a1b1d", + "file_size_bytes": 2323658 + }, + { 
+ "name": "Gp0115669_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115669", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", + "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", + "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "file_size_bytes": 1220980345 + }, + { + "name": "Gp0115669_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115669", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", + "md5_checksum": "07b6457a094fab96563168ed287dc59f", + "id": "nmdc:07b6457a094fab96563168ed287dc59f", + "file_size_bytes": 651795 + }, + { + "name": "Gp0115669_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115669", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", + "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", + "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", + "file_size_bytes": 3963303 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fba" + }, + "has_input": [ + "nmdc:6eef104db92b99c9741b26c667d75cd9" + ], + "part_of": [ + "nmdc:mga0k85x37" + ], + "ctg_logsum": 151663, + "scaf_logsum": 152336, + "gap_pct": 0.00222, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:03eb095e55df50d639fab237d06c14ac", + "nmdc:569cb5da239e82dce1b40bfa7e2fd518", + "nmdc:b77ef3014c80797cc88509adf02be002", + "nmdc:62d08517e0ba0f991f2d8bbd66061d78", + "nmdc:568b82cb6038fec5df04c30cbd874098" + ], + "asm_score": 4.733, + "was_informed_by": "gold:Gp0115669", + "ctg_powsum": 17017, + "scaf_max": 20100, + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "scaf_powsum": 17101, + 
"execution_resource": "NERSC-Cori", + "contigs": 114114, + "name": "Assembly Activity for nmdc:mga0k85x37", + "ctg_max": 20100, + "gc_std": 0.11871, + "contig_bp": 54567489, + "gc_avg": 0.55923, + "started_at_time": "2021-10-11T02:28:43Z", + "scaf_bp": 54568699, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 114011, + "ended_at_time": "2021-10-11T04:20:07+00:00", + "ctg_l50": 451, + "ctg_l90": 285, + "ctg_n50": 29019, + "ctg_n90": 94816, + "scaf_l50": 451, + "scaf_l90": 285, + "scaf_n50": 28976, + "scaf_n90": 94720, + "output_data_objects": [ + { + "name": "Gp0115669_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115669", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_contigs.fna", + "md5_checksum": "03eb095e55df50d639fab237d06c14ac", + "id": "nmdc:03eb095e55df50d639fab237d06c14ac", + "file_size_bytes": 58951440 + }, + { + "name": "Gp0115669_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115669", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_scaffolds.fna", + "md5_checksum": "569cb5da239e82dce1b40bfa7e2fd518", + "id": "nmdc:569cb5da239e82dce1b40bfa7e2fd518", + "file_size_bytes": 58607757 + }, + { + "name": "Gp0115669_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_covstats.txt", + "md5_checksum": "b77ef3014c80797cc88509adf02be002", + "id": "nmdc:b77ef3014c80797cc88509adf02be002", + "file_size_bytes": 8978635 + }, + { + "name": "Gp0115669_Assembled AGP file", + "description": "Assembled AGP file for Gp0115669", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_assembly.agp", + "md5_checksum": 
"62d08517e0ba0f991f2d8bbd66061d78", + "id": "nmdc:62d08517e0ba0f991f2d8bbd66061d78", + "file_size_bytes": 8358006 + }, + { + "name": "Gp0115669_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115669", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_pairedMapped_sorted.bam", + "md5_checksum": "568b82cb6038fec5df04c30cbd874098", + "id": "nmdc:568b82cb6038fec5df04c30cbd874098", + "file_size_bytes": 1940308720 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d0" + }, + "has_input": [ + "nmdc:03eb095e55df50d639fab237d06c14ac" + ], + "part_of": [ + "nmdc:mga0k85x37" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8a5f288604c61556ff3e827725864fd1", + "nmdc:0180998d6f3a3021638f04d9c0b35019", + "nmdc:950b8c4ebd1da50e2ca079273540f3af", + "nmdc:96ec49c6124cf4f8f3e7da3525348477", + "nmdc:12ca374a58bf899e42ed2c191a239e71", + "nmdc:b8ae2993aa29c8e04c00580dfdb82650", + "nmdc:7901c83b5a41e54854c96ab0b081ebd6", + "nmdc:762fe35b733dd82f89f5dce44fa54ed1", + "nmdc:661b70d6f41a44fcc1913b101f79d86a", + "nmdc:e1843a865023d75edd3139c14b8c355e", + "nmdc:a21449989b0b0884901602528b3f423e", + "nmdc:7f52547663f4eeea33de1e437012981e" + ], + "was_informed_by": "gold:Gp0115669", + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0k85x37", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:20:07+00:00", + "output_data_objects": [ + { + "name": "Gp0115669_Protein FAA", + "description": "Protein FAA for Gp0115669", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_proteins.faa", + "md5_checksum": "8a5f288604c61556ff3e827725864fd1", + "id": 
"nmdc:8a5f288604c61556ff3e827725864fd1", + "file_size_bytes": 32524652 + }, + { + "name": "Gp0115669_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115669", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_structural_annotation.gff", + "md5_checksum": "0180998d6f3a3021638f04d9c0b35019", + "id": "nmdc:0180998d6f3a3021638f04d9c0b35019", + "file_size_bytes": 2514 + }, + { + "name": "Gp0115669_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115669", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_functional_annotation.gff", + "md5_checksum": "950b8c4ebd1da50e2ca079273540f3af", + "id": "nmdc:950b8c4ebd1da50e2ca079273540f3af", + "file_size_bytes": 36685287 + }, + { + "name": "Gp0115669_KO TSV file", + "description": "KO TSV file for Gp0115669", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko.tsv", + "md5_checksum": "96ec49c6124cf4f8f3e7da3525348477", + "id": "nmdc:96ec49c6124cf4f8f3e7da3525348477", + "file_size_bytes": 4815732 + }, + { + "name": "Gp0115669_EC TSV file", + "description": "EC TSV file for Gp0115669", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ec.tsv", + "md5_checksum": "12ca374a58bf899e42ed2c191a239e71", + "id": "nmdc:12ca374a58bf899e42ed2c191a239e71", + "file_size_bytes": 3090911 + }, + { + "name": "Gp0115669_COG GFF file", + "description": "COG GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cog.gff", + "md5_checksum": "b8ae2993aa29c8e04c00580dfdb82650", + "id": "nmdc:b8ae2993aa29c8e04c00580dfdb82650", + "file_size_bytes": 20357759 
+ }, + { + "name": "Gp0115669_PFAM GFF file", + "description": "PFAM GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_pfam.gff", + "md5_checksum": "7901c83b5a41e54854c96ab0b081ebd6", + "id": "nmdc:7901c83b5a41e54854c96ab0b081ebd6", + "file_size_bytes": 15876941 + }, + { + "name": "Gp0115669_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_tigrfam.gff", + "md5_checksum": "762fe35b733dd82f89f5dce44fa54ed1", + "id": "nmdc:762fe35b733dd82f89f5dce44fa54ed1", + "file_size_bytes": 2104873 + }, + { + "name": "Gp0115669_SMART GFF file", + "description": "SMART GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_smart.gff", + "md5_checksum": "661b70d6f41a44fcc1913b101f79d86a", + "id": "nmdc:661b70d6f41a44fcc1913b101f79d86a", + "file_size_bytes": 4523437 + }, + { + "name": "Gp0115669_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_supfam.gff", + "md5_checksum": "e1843a865023d75edd3139c14b8c355e", + "id": "nmdc:e1843a865023d75edd3139c14b8c355e", + "file_size_bytes": 25872277 + }, + { + "name": "Gp0115669_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cath_funfam.gff", + "md5_checksum": "a21449989b0b0884901602528b3f423e", + "id": "nmdc:a21449989b0b0884901602528b3f423e", + "file_size_bytes": 20254021 + }, + { + "name": "Gp0115669_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko_ec.gff", + "md5_checksum": "7f52547663f4eeea33de1e437012981e", + "id": "nmdc:7f52547663f4eeea33de1e437012981e", + 
"file_size_bytes": 15397038 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34735" + }, + "has_input": [ + "nmdc:03eb095e55df50d639fab237d06c14ac", + "nmdc:568b82cb6038fec5df04c30cbd874098", + "nmdc:950b8c4ebd1da50e2ca079273540f3af" + ], + "too_short_contig_num": 107191, + "part_of": [ + "nmdc:mga0k85x37" + ], + "binned_contig_num": 651, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:420b015f88d0b88ab582805f39ed2b47", + "nmdc:ee8a556be3a57008c1c05ff9fe83437e", + "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", + "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", + "nmdc:6a80769f6812a45615890cc2b03e9abf" + ], + "was_informed_by": "gold:Gp0115669", + "input_contig_num": 114113, + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0k85x37", + "mags_list": [ + { + "number_of_contig": 48, + "completeness": 13.04, + "bin_name": "bins.1", + "gene_count": 245, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 379, + "completeness": 72.42, + "bin_name": "bins.2", + "gene_count": 2513, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.85, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 32 + }, + { + "number_of_contig": 224, + "completeness": 29.36, + "bin_name": "bins.3", + "gene_count": 1148, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + 
"gtdbtk_domain": "", + "contamination": 0.43, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + } + ], + "unbinned_contig_num": 6271, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:20:07+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115669_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.tooShort.fa", + "md5_checksum": "420b015f88d0b88ab582805f39ed2b47", + "id": "nmdc:420b015f88d0b88ab582805f39ed2b47", + "file_size_bytes": 44979790 + }, + { + "name": "Gp0115669_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.unbinned.fa", + "md5_checksum": "ee8a556be3a57008c1c05ff9fe83437e", + "id": "nmdc:ee8a556be3a57008c1c05ff9fe83437e", + "file_size_bytes": 10530111 + }, + { + "name": "Gp0115669_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115669", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_checkm_qa.out", + "md5_checksum": "6fd5dfbd1500a60620194b5b9a4aab8a", + "id": "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", + 
"file_size_bytes": 1190 + }, + { + "name": "Gp0115669_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115669", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_hqmq_bin.zip", + "md5_checksum": "6a7eb248822ec0994ddeffe8b5aae7b1", + "id": "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", + "file_size_bytes": 681479 + }, + { + "name": "Gp0115669_metabat2 bins", + "description": "metabat2 bins for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_metabat_bin.zip", + "md5_checksum": "6a80769f6812a45615890cc2b03e9abf", + "id": "nmdc:6a80769f6812a45615890cc2b03e9abf", + "file_size_bytes": 359752 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b53" + }, + "id": "nmdc:omprc-11-qsxwf517", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-sdhyr752" + ], + "has_output": [ + "jgi:55d7402b0d8785342fcf7e3c" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115672" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c889" + }, + "has_input": [ + "nmdc:1f6998a48aec6f4008a92d2b8e17d314" + ], + "part_of": [ + "nmdc:mga0cwhj53" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + 
"has_output": [ + "nmdc:eb516fb673793f5161fb634fc19de310", + "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f" + ], + "was_informed_by": "gold:Gp0115672", + "input_read_count": 34522052, + "output_read_bases": 5012430912, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "input_read_bases": 5212829852, + "name": "Read QC Activity for nmdc:mga0cwhj53", + "output_read_count": 33454554, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00", + "output_data_objects": [ + { + "name": "Gp0115672_Filtered Reads", + "description": "Filtered Reads for Gp0115672", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filtered.fastq.gz", + "md5_checksum": "eb516fb673793f5161fb634fc19de310", + "id": "nmdc:eb516fb673793f5161fb634fc19de310", + "file_size_bytes": 2704299418 + }, + { + "name": "Gp0115672_Filtered Stats", + "description": "Filtered Stats for Gp0115672", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filterStats.txt", + "md5_checksum": "f4b68d1bd25f8d2fa8986aeef5fbec3f", + "id": "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f", + "file_size_bytes": 290 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf52" + }, + "has_input": [ + "nmdc:eb516fb673793f5161fb634fc19de310" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5a9326e2e450663a5ed8c97389136b25", + "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "nmdc:39a46887587926c9b81e126bb1036005", + "nmdc:b8dde2c047141d9097317c86f723eded", + "nmdc:d530342b37f0785f92650e9650f31d6a", + "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "nmdc:3266e79813577aae1d4377c62e73332c" + ], + "was_informed_by": "gold:Gp0115672", + "id": 
"nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:56:20+00:00", + "output_data_objects": [ + { + "name": "Gp0115672_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", + "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", + "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", + "file_size_bytes": 15806 + }, + { + "name": "Gp0115672_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", + "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", + "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "file_size_bytes": 1142479 + }, + { + "name": "Gp0115672_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115672", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", + "md5_checksum": "39a46887587926c9b81e126bb1036005", + "id": "nmdc:39a46887587926c9b81e126bb1036005", + "file_size_bytes": 273611 + }, + { + "name": "Gp0115672_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115672", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", + "md5_checksum": "b8dde2c047141d9097317c86f723eded", + "id": "nmdc:b8dde2c047141d9097317c86f723eded", + "file_size_bytes": 2436637487 + }, + { + "name": "Gp0115672_Centrifuge TSV report", + "description": "Centrifuge TSV report for 
Gp0115672", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", + "md5_checksum": "d530342b37f0785f92650e9650f31d6a", + "id": "nmdc:d530342b37f0785f92650e9650f31d6a", + "file_size_bytes": 261520 + }, + { + "name": "Gp0115672_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115672", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", + "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", + "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "file_size_bytes": 2342832 + }, + { + "name": "Gp0115672_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115672", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", + "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", + "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "file_size_bytes": 1993150715 + }, + { + "name": "Gp0115672_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115672", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", + "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", + "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "file_size_bytes": 693572 + }, + { + "name": "Gp0115672_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115672", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", + "md5_checksum": "3266e79813577aae1d4377c62e73332c", + "id": "nmdc:3266e79813577aae1d4377c62e73332c", + 
"file_size_bytes": 4177114 + } + ] + }, + { + "_id": { + "$oid": "61e71a33833bcf838a701f34" + }, + "has_input": [ + "nmdc:eb516fb673793f5161fb634fc19de310" + ], + "part_of": [ + "nmdc:mga0cwhj53" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5a9326e2e450663a5ed8c97389136b25", + "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "nmdc:39a46887587926c9b81e126bb1036005", + "nmdc:b8dde2c047141d9097317c86f723eded", + "nmdc:d530342b37f0785f92650e9650f31d6a", + "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "nmdc:3266e79813577aae1d4377c62e73332c" + ], + "was_informed_by": "gold:Gp0115672", + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:56:20+00:00", + "output_data_objects": [ + { + "name": "Gp0115672_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", + "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", + "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", + "file_size_bytes": 15806 + }, + { + "name": "Gp0115672_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", + "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", + "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "file_size_bytes": 1142479 + }, + { + "name": "Gp0115672_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115672", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", + "md5_checksum": "39a46887587926c9b81e126bb1036005", + "id": "nmdc:39a46887587926c9b81e126bb1036005", + "file_size_bytes": 273611 + }, + { + "name": "Gp0115672_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115672", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", + "md5_checksum": "b8dde2c047141d9097317c86f723eded", + "id": "nmdc:b8dde2c047141d9097317c86f723eded", + "file_size_bytes": 2436637487 + }, + { + "name": "Gp0115672_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115672", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", + "md5_checksum": "d530342b37f0785f92650e9650f31d6a", + "id": "nmdc:d530342b37f0785f92650e9650f31d6a", + "file_size_bytes": 261520 + }, + { + "name": "Gp0115672_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115672", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", + "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", + "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "file_size_bytes": 2342832 + }, + { + "name": "Gp0115672_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115672", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", + "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", + "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "file_size_bytes": 
1993150715 + }, + { + "name": "Gp0115672_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115672", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", + "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", + "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "file_size_bytes": 693572 + }, + { + "name": "Gp0115672_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115672", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", + "md5_checksum": "3266e79813577aae1d4377c62e73332c", + "id": "nmdc:3266e79813577aae1d4377c62e73332c", + "file_size_bytes": 4177114 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fbf" + }, + "has_input": [ + "nmdc:eb516fb673793f5161fb634fc19de310" + ], + "part_of": [ + "nmdc:mga0cwhj53" + ], + "ctg_logsum": 447149, + "scaf_logsum": 448446, + "gap_pct": 0.0019, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6f762f7b079f8c2633ef674a8264879f", + "nmdc:26cc1c91f5f5e79d50041ff4623398b5", + "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", + "nmdc:362a9857666fe2f4e90bf6a818f551cc", + "nmdc:afd1d03b38bc5deb9c196264bcea8795" + ], + "asm_score": 13.127, + "was_informed_by": "gold:Gp0115672", + "ctg_powsum": 55923, + "scaf_max": 157008, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "scaf_powsum": 56113, + "execution_resource": "NERSC-Cori", + "contigs": 221046, + "name": "Assembly Activity for nmdc:mga0cwhj53", + "ctg_max": 157008, + "gc_std": 0.10619, + "contig_bp": 120471215, + "gc_avg": 0.56196, + "started_at_time": "2021-10-11T02:28:16Z", + "scaf_bp": 120473505, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 220853, + "ended_at_time": "2021-10-11T05:56:20+00:00", + "ctg_l50": 528, + "ctg_l90": 293, + "ctg_n50": 48327, + 
"ctg_n90": 178881, + "scaf_l50": 529, + "scaf_l90": 293, + "scaf_n50": 48077, + "scaf_n90": 178708, + "scaf_l_gt50k": 2147966, + "scaf_n_gt50k": 28, + "scaf_pct_gt50k": 1.7829365, + "output_data_objects": [ + { + "name": "Gp0115672_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115672", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_contigs.fna", + "md5_checksum": "6f762f7b079f8c2633ef674a8264879f", + "id": "nmdc:6f762f7b079f8c2633ef674a8264879f", + "file_size_bytes": 129321165 + }, + { + "name": "Gp0115672_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115672", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_scaffolds.fna", + "md5_checksum": "26cc1c91f5f5e79d50041ff4623398b5", + "id": "nmdc:26cc1c91f5f5e79d50041ff4623398b5", + "file_size_bytes": 128655263 + }, + { + "name": "Gp0115672_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_covstats.txt", + "md5_checksum": "bd9d5497c4e2e0ea61df1f3f239107f7", + "id": "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", + "file_size_bytes": 17496249 + }, + { + "name": "Gp0115672_Assembled AGP file", + "description": "Assembled AGP file for Gp0115672", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_assembly.agp", + "md5_checksum": "362a9857666fe2f4e90bf6a818f551cc", + "id": "nmdc:362a9857666fe2f4e90bf6a818f551cc", + "file_size_bytes": 16401188 + }, + { + "name": "Gp0115672_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115672", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_pairedMapped_sorted.bam", + "md5_checksum": "afd1d03b38bc5deb9c196264bcea8795", + "id": "nmdc:afd1d03b38bc5deb9c196264bcea8795", + "file_size_bytes": 2952467259 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d1" + }, + "has_input": [ + "nmdc:6f762f7b079f8c2633ef674a8264879f" + ], + "part_of": [ + "nmdc:mga0cwhj53" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:84e3590be0f59007275fdf459d464f74", + "nmdc:7dd630b842f587768235714e8a95f377", + "nmdc:38d776837c2208b557e2e4e5428c879d", + "nmdc:e38cb3355892042cb02580c26c083cd9", + "nmdc:d55119e8f094efa075c44b22e8b2f689", + "nmdc:02a9ad5732172f04d1da83d145f63226", + "nmdc:73811b72087e57f23db32f4a0ca4fb9c", + "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", + "nmdc:5a843529ffac8227515c5ea399ee4815", + "nmdc:82ac29a9999c6bc097cb0f35e4177e35", + "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", + "nmdc:1e74c3df751a59a34e5c0d87f4a37563" + ], + "was_informed_by": "gold:Gp0115672", + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0cwhj53", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00", + "output_data_objects": [ + { + "name": "Gp0115672_Protein FAA", + "description": "Protein FAA for Gp0115672", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_proteins.faa", + "md5_checksum": "84e3590be0f59007275fdf459d464f74", + "id": "nmdc:84e3590be0f59007275fdf459d464f74", + "file_size_bytes": 71651089 + }, + { + "name": "Gp0115672_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115672", + "data_object_type": "Structural Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_structural_annotation.gff", + "md5_checksum": "7dd630b842f587768235714e8a95f377", + "id": "nmdc:7dd630b842f587768235714e8a95f377", + "file_size_bytes": 2534 + }, + { + "name": "Gp0115672_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115672", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_functional_annotation.gff", + "md5_checksum": "38d776837c2208b557e2e4e5428c879d", + "id": "nmdc:38d776837c2208b557e2e4e5428c879d", + "file_size_bytes": 78213025 + }, + { + "name": "Gp0115672_KO TSV file", + "description": "KO TSV file for Gp0115672", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko.tsv", + "md5_checksum": "e38cb3355892042cb02580c26c083cd9", + "id": "nmdc:e38cb3355892042cb02580c26c083cd9", + "file_size_bytes": 10621211 + }, + { + "name": "Gp0115672_EC TSV file", + "description": "EC TSV file for Gp0115672", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ec.tsv", + "md5_checksum": "d55119e8f094efa075c44b22e8b2f689", + "id": "nmdc:d55119e8f094efa075c44b22e8b2f689", + "file_size_bytes": 6814564 + }, + { + "name": "Gp0115672_COG GFF file", + "description": "COG GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cog.gff", + "md5_checksum": "02a9ad5732172f04d1da83d145f63226", + "id": "nmdc:02a9ad5732172f04d1da83d145f63226", + "file_size_bytes": 45617917 + }, + { + "name": "Gp0115672_PFAM GFF file", + "description": "PFAM GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_pfam.gff", + "md5_checksum": "73811b72087e57f23db32f4a0ca4fb9c", + "id": 
"nmdc:73811b72087e57f23db32f4a0ca4fb9c", + "file_size_bytes": 37040943 + }, + { + "name": "Gp0115672_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_tigrfam.gff", + "md5_checksum": "dfc18c0f97e80c14ca6ca1bc2ba7a809", + "id": "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", + "file_size_bytes": 5380314 + }, + { + "name": "Gp0115672_SMART GFF file", + "description": "SMART GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_smart.gff", + "md5_checksum": "5a843529ffac8227515c5ea399ee4815", + "id": "nmdc:5a843529ffac8227515c5ea399ee4815", + "file_size_bytes": 10141642 + }, + { + "name": "Gp0115672_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_supfam.gff", + "md5_checksum": "82ac29a9999c6bc097cb0f35e4177e35", + "id": "nmdc:82ac29a9999c6bc097cb0f35e4177e35", + "file_size_bytes": 56808220 + }, + { + "name": "Gp0115672_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cath_funfam.gff", + "md5_checksum": "5b0e8395559ef0d8a341ae0e132e60f6", + "id": "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", + "file_size_bytes": 45632833 + }, + { + "name": "Gp0115672_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko_ec.gff", + "md5_checksum": "1e74c3df751a59a34e5c0d87f4a37563", + "id": "nmdc:1e74c3df751a59a34e5c0d87f4a37563", + "file_size_bytes": 33782864 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34737" + }, + "has_input": [ + "nmdc:6f762f7b079f8c2633ef674a8264879f", + "nmdc:afd1d03b38bc5deb9c196264bcea8795", + "nmdc:38d776837c2208b557e2e4e5428c879d" + ], + 
"too_short_contig_num": 206294, + "part_of": [ + "nmdc:mga0cwhj53" + ], + "binned_contig_num": 1785, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2b6e0195e34697039eff38b51026be24", + "nmdc:f02d361fbef7549e2289bf4da623787d", + "nmdc:2de282e5507477269238ead458f11ac0", + "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", + "nmdc:4d315d8dac1d9605d110ff2298b10229" + ], + "was_informed_by": "gold:Gp0115672", + "input_contig_num": 221045, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0cwhj53", + "mags_list": [ + { + "number_of_contig": 316, + "completeness": 61.03, + "bin_name": "bins.1", + "gene_count": 2148, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.85, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 19 + }, + { + "number_of_contig": 130, + "completeness": 34.64, + "bin_name": "bins.2", + "gene_count": 675, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 201, + "completeness": 19.13, + "bin_name": "bins.3", + "gene_count": 1000, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 256, + "completeness": 75.9, + "bin_name": "bins.4", + "gene_count": 
2131, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 1.52, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 22 + }, + { + "number_of_contig": 254, + "completeness": 100.0, + "bin_name": "bins.5", + "gene_count": 6188, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 95.83, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 86 + }, + { + "number_of_contig": 106, + "completeness": 7.24, + "bin_name": "bins.6", + "gene_count": 524, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 306, + "completeness": 65.74, + "bin_name": "bins.7", + "gene_count": 2357, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 2.3, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 29 + }, + { + "number_of_contig": 216, + "completeness": 47.34, + "bin_name": "bins.8", + "gene_count": 1203, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + } + ], + "unbinned_contig_num": 12966, + "started_at_time": 
"2021-10-11T02:28:16Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115672_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.tooShort.fa", + "md5_checksum": "2b6e0195e34697039eff38b51026be24", + "id": "nmdc:2b6e0195e34697039eff38b51026be24", + "file_size_bytes": 91055942 + }, + { + "name": "Gp0115672_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.unbinned.fa", + "md5_checksum": "f02d361fbef7549e2289bf4da623787d", + "id": "nmdc:f02d361fbef7549e2289bf4da623787d", + "file_size_bytes": 23202832 + }, + { + "name": "Gp0115672_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115672", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_checkm_qa.out", + "md5_checksum": "2de282e5507477269238ead458f11ac0", + "id": "nmdc:2de282e5507477269238ead458f11ac0", + "file_size_bytes": 2040 + }, + { + "name": "Gp0115672_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115672", + "data_object_type": "Metagenome Bins", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_hqmq_bin.zip", + "md5_checksum": "3abae1a573f9f0ac6da47e1ab9b9a723", + "id": "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", + "file_size_bytes": 1815861 + }, + { + "name": "Gp0115672_metabat2 bins", + "description": "metabat2 bins for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_metabat_bin.zip", + "md5_checksum": "4d315d8dac1d9605d110ff2298b10229", + "id": "nmdc:4d315d8dac1d9605d110ff2298b10229", + "file_size_bytes": 2757900 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b54" + }, + "id": "nmdc:omprc-11-932jcd76", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-pvcgp635" + ], + "has_output": [ + "jgi:574fe0a17ded5e3df1ee148a" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127640" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c875" + }, + "has_input": [ + "nmdc:0094fcbe3a051a8000b8823c8db540f8" + ], + "part_of": [ + "nmdc:mga06rnc11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:534c94e20d292a6bf09c0a42b550b4c2", + "nmdc:db5ccad12d6ddb46947fbd815aae7f9a" + ], + "was_informed_by": "gold:Gp0127640", 
+ "input_read_count": 28754670, + "output_read_bases": 4186416440, + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4341955170, + "name": "Read QC Activity for nmdc:mga06rnc11", + "output_read_count": 27981268, + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:33:17+00:00", + "output_data_objects": [ + { + "name": "Gp0127640_Filtered Reads", + "description": "Filtered Reads for Gp0127640", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", + "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", + "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", + "file_size_bytes": 2416846292 + }, + { + "name": "Gp0127640_Filtered Stats", + "description": "Filtered Stats for Gp0127640", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", + "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", + "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", + "file_size_bytes": 285 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf3a" + }, + "has_input": [ + "nmdc:534c94e20d292a6bf09c0a42b550b4c2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "nmdc:61f1f6d57fd4d445682e25ec34901721", + "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "nmdc:e2939606fc9ff1c0046b333e1740f258", + "nmdc:d47144fd7ec0608e7677550d9589c889" + ], + "was_informed_by": "gold:Gp0127640", + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", + "started_at_time": 
"2021-10-11T02:24:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:33:17+00:00", + "output_data_objects": [ + { + "name": "Gp0127640_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", + "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", + "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "file_size_bytes": 3824 + }, + { + "name": "Gp0127640_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", + "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", + "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "file_size_bytes": 850491 + }, + { + "name": "Gp0127640_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127640", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", + "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", + "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "file_size_bytes": 236151 + }, + { + "name": "Gp0127640_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127640", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", + "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", + "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "file_size_bytes": 2057333090 + }, + { + "name": "Gp0127640_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127640", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", + "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", + "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", + "file_size_bytes": 256577 + }, + { + "name": "Gp0127640_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127640", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", + "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", + "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "file_size_bytes": 2334984 + }, + { + "name": "Gp0127640_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127640", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", + "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", + "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "file_size_bytes": 1658481192 + }, + { + "name": "Gp0127640_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127640", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", + "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", + "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", + "file_size_bytes": 653129 + }, + { + "name": "Gp0127640_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127640", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", + "md5_checksum": "d47144fd7ec0608e7677550d9589c889", + "id": "nmdc:d47144fd7ec0608e7677550d9589c889", + "file_size_bytes": 3977820 + } + ] + }, + { + "_id": { + "$oid": 
"61e7199d833bcf838a700ec0" + }, + "has_input": [ + "nmdc:534c94e20d292a6bf09c0a42b550b4c2" + ], + "part_of": [ + "nmdc:mga06rnc11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "nmdc:61f1f6d57fd4d445682e25ec34901721", + "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "nmdc:e2939606fc9ff1c0046b333e1740f258", + "nmdc:d47144fd7ec0608e7677550d9589c889" + ], + "was_informed_by": "gold:Gp0127640", + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:33:17+00:00", + "output_data_objects": [ + { + "name": "Gp0127640_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", + "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", + "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "file_size_bytes": 3824 + }, + { + "name": "Gp0127640_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", + "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", + "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "file_size_bytes": 850491 + }, + { + "name": "Gp0127640_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127640", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", + "md5_checksum": 
"d5e45563875efca0653ba2dd47ee3d68", + "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "file_size_bytes": 236151 + }, + { + "name": "Gp0127640_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127640", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", + "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", + "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "file_size_bytes": 2057333090 + }, + { + "name": "Gp0127640_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127640", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", + "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", + "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", + "file_size_bytes": 256577 + }, + { + "name": "Gp0127640_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127640", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", + "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", + "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "file_size_bytes": 2334984 + }, + { + "name": "Gp0127640_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127640", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", + "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", + "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "file_size_bytes": 1658481192 + }, + { + "name": "Gp0127640_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127640", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", + "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", + "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", + "file_size_bytes": 653129 + }, + { + "name": "Gp0127640_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127640", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", + "md5_checksum": "d47144fd7ec0608e7677550d9589c889", + "id": "nmdc:d47144fd7ec0608e7677550d9589c889", + "file_size_bytes": 3977820 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa6" + }, + "has_input": [ + "nmdc:534c94e20d292a6bf09c0a42b550b4c2" + ], + "part_of": [ + "nmdc:mga06rnc11" + ], + "ctg_logsum": 42879, + "scaf_logsum": 42987, + "gap_pct": 0.0005, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b85a322271c7f93ef295141d12cb2dbc", + "nmdc:794445b3fedfaec8af9b70b167bc6852", + "nmdc:d389ae4f8a92c21423fc77aa054ba985", + "nmdc:765541c2865f6047d5e2e8e7299908e4", + "nmdc:78b554dd52492c3d1e401d0c9198b89b" + ], + "asm_score": 5.471, + "was_informed_by": "gold:Gp0127640", + "ctg_powsum": 4901.253, + "scaf_max": 27880, + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "scaf_powsum": 4913.296, + "execution_resource": "NERSC-Cori", + "contigs": 106665, + "name": "Assembly Activity for nmdc:mga06rnc11", + "ctg_max": 27880, + "gc_std": 0.10189, + "contig_bp": 40331509, + "gc_avg": 0.58648, + "started_at_time": "2021-10-11T02:24:27Z", + "scaf_bp": 40331709, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 106645, + "ended_at_time": "2021-10-11T04:33:17+00:00", + "ctg_l50": 336, + "ctg_l90": 282, + "ctg_n50": 38543, + "ctg_n90": 94525, + "scaf_l50": 336, + "scaf_l90": 282, + "scaf_n50": 38534, + "scaf_n90": 94506, + "output_data_objects": [ + { + "name": 
"Gp0127640_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127640", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_contigs.fna", + "md5_checksum": "b85a322271c7f93ef295141d12cb2dbc", + "id": "nmdc:b85a322271c7f93ef295141d12cb2dbc", + "file_size_bytes": 44243651 + }, + { + "name": "Gp0127640_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127640", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_scaffolds.fna", + "md5_checksum": "794445b3fedfaec8af9b70b167bc6852", + "id": "nmdc:794445b3fedfaec8af9b70b167bc6852", + "file_size_bytes": 43923338 + }, + { + "name": "Gp0127640_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_covstats.txt", + "md5_checksum": "d389ae4f8a92c21423fc77aa054ba985", + "id": "nmdc:d389ae4f8a92c21423fc77aa054ba985", + "file_size_bytes": 8365383 + }, + { + "name": "Gp0127640_Assembled AGP file", + "description": "Assembled AGP file for Gp0127640", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_assembly.agp", + "md5_checksum": "765541c2865f6047d5e2e8e7299908e4", + "id": "nmdc:765541c2865f6047d5e2e8e7299908e4", + "file_size_bytes": 7782777 + }, + { + "name": "Gp0127640_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127640", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_pairedMapped_sorted.bam", + "md5_checksum": "78b554dd52492c3d1e401d0c9198b89b", + "id": "nmdc:78b554dd52492c3d1e401d0c9198b89b", + "file_size_bytes": 2578128724 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9bb" 
+ }, + "has_input": [ + "nmdc:b85a322271c7f93ef295141d12cb2dbc" + ], + "part_of": [ + "nmdc:mga06rnc11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:13e64b02d230f76008e42256a48d1cec", + "nmdc:7babb0c9f662679659b7b1bee469f073", + "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", + "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", + "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", + "nmdc:93fa7de9c74cfcff99bb74e27fa94674", + "nmdc:63bad86a6d7fb23b5a4683ae36820622", + "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", + "nmdc:46722961c280df725d15489e82502031", + "nmdc:6f1a0029cb25f1433de1d7c241bc7553", + "nmdc:6d2839963f616d810e66435b3bbe018a", + "nmdc:efbf36ca49c40ad0367ecd23c012b29b" + ], + "was_informed_by": "gold:Gp0127640", + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga06rnc11", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:33:17+00:00", + "output_data_objects": [ + { + "name": "Gp0127640_Protein FAA", + "description": "Protein FAA for Gp0127640", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_proteins.faa", + "md5_checksum": "13e64b02d230f76008e42256a48d1cec", + "id": "nmdc:13e64b02d230f76008e42256a48d1cec", + "file_size_bytes": 26637626 + }, + { + "name": "Gp0127640_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127640", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_structural_annotation.gff", + "md5_checksum": "7babb0c9f662679659b7b1bee469f073", + "id": "nmdc:7babb0c9f662679659b7b1bee469f073", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127640_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127640", + 
"data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_functional_annotation.gff", + "md5_checksum": "e84b1e43d546c9793c3a4d9eaa8cee86", + "id": "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", + "file_size_bytes": 32184781 + }, + { + "name": "Gp0127640_KO TSV file", + "description": "KO TSV file for Gp0127640", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko.tsv", + "md5_checksum": "2e3e5b7ffa39e533db8ed1d925426f50", + "id": "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", + "file_size_bytes": 3620933 + }, + { + "name": "Gp0127640_EC TSV file", + "description": "EC TSV file for Gp0127640", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ec.tsv", + "md5_checksum": "62e46d35a6aff3a52b39c6bb04dc6161", + "id": "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", + "file_size_bytes": 2390086 + }, + { + "name": "Gp0127640_COG GFF file", + "description": "COG GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cog.gff", + "md5_checksum": "93fa7de9c74cfcff99bb74e27fa94674", + "id": "nmdc:93fa7de9c74cfcff99bb74e27fa94674", + "file_size_bytes": 17898567 + }, + { + "name": "Gp0127640_PFAM GFF file", + "description": "PFAM GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_pfam.gff", + "md5_checksum": "63bad86a6d7fb23b5a4683ae36820622", + "id": "nmdc:63bad86a6d7fb23b5a4683ae36820622", + "file_size_bytes": 12585366 + }, + { + "name": "Gp0127640_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_tigrfam.gff", + "md5_checksum": "d6b80bb748b4d6fbe52c15300ad2137b", + "id": 
"nmdc:d6b80bb748b4d6fbe52c15300ad2137b", + "file_size_bytes": 1170952 + }, + { + "name": "Gp0127640_SMART GFF file", + "description": "SMART GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_smart.gff", + "md5_checksum": "46722961c280df725d15489e82502031", + "id": "nmdc:46722961c280df725d15489e82502031", + "file_size_bytes": 3891425 + }, + { + "name": "Gp0127640_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_supfam.gff", + "md5_checksum": "6f1a0029cb25f1433de1d7c241bc7553", + "id": "nmdc:6f1a0029cb25f1433de1d7c241bc7553", + "file_size_bytes": 22543435 + }, + { + "name": "Gp0127640_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cath_funfam.gff", + "md5_checksum": "6d2839963f616d810e66435b3bbe018a", + "id": "nmdc:6d2839963f616d810e66435b3bbe018a", + "file_size_bytes": 16572925 + }, + { + "name": "Gp0127640_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko_ec.gff", + "md5_checksum": "efbf36ca49c40ad0367ecd23c012b29b", + "id": "nmdc:efbf36ca49c40ad0367ecd23c012b29b", + "file_size_bytes": 11571776 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3471a" + }, + "has_input": [ + "nmdc:b85a322271c7f93ef295141d12cb2dbc", + "nmdc:78b554dd52492c3d1e401d0c9198b89b", + "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86" + ], + "too_short_contig_num": 104867, + "part_of": [ + "nmdc:mga06rnc11" + ], + "binned_contig_num": 213, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ce395376d0bc7121e4dc5efc774d5e74", + "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", + 
"nmdc:97ae130ca2f75c66b8cbd60c4d35463a", + "nmdc:5945311235c6195ad409ab30e2b72c0c", + "nmdc:d1cf2992bd60e25032eedeb09858d14b" + ], + "was_informed_by": "gold:Gp0127640", + "input_contig_num": 106665, + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga06rnc11", + "mags_list": [ + { + "number_of_contig": 213, + "completeness": 48.94, + "bin_name": "bins.1", + "gene_count": 1422, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 30 + } + ], + "unbinned_contig_num": 1585, + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:33:17+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127640_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.tooShort.fa", + "md5_checksum": "ce395376d0bc7121e4dc5efc774d5e74", + "id": "nmdc:ce395376d0bc7121e4dc5efc774d5e74", + "file_size_bytes": 40358420 + }, + { + "name": "Gp0127640_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127640", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.unbinned.fa", + "md5_checksum": "a16cbb06b91ebfb45f5a010effc1cfde", + "id": "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", + "file_size_bytes": 2755747 + }, + { + "name": "Gp0127640_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127640", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_checkm_qa.out", + "md5_checksum": "97ae130ca2f75c66b8cbd60c4d35463a", + "id": "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", + "file_size_bytes": 760 + }, + { + "name": "Gp0127640_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127640", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_hqmq_bin.zip", + "md5_checksum": "5945311235c6195ad409ab30e2b72c0c", + "id": "nmdc:5945311235c6195ad409ab30e2b72c0c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127640_metabat2 bins", + "description": "metabat2 bins for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_metabat_bin.zip", + "md5_checksum": "d1cf2992bd60e25032eedeb09858d14b", + "id": "nmdc:d1cf2992bd60e25032eedeb09858d14b", + "file_size_bytes": 345388 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b55" + }, + "id": "nmdc:omprc-11-p0jdew93", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-fgtanh42" + ], + "has_output": [ + "jgi:574fde697ded5e3df1ee140a" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": 
"2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127641" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c872" + }, + "has_input": [ + "nmdc:c59690f54a7afb65869c9c683e3eef7f" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a2700afe93abad6f004a3701348622a2", + "nmdc:aaa9a8a3d8e147116953394a8755742d" + ], + "was_informed_by": "gold:Gp0127641", + "input_read_count": 24261468, + "output_read_bases": 3340338011, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3663481668, + "name": "Read QC Activity for nmdc:mga0822t33", + "output_read_count": 22362924, + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00", + "output_data_objects": [ + { + "name": "Gp0127641_Filtered Reads", + "description": "Filtered Reads for Gp0127641", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filtered.fastq.gz", + "md5_checksum": "a2700afe93abad6f004a3701348622a2", + "id": "nmdc:a2700afe93abad6f004a3701348622a2", + "file_size_bytes": 1787020792 + }, + { + "name": "Gp0127641_Filtered Stats", + "description": "Filtered Stats for Gp0127641", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filterStats.txt", + "md5_checksum": "aaa9a8a3d8e147116953394a8755742d", + "id": 
"nmdc:aaa9a8a3d8e147116953394a8755742d", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf37" + }, + "has_input": [ + "nmdc:a2700afe93abad6f004a3701348622a2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "nmdc:a42312841b816448d8bd5d3adfa65f58", + "nmdc:f473f4a99336a49105d2722888ae0510", + "nmdc:ae51ea50660f44fa3b317a45f3015556", + "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "nmdc:dc193d1a1693589003f992c820606bab", + "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" + ], + "was_informed_by": "gold:Gp0127641", + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0822t33", + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:05:47+00:00", + "output_data_objects": [ + { + "name": "Gp0127641_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", + "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", + "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "file_size_bytes": 3331 + }, + { + "name": "Gp0127641_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", + "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", + "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", + "file_size_bytes": 761359 + }, + { + "name": "Gp0127641_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127641", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", + "md5_checksum": "f473f4a99336a49105d2722888ae0510", + "id": "nmdc:f473f4a99336a49105d2722888ae0510", + "file_size_bytes": 236161 + }, + { + "name": "Gp0127641_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127641", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", + "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", + "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", + "file_size_bytes": 1635953327 + }, + { + "name": "Gp0127641_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127641", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", + "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", + "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "file_size_bytes": 255166 + }, + { + "name": "Gp0127641_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127641", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", + "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", + "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "file_size_bytes": 2332521 + }, + { + "name": "Gp0127641_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127641", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", + "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", + "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "file_size_bytes": 
1307934195 + }, + { + "name": "Gp0127641_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127641", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", + "md5_checksum": "dc193d1a1693589003f992c820606bab", + "id": "nmdc:dc193d1a1693589003f992c820606bab", + "file_size_bytes": 635050 + }, + { + "name": "Gp0127641_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127641", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", + "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", + "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", + "file_size_bytes": 3964515 + } + ] + }, + { + "_id": { + "$oid": "61e7199c833bcf838a700e42" + }, + "has_input": [ + "nmdc:a2700afe93abad6f004a3701348622a2" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "nmdc:a42312841b816448d8bd5d3adfa65f58", + "nmdc:f473f4a99336a49105d2722888ae0510", + "nmdc:ae51ea50660f44fa3b317a45f3015556", + "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "nmdc:dc193d1a1693589003f992c820606bab", + "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" + ], + "was_informed_by": "gold:Gp0127641", + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0822t33", + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:05:47+00:00", + "output_data_objects": [ + { + "name": "Gp0127641_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127641", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", + "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", + "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "file_size_bytes": 3331 + }, + { + "name": "Gp0127641_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", + "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", + "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", + "file_size_bytes": 761359 + }, + { + "name": "Gp0127641_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127641", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", + "md5_checksum": "f473f4a99336a49105d2722888ae0510", + "id": "nmdc:f473f4a99336a49105d2722888ae0510", + "file_size_bytes": 236161 + }, + { + "name": "Gp0127641_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127641", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", + "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", + "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", + "file_size_bytes": 1635953327 + }, + { + "name": "Gp0127641_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127641", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", + "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", + "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "file_size_bytes": 255166 + }, + { + "name": "Gp0127641_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127641", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", + "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", + "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "file_size_bytes": 2332521 + }, + { + "name": "Gp0127641_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127641", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", + "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", + "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "file_size_bytes": 1307934195 + }, + { + "name": "Gp0127641_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127641", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", + "md5_checksum": "dc193d1a1693589003f992c820606bab", + "id": "nmdc:dc193d1a1693589003f992c820606bab", + "file_size_bytes": 635050 + }, + { + "name": "Gp0127641_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127641", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", + "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", + "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", + "file_size_bytes": 3964515 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa2" + }, + "has_input": [ + "nmdc:a2700afe93abad6f004a3701348622a2" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "ctg_logsum": 224925, + "scaf_logsum": 225846, + "gap_pct": 0.00137, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:18f0d53f503c855c0093677df58366e0", + 
"nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", + "nmdc:b89858508c524a03011cd5191f7589fa", + "nmdc:6974d394df454501e0515b31a2415367" + ], + "asm_score": 3.367, + "was_informed_by": "gold:Gp0127641", + "ctg_powsum": 24264, + "scaf_max": 18020, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "scaf_powsum": 24365, + "execution_resource": "NERSC-Cori", + "contigs": 191907, + "name": "Assembly Activity for nmdc:mga0822t33", + "ctg_max": 18020, + "gc_std": 0.10192, + "contig_bp": 94878155, + "gc_avg": 0.61857, + "started_at_time": "2021-10-11T02:27:18Z", + "scaf_bp": 94879455, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 191777, + "ended_at_time": "2021-10-11T04:05:47+00:00", + "ctg_l50": 489, + "ctg_l90": 290, + "ctg_n50": 53038, + "ctg_n90": 159679, + "scaf_l50": 489, + "scaf_l90": 290, + "scaf_n50": 53021, + "scaf_n90": 159560, + "output_data_objects": [ + { + "name": "Gp0127641_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127641", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_contigs.fna", + "md5_checksum": "18f0d53f503c855c0093677df58366e0", + "id": "nmdc:18f0d53f503c855c0093677df58366e0", + "file_size_bytes": 102384540 + }, + { + "name": "Gp0127641_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127641", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_scaffolds.fna", + "md5_checksum": "2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "id": "nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "file_size_bytes": 101806869 + }, + { + "name": "Gp0127641_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_covstats.txt", + "md5_checksum": "04ad2128f72c26a4fa2d0ee7b1709ee9", + "id": 
"nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", + "file_size_bytes": 15204446 + }, + { + "name": "Gp0127641_Assembled AGP file", + "description": "Assembled AGP file for Gp0127641", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_assembly.agp", + "md5_checksum": "b89858508c524a03011cd5191f7589fa", + "id": "nmdc:b89858508c524a03011cd5191f7589fa", + "file_size_bytes": 14206204 + }, + { + "name": "Gp0127641_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127641", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_pairedMapped_sorted.bam", + "md5_checksum": "6974d394df454501e0515b31a2415367", + "id": "nmdc:6974d394df454501e0515b31a2415367", + "file_size_bytes": 1967753614 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b9" + }, + "has_input": [ + "nmdc:18f0d53f503c855c0093677df58366e0" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", + "nmdc:9aba4a0c78cb073609b129c4bb65fe2d", + "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", + "nmdc:65768fea44cbd0183b286ab8f9883394", + "nmdc:b8ac75e77d2bc2607877e33ab692c43b", + "nmdc:31018e605b1569eb64006f2108b9d7d4", + "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", + "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", + "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", + "nmdc:b836f94d526c1936d080a4aa7c0646c9", + "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", + "nmdc:64b87140003d1a5a3d9ac939be55e57d" + ], + "was_informed_by": "gold:Gp0127641", + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0822t33", + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00", + "output_data_objects": 
[ + { + "name": "Gp0127641_Protein FAA", + "description": "Protein FAA for Gp0127641", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_proteins.faa", + "md5_checksum": "f33a2a1789f5e913c3ef0dd0440a4877", + "id": "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", + "file_size_bytes": 57768168 + }, + { + "name": "Gp0127641_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127641", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_structural_annotation.gff", + "md5_checksum": "9aba4a0c78cb073609b129c4bb65fe2d", + "id": "nmdc:9aba4a0c78cb073609b129c4bb65fe2d", + "file_size_bytes": 2522 + }, + { + "name": "Gp0127641_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127641", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_functional_annotation.gff", + "md5_checksum": "2477ce1de68bdb1322eec1ffad5c74ac", + "id": "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", + "file_size_bytes": 65167139 + }, + { + "name": "Gp0127641_KO TSV file", + "description": "KO TSV file for Gp0127641", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko.tsv", + "md5_checksum": "65768fea44cbd0183b286ab8f9883394", + "id": "nmdc:65768fea44cbd0183b286ab8f9883394", + "file_size_bytes": 7266122 + }, + { + "name": "Gp0127641_EC TSV file", + "description": "EC TSV file for Gp0127641", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ec.tsv", + "md5_checksum": "b8ac75e77d2bc2607877e33ab692c43b", + "id": "nmdc:b8ac75e77d2bc2607877e33ab692c43b", + "file_size_bytes": 4793386 + }, + { + "name": 
"Gp0127641_COG GFF file", + "description": "COG GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cog.gff", + "md5_checksum": "31018e605b1569eb64006f2108b9d7d4", + "id": "nmdc:31018e605b1569eb64006f2108b9d7d4", + "file_size_bytes": 38184948 + }, + { + "name": "Gp0127641_PFAM GFF file", + "description": "PFAM GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_pfam.gff", + "md5_checksum": "c7ee9f693971a7686d8ff701fddbcb4a", + "id": "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", + "file_size_bytes": 28867184 + }, + { + "name": "Gp0127641_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_tigrfam.gff", + "md5_checksum": "5c0d5f63853ca572d8d73cac9a36c8d7", + "id": "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", + "file_size_bytes": 3122581 + }, + { + "name": "Gp0127641_SMART GFF file", + "description": "SMART GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_smart.gff", + "md5_checksum": "058c5e17eeeea69b2bf0b1b3c2838aea", + "id": "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", + "file_size_bytes": 8368877 + }, + { + "name": "Gp0127641_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_supfam.gff", + "md5_checksum": "b836f94d526c1936d080a4aa7c0646c9", + "id": "nmdc:b836f94d526c1936d080a4aa7c0646c9", + "file_size_bytes": 47986944 + }, + { + "name": "Gp0127641_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cath_funfam.gff", + "md5_checksum": "0100d09c52d0c243b5ae45d95e6a22dc", + "id": "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", + "file_size_bytes": 36349993 + }, + { + 
"name": "Gp0127641_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko_ec.gff", + "md5_checksum": "64b87140003d1a5a3d9ac939be55e57d", + "id": "nmdc:64b87140003d1a5a3d9ac939be55e57d", + "file_size_bytes": 23113010 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34719" + }, + "has_input": [ + "nmdc:18f0d53f503c855c0093677df58366e0", + "nmdc:6974d394df454501e0515b31a2415367", + "nmdc:2477ce1de68bdb1322eec1ffad5c74ac" + ], + "too_short_contig_num": 179152, + "part_of": [ + "nmdc:mga0822t33" + ], + "binned_contig_num": 464, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:024b6771e169aeaf57a3b10acc6045a1", + "nmdc:545cd253ad26116236dec9937b32d8ef", + "nmdc:1785cfe7cf0546dc8702193921a2f566", + "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", + "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063" + ], + "was_informed_by": "gold:Gp0127641", + "input_contig_num": 191906, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0822t33", + "mags_list": [ + { + "number_of_contig": 142, + "completeness": 24.43, + "bin_name": "bins.1", + "gene_count": 832, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + }, + { + "number_of_contig": 322, + "completeness": 46.21, + "bin_name": "bins.2", + "gene_count": 1652, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 21 + } + ], + 
"unbinned_contig_num": 12290, + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127641_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.tooShort.fa", + "md5_checksum": "024b6771e169aeaf57a3b10acc6045a1", + "id": "nmdc:024b6771e169aeaf57a3b10acc6045a1", + "file_size_bytes": 80852741 + }, + { + "name": "Gp0127641_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.unbinned.fa", + "md5_checksum": "545cd253ad26116236dec9937b32d8ef", + "id": "nmdc:545cd253ad26116236dec9937b32d8ef", + "file_size_bytes": 19497941 + }, + { + "name": "Gp0127641_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127641", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_checkm_qa.out", + "md5_checksum": "1785cfe7cf0546dc8702193921a2f566", + "id": "nmdc:1785cfe7cf0546dc8702193921a2f566", + "file_size_bytes": 936 + }, + { + "name": "Gp0127641_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127641", + "data_object_type": 
"Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_hqmq_bin.zip", + "md5_checksum": "0a2a5650358b51ffcd3bbcfc874ac5c9", + "id": "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", + "file_size_bytes": 182 + }, + { + "name": "Gp0127641_metabat2 bins", + "description": "metabat2 bins for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_metabat_bin.zip", + "md5_checksum": "8f6b89831cabcd1dc7aa5e26d87f5063", + "id": "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063", + "file_size_bytes": 625863 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b56" + }, + "id": "nmdc:omprc-11-dtsr6z90", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-g079t498" + ], + "has_output": [ + "jgi:574fde6c7ded5e3df1ee140c" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127643" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c879" + }, + "has_input": [ + "nmdc:8b553dbdd47b90ed7f55d5747822f5d5" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2ef23543e3064ca73c3034713d87c026", + "nmdc:87b172ead58a37be8d199c0acfc96759" + ], + 
"was_informed_by": "gold:Gp0127643", + "input_read_count": 25305566, + "output_read_bases": 3510483777, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3821140466, + "name": "Read QC Activity for nmdc:mga0evc178", + "output_read_count": 23508042, + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00", + "output_data_objects": [ + { + "name": "Gp0127643_Filtered Reads", + "description": "Filtered Reads for Gp0127643", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", + "md5_checksum": "2ef23543e3064ca73c3034713d87c026", + "id": "nmdc:2ef23543e3064ca73c3034713d87c026", + "file_size_bytes": 1891088172 + }, + { + "name": "Gp0127643_Filtered Stats", + "description": "Filtered Stats for Gp0127643", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", + "md5_checksum": "87b172ead58a37be8d199c0acfc96759", + "id": "nmdc:87b172ead58a37be8d199c0acfc96759", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf36" + }, + "has_input": [ + "nmdc:2ef23543e3064ca73c3034713d87c026" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e8f825653e5736e29b73de55bd11a270", + "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "nmdc:c9074b2e05765afd68463dc301b87995", + "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "nmdc:6465fe59472b111ead1f0414ccf39f62", + "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "nmdc:f98bae155bced880c058ecde7d539c18" + ], + "was_informed_by": "gold:Gp0127643", + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for 
nmdc:mga0evc178", + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:04:16+00:00", + "output_data_objects": [ + { + "name": "Gp0127643_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", + "md5_checksum": "e8f825653e5736e29b73de55bd11a270", + "id": "nmdc:e8f825653e5736e29b73de55bd11a270", + "file_size_bytes": 1326 + }, + { + "name": "Gp0127643_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", + "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", + "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "file_size_bytes": 664131 + }, + { + "name": "Gp0127643_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127643", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", + "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", + "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "file_size_bytes": 229630 + }, + { + "name": "Gp0127643_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127643", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", + "md5_checksum": "c9074b2e05765afd68463dc301b87995", + "id": "nmdc:c9074b2e05765afd68463dc301b87995", + "file_size_bytes": 1726867547 + }, + { + "name": "Gp0127643_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127643", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", + "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", + "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "file_size_bytes": 254021 + }, + { + "name": "Gp0127643_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127643", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", + "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", + "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", + "file_size_bytes": 2331702 + }, + { + "name": "Gp0127643_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127643", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", + "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", + "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "file_size_bytes": 1376409913 + }, + { + "name": "Gp0127643_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127643", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", + "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", + "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "file_size_bytes": 640506 + }, + { + "name": "Gp0127643_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127643", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", + "md5_checksum": "f98bae155bced880c058ecde7d539c18", + "id": "nmdc:f98bae155bced880c058ecde7d539c18", + "file_size_bytes": 3998448 + } + ] + }, + { + "_id": { + "$oid": 
"61e719b6833bcf838a70116b" + }, + "has_input": [ + "nmdc:2ef23543e3064ca73c3034713d87c026" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e8f825653e5736e29b73de55bd11a270", + "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "nmdc:c9074b2e05765afd68463dc301b87995", + "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "nmdc:6465fe59472b111ead1f0414ccf39f62", + "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "nmdc:f98bae155bced880c058ecde7d539c18" + ], + "was_informed_by": "gold:Gp0127643", + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0evc178", + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:04:16+00:00", + "output_data_objects": [ + { + "name": "Gp0127643_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", + "md5_checksum": "e8f825653e5736e29b73de55bd11a270", + "id": "nmdc:e8f825653e5736e29b73de55bd11a270", + "file_size_bytes": 1326 + }, + { + "name": "Gp0127643_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", + "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", + "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "file_size_bytes": 664131 + }, + { + "name": "Gp0127643_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127643", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", + "md5_checksum": 
"5c97bc15d4d5999f140664b3b2777c6d", + "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "file_size_bytes": 229630 + }, + { + "name": "Gp0127643_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127643", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", + "md5_checksum": "c9074b2e05765afd68463dc301b87995", + "id": "nmdc:c9074b2e05765afd68463dc301b87995", + "file_size_bytes": 1726867547 + }, + { + "name": "Gp0127643_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127643", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", + "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", + "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "file_size_bytes": 254021 + }, + { + "name": "Gp0127643_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127643", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", + "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", + "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", + "file_size_bytes": 2331702 + }, + { + "name": "Gp0127643_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127643", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", + "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", + "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "file_size_bytes": 1376409913 + }, + { + "name": "Gp0127643_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127643", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", + "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", + "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "file_size_bytes": 640506 + }, + { + "name": "Gp0127643_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127643", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", + "md5_checksum": "f98bae155bced880c058ecde7d539c18", + "id": "nmdc:f98bae155bced880c058ecde7d539c18", + "file_size_bytes": 3998448 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa9" + }, + "has_input": [ + "nmdc:2ef23543e3064ca73c3034713d87c026" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "ctg_logsum": 258957, + "scaf_logsum": 260132, + "gap_pct": 0.00166, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "nmdc:001fd34d98a73eee6be5a41004e67469", + "nmdc:9b45294f72cb55b2f039366d33183fa3", + "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", + "nmdc:fa61e18d49a2012f115d970f0a195986" + ], + "asm_score": 3.329, + "was_informed_by": "gold:Gp0127643", + "ctg_powsum": 27868, + "scaf_max": 12873, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "scaf_powsum": 27998, + "execution_resource": "NERSC-Cori", + "contigs": 208967, + "name": "Assembly Activity for nmdc:mga0evc178", + "ctg_max": 12873, + "gc_std": 0.09438, + "contig_bp": 104567589, + "gc_avg": 0.63102, + "started_at_time": "2021-10-11T02:27:00Z", + "scaf_bp": 104569329, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 208793, + "ended_at_time": "2021-10-11T04:04:16+00:00", + "ctg_l50": 497, + "ctg_l90": 292, + "ctg_n50": 57164, + "ctg_n90": 172414, + "scaf_l50": 498, + "scaf_l90": 292, + "scaf_n50": 56935, + "scaf_n90": 172256, + "output_data_objects": [ + { + 
"name": "Gp0127643_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127643", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_contigs.fna", + "md5_checksum": "a3a85f9f946ff34f28dfd4b5f8590f23", + "id": "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "file_size_bytes": 112772885 + }, + { + "name": "Gp0127643_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127643", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_scaffolds.fna", + "md5_checksum": "001fd34d98a73eee6be5a41004e67469", + "id": "nmdc:001fd34d98a73eee6be5a41004e67469", + "file_size_bytes": 112143079 + }, + { + "name": "Gp0127643_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_covstats.txt", + "md5_checksum": "9b45294f72cb55b2f039366d33183fa3", + "id": "nmdc:9b45294f72cb55b2f039366d33183fa3", + "file_size_bytes": 16563197 + }, + { + "name": "Gp0127643_Assembled AGP file", + "description": "Assembled AGP file for Gp0127643", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_assembly.agp", + "md5_checksum": "b2ec4f5a3f02869684bdfaf065d75c54", + "id": "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", + "file_size_bytes": 15493398 + }, + { + "name": "Gp0127643_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127643", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_pairedMapped_sorted.bam", + "md5_checksum": "fa61e18d49a2012f115d970f0a195986", + "id": "nmdc:fa61e18d49a2012f115d970f0a195986", + "file_size_bytes": 2085429752 + } + ] + }, + { + "_id": { + "$oid": 
"649b005bbf2caae0415ef9bd" + }, + "has_input": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", + "nmdc:6151bacd37618698c28b00151b4998f8", + "nmdc:744277086ab01222a91233536d5e8976", + "nmdc:9c8a359c69bcb1179241f9a3c727fa23", + "nmdc:027b72af172d078f88471d932cf6d473", + "nmdc:ff24990735aa002e828ff7204a456ad2", + "nmdc:e884ad501d1bb3bcf006f0999020ce0f", + "nmdc:8321f818f53371491a7a80ef7e063ca6", + "nmdc:6f799842fe74ebff7942a026dbf9b1bf", + "nmdc:8ee84a629a5899c25e0fbd0f07084530", + "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", + "nmdc:d2990b0bd86e50209dcada6fa6b09510" + ], + "was_informed_by": "gold:Gp0127643", + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0evc178", + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00", + "output_data_objects": [ + { + "name": "Gp0127643_Protein FAA", + "description": "Protein FAA for Gp0127643", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_proteins.faa", + "md5_checksum": "b2cd0d1a024094fd4e308c21d439ed5f", + "id": "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", + "file_size_bytes": 63917762 + }, + { + "name": "Gp0127643_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127643", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_structural_annotation.gff", + "md5_checksum": "6151bacd37618698c28b00151b4998f8", + "id": "nmdc:6151bacd37618698c28b00151b4998f8", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127643_Functional annotation GFF file", + "description": "Functional annotation 
GFF file for Gp0127643", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_functional_annotation.gff", + "md5_checksum": "744277086ab01222a91233536d5e8976", + "id": "nmdc:744277086ab01222a91233536d5e8976", + "file_size_bytes": 71811800 + }, + { + "name": "Gp0127643_KO TSV file", + "description": "KO TSV file for Gp0127643", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko.tsv", + "md5_checksum": "9c8a359c69bcb1179241f9a3c727fa23", + "id": "nmdc:9c8a359c69bcb1179241f9a3c727fa23", + "file_size_bytes": 7959243 + }, + { + "name": "Gp0127643_EC TSV file", + "description": "EC TSV file for Gp0127643", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ec.tsv", + "md5_checksum": "027b72af172d078f88471d932cf6d473", + "id": "nmdc:027b72af172d078f88471d932cf6d473", + "file_size_bytes": 5202338 + }, + { + "name": "Gp0127643_COG GFF file", + "description": "COG GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cog.gff", + "md5_checksum": "ff24990735aa002e828ff7204a456ad2", + "id": "nmdc:ff24990735aa002e828ff7204a456ad2", + "file_size_bytes": 41649279 + }, + { + "name": "Gp0127643_PFAM GFF file", + "description": "PFAM GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_pfam.gff", + "md5_checksum": "e884ad501d1bb3bcf006f0999020ce0f", + "id": "nmdc:e884ad501d1bb3bcf006f0999020ce0f", + "file_size_bytes": 31529168 + }, + { + "name": "Gp0127643_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_tigrfam.gff", + "md5_checksum": "8321f818f53371491a7a80ef7e063ca6", + "id": 
"nmdc:8321f818f53371491a7a80ef7e063ca6", + "file_size_bytes": 3378599 + }, + { + "name": "Gp0127643_SMART GFF file", + "description": "SMART GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_smart.gff", + "md5_checksum": "6f799842fe74ebff7942a026dbf9b1bf", + "id": "nmdc:6f799842fe74ebff7942a026dbf9b1bf", + "file_size_bytes": 9132037 + }, + { + "name": "Gp0127643_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_supfam.gff", + "md5_checksum": "8ee84a629a5899c25e0fbd0f07084530", + "id": "nmdc:8ee84a629a5899c25e0fbd0f07084530", + "file_size_bytes": 52720037 + }, + { + "name": "Gp0127643_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cath_funfam.gff", + "md5_checksum": "6697cdb0b1dcf83e7ecb8fcefa0703ef", + "id": "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", + "file_size_bytes": 39643020 + }, + { + "name": "Gp0127643_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko_ec.gff", + "md5_checksum": "d2990b0bd86e50209dcada6fa6b09510", + "id": "nmdc:d2990b0bd86e50209dcada6fa6b09510", + "file_size_bytes": 25272687 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3471e" + }, + "has_input": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "nmdc:fa61e18d49a2012f115d970f0a195986", + "nmdc:744277086ab01222a91233536d5e8976" + ], + "too_short_contig_num": 194066, + "part_of": [ + "nmdc:mga0evc178" + ], + "binned_contig_num": 470, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ed8acb6d21b14da131350d9c52aa7041", + "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", + 
"nmdc:bd388cba93a77cde2f5791fa0f580865", + "nmdc:30695aca02693c6aba316db3e9f565a8", + "nmdc:79de6d81848956e1c06a811bc9bdab81" + ], + "was_informed_by": "gold:Gp0127643", + "input_contig_num": 208967, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0evc178", + "mags_list": [ + { + "number_of_contig": 470, + "completeness": 30.73, + "bin_name": "bins.1", + "gene_count": 2501, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.71, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 + } + ], + "unbinned_contig_num": 14431, + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127643_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.tooShort.fa", + "md5_checksum": "ed8acb6d21b14da131350d9c52aa7041", + "id": "nmdc:ed8acb6d21b14da131350d9c52aa7041", + "file_size_bytes": 87917684 + }, + { + "name": "Gp0127643_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127643", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.unbinned.fa", + "md5_checksum": "d81e3cc17fa762a717dcf324a0aa3d45", + "id": "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", + "file_size_bytes": 22746526 + }, + { + "name": "Gp0127643_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127643", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_checkm_qa.out", + "md5_checksum": "bd388cba93a77cde2f5791fa0f580865", + "id": "nmdc:bd388cba93a77cde2f5791fa0f580865", + "file_size_bytes": 785 + }, + { + "name": "Gp0127643_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127643", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_hqmq_bin.zip", + "md5_checksum": "30695aca02693c6aba316db3e9f565a8", + "id": "nmdc:30695aca02693c6aba316db3e9f565a8", + "file_size_bytes": 182 + }, + { + "name": "Gp0127643_metabat2 bins", + "description": "metabat2 bins for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_metabat_bin.zip", + "md5_checksum": "79de6d81848956e1c06a811bc9bdab81", + "id": "nmdc:79de6d81848956e1c06a811bc9bdab81", + "file_size_bytes": 614113 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b57" + }, + "id": "nmdc:omprc-11-hwadfm25", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-n80sx618" + ], + "has_output": [ + "jgi:574fe0a87ded5e3df1ee148e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": 
"2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127644" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86b" + }, + "has_input": [ + "nmdc:a1d8fff4b02719c4d0f9c442cf052f69" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:98da35678c59689ce738b2a6bc708692", + "nmdc:ff08ea52254e0cc1011c56656505b27b" + ], + "was_informed_by": "gold:Gp0127644", + "input_read_count": 11431762, + "output_read_bases": 1245433047, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "input_read_bases": 1726196062, + "name": "Read QC Activity for nmdc:mga0bpf635", + "output_read_count": 8322164, + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00", + "output_data_objects": [ + { + "name": "Gp0127644_Filtered Reads", + "description": "Filtered Reads for Gp0127644", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filtered.fastq.gz", + "md5_checksum": "98da35678c59689ce738b2a6bc708692", + "id": "nmdc:98da35678c59689ce738b2a6bc708692", + "file_size_bytes": 694199131 + }, + { + "name": "Gp0127644_Filtered Stats", + "description": "Filtered Stats for Gp0127644", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filterStats.txt", + "md5_checksum": "ff08ea52254e0cc1011c56656505b27b", + "id": 
"nmdc:ff08ea52254e0cc1011c56656505b27b", + "file_size_bytes": 280 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf38" + }, + "has_input": [ + "nmdc:98da35678c59689ce738b2a6bc708692" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "nmdc:9baa708296f62334e099cf61711b5e16", + "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" + ], + "was_informed_by": "gold:Gp0127644", + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:55:00+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for gold:Gp0452677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", + "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", + "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "file_size_bytes": 109 + }, + { + "name": "Gp0127644_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", + "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", + "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "file_size_bytes": 426075 + }, + { + "name": "gold:Gp0452677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for gold:Gp0452677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", + "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", + "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "file_size_bytes": 226638 + }, + { + "name": "Gp0127644_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127644", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", + "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", + "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "file_size_bytes": 610862986 + }, + { + "name": "Gp0127644_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127644", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", + "md5_checksum": "9baa708296f62334e099cf61711b5e16", + "id": "nmdc:9baa708296f62334e099cf61711b5e16", + "file_size_bytes": 243322 + }, + { + "name": "Gp0127644_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127644", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", + "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", + "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "file_size_bytes": 2294995 + }, + { + "name": "Gp0127644_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127644", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", + "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", + "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "file_size_bytes": 487178087 
+ }, + { + "name": "Gp0127644_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127644", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", + "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", + "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "file_size_bytes": 557688 + }, + { + "name": "Gp0127644_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127644", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", + "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", + "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", + "file_size_bytes": 3567307 + } + ] + }, + { + "_id": { + "$oid": "61e7197c833bcf838a700966" + }, + "has_input": [ + "nmdc:98da35678c59689ce738b2a6bc708692" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "nmdc:9baa708296f62334e099cf61711b5e16", + "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" + ], + "was_informed_by": "gold:Gp0127644", + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:55:00+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for gold:Gp0452677", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", + "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", + "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "file_size_bytes": 109 + }, + { + "name": "Gp0127644_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", + "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", + "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "file_size_bytes": 426075 + }, + { + "name": "gold:Gp0452677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for gold:Gp0452677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", + "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", + "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "file_size_bytes": 226638 + }, + { + "name": "Gp0127644_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127644", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", + "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", + "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "file_size_bytes": 610862986 + }, + { + "name": "Gp0127644_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127644", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", + "md5_checksum": "9baa708296f62334e099cf61711b5e16", + "id": "nmdc:9baa708296f62334e099cf61711b5e16", + "file_size_bytes": 243322 + }, + { + "name": "Gp0127644_Centrifuge Krona HTML report", + "description": 
"Centrifuge Krona HTML report for Gp0127644", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", + "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", + "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "file_size_bytes": 2294995 + }, + { + "name": "Gp0127644_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127644", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", + "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", + "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "file_size_bytes": 487178087 + }, + { + "name": "Gp0127644_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127644", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", + "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", + "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "file_size_bytes": 557688 + }, + { + "name": "Gp0127644_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127644", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", + "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", + "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", + "file_size_bytes": 3567307 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa1" + }, + "has_input": [ + "nmdc:98da35678c59689ce738b2a6bc708692" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "ctg_logsum": 37962, + "scaf_logsum": 38062, + "gap_pct": 0.00069, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + 
"nmdc:b6afa25cadc614083204383bbad06f48", + "nmdc:87b1ea13d41499eeb5eb67932db01423", + "nmdc:72a38c353753abcb6d046385bf2950f6", + "nmdc:ecf498b9aa15f9d000845ffdfa7eb521" + ], + "asm_score": 3.712, + "was_informed_by": "gold:Gp0127644", + "ctg_powsum": 4162.045, + "scaf_max": 11252, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "scaf_powsum": 4172.955, + "execution_resource": "NERSC-Cori", + "contigs": 46135, + "name": "Assembly Activity for nmdc:mga0bpf635", + "ctg_max": 11252, + "gc_std": 0.09328, + "contig_bp": 20152503, + "gc_avg": 0.6086, + "started_at_time": "2021-10-11T02:26:47Z", + "scaf_bp": 20152643, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 46121, + "ended_at_time": "2021-10-11T02:55:00+00:00", + "ctg_l50": 394, + "ctg_l90": 285, + "ctg_n50": 14034, + "ctg_n90": 39639, + "scaf_l50": 395, + "scaf_l90": 285, + "scaf_n50": 13959, + "scaf_n90": 39626, + "output_data_objects": [ + { + "name": "Gp0127644_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127644", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_contigs.fna", + "md5_checksum": "16f77f4aaed29f3acc31646e1ce06b2d", + "id": "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + "file_size_bytes": 21881611 + }, + { + "name": "Gp0127644_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127644", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_scaffolds.fna", + "md5_checksum": "b6afa25cadc614083204383bbad06f48", + "id": "nmdc:b6afa25cadc614083204383bbad06f48", + "file_size_bytes": 21742982 + }, + { + "name": "Gp0127644_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_covstats.txt", + "md5_checksum": "87b1ea13d41499eeb5eb67932db01423", + "id": 
"nmdc:87b1ea13d41499eeb5eb67932db01423", + "file_size_bytes": 3612085 + }, + { + "name": "Gp0127644_Assembled AGP file", + "description": "Assembled AGP file for Gp0127644", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_assembly.agp", + "md5_checksum": "72a38c353753abcb6d046385bf2950f6", + "id": "nmdc:72a38c353753abcb6d046385bf2950f6", + "file_size_bytes": 3350598 + }, + { + "name": "Gp0127644_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127644", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_pairedMapped_sorted.bam", + "md5_checksum": "ecf498b9aa15f9d000845ffdfa7eb521", + "id": "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", + "file_size_bytes": 746781339 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b4" + }, + "has_input": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9d960cad4d88795aba8bb1acbe415fc9", + "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", + "nmdc:349838000a53b6655a5b12edf6351c50", + "nmdc:7bb072409221978dbea8ff5cb0bdba1e", + "nmdc:3d69ade973d1652bd6f061b2122ffe36", + "nmdc:2a9b9a21fe5fb84219e0be5f153665be", + "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", + "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", + "nmdc:f8d79375a2bf82f257e0015efeee6f26", + "nmdc:c9b4806132d19e740822b1a84bc4f07d", + "nmdc:e304e10eb60423c23486e140594d1a7b", + "nmdc:9b78f0ac527ee7287ae532a896582948" + ], + "was_informed_by": "gold:Gp0127644", + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0bpf635", + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00", + "output_data_objects": [ 
+ { + "name": "Gp0127644_Protein FAA", + "description": "Protein FAA for Gp0127644", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_proteins.faa", + "md5_checksum": "9d960cad4d88795aba8bb1acbe415fc9", + "id": "nmdc:9d960cad4d88795aba8bb1acbe415fc9", + "file_size_bytes": 12848136 + }, + { + "name": "Gp0127644_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127644", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_structural_annotation.gff", + "md5_checksum": "cb5d98ee6e459ce1cc2d14295424eef1", + "id": "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", + "file_size_bytes": 2488 + }, + { + "name": "Gp0127644_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127644", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_functional_annotation.gff", + "md5_checksum": "349838000a53b6655a5b12edf6351c50", + "id": "nmdc:349838000a53b6655a5b12edf6351c50", + "file_size_bytes": 15112193 + }, + { + "name": "Gp0127644_KO TSV file", + "description": "KO TSV file for Gp0127644", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko.tsv", + "md5_checksum": "7bb072409221978dbea8ff5cb0bdba1e", + "id": "nmdc:7bb072409221978dbea8ff5cb0bdba1e", + "file_size_bytes": 1814299 + }, + { + "name": "Gp0127644_EC TSV file", + "description": "EC TSV file for Gp0127644", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ec.tsv", + "md5_checksum": "3d69ade973d1652bd6f061b2122ffe36", + "id": "nmdc:3d69ade973d1652bd6f061b2122ffe36", + "file_size_bytes": 1233948 + }, + { + "name": 
"Gp0127644_COG GFF file", + "description": "COG GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cog.gff", + "md5_checksum": "2a9b9a21fe5fb84219e0be5f153665be", + "id": "nmdc:2a9b9a21fe5fb84219e0be5f153665be", + "file_size_bytes": 9028987 + }, + { + "name": "Gp0127644_PFAM GFF file", + "description": "PFAM GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_pfam.gff", + "md5_checksum": "83e64b9fc9406a72d18e8dd4742bac1a", + "id": "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", + "file_size_bytes": 6574998 + }, + { + "name": "Gp0127644_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_tigrfam.gff", + "md5_checksum": "cdc4cc8629b7c61f1708f654aaaa9932", + "id": "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", + "file_size_bytes": 783908 + }, + { + "name": "Gp0127644_SMART GFF file", + "description": "SMART GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_smart.gff", + "md5_checksum": "f8d79375a2bf82f257e0015efeee6f26", + "id": "nmdc:f8d79375a2bf82f257e0015efeee6f26", + "file_size_bytes": 2030043 + }, + { + "name": "Gp0127644_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_supfam.gff", + "md5_checksum": "c9b4806132d19e740822b1a84bc4f07d", + "id": "nmdc:c9b4806132d19e740822b1a84bc4f07d", + "file_size_bytes": 11227652 + }, + { + "name": "Gp0127644_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cath_funfam.gff", + "md5_checksum": "e304e10eb60423c23486e140594d1a7b", + "id": "nmdc:e304e10eb60423c23486e140594d1a7b", + "file_size_bytes": 8555821 + }, + { + 
"name": "Gp0127644_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko_ec.gff", + "md5_checksum": "9b78f0ac527ee7287ae532a896582948", + "id": "nmdc:9b78f0ac527ee7287ae532a896582948", + "file_size_bytes": 5791094 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34714" + }, + "has_input": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", + "nmdc:349838000a53b6655a5b12edf6351c50" + ], + "too_short_contig_num": 44192, + "part_of": [ + "nmdc:mga0bpf635" + ], + "binned_contig_num": 157, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:4857d71459f50147c8ae97ffce40caa5", + "nmdc:65522bf77241109a74354d0e294597f9", + "nmdc:30d6c9fb23abb0849991fad01e0393f1", + "nmdc:a76c8c9034b877334a75e7c0b7c2c830", + "nmdc:9d712c5924d6d0ee6d7305918e69302d" + ], + "was_informed_by": "gold:Gp0127644", + "input_contig_num": 46135, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0bpf635", + "mags_list": [ + { + "number_of_contig": 157, + "completeness": 39.0, + "bin_name": "bins.1", + "gene_count": 891, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + } + ], + "unbinned_contig_num": 1786, + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127644_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.tooShort.fa", + "md5_checksum": "4857d71459f50147c8ae97ffce40caa5", + "id": "nmdc:4857d71459f50147c8ae97ffce40caa5", + "file_size_bytes": 18310651 + }, + { + "name": "Gp0127644_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.unbinned.fa", + "md5_checksum": "65522bf77241109a74354d0e294597f9", + "id": "nmdc:65522bf77241109a74354d0e294597f9", + "file_size_bytes": 2858628 + }, + { + "name": "Gp0127644_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127644", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_checkm_qa.out", + "md5_checksum": "30d6c9fb23abb0849991fad01e0393f1", + "id": "nmdc:30d6c9fb23abb0849991fad01e0393f1", + "file_size_bytes": 760 + }, + { + "name": "Gp0127644_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127644", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_hqmq_bin.zip", + "md5_checksum": "a76c8c9034b877334a75e7c0b7c2c830", + "id": "nmdc:a76c8c9034b877334a75e7c0b7c2c830", + "file_size_bytes": 182 + }, + { + "name": "Gp0127644_metabat2 bins", + "description": "metabat2 bins for Gp0127644", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_metabat_bin.zip", + "md5_checksum": "9d712c5924d6d0ee6d7305918e69302d", + "id": "nmdc:9d712c5924d6d0ee6d7305918e69302d", + "file_size_bytes": 218004 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b58" + }, + "id": "nmdc:omprc-11-vnnn4722", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-tzp60785" + ], + "has_output": [ + "jgi:574fde667ded5e3df1ee1407" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127639" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c871" + }, + "has_input": [ + "nmdc:ae9087ed8e1ead2407bca45a47725633" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1", + "nmdc:b68178eebde030fad0850797adbb2624" + ], + "was_informed_by": "gold:Gp0127639", + "input_read_count": 23535784, + "output_read_bases": 2989527376, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3553903384, + "name": "Read QC Activity for nmdc:mga09wpw60", + "output_read_count": 20011156, + "started_at_time": "2021-10-11T02:27:08Z", + "type": 
"nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00", + "output_data_objects": [ + { + "name": "Gp0127639_Filtered Reads", + "description": "Filtered Reads for Gp0127639", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", + "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", + "id": "nmdc:833077b40372c6daa20beaed04ed0ae1", + "file_size_bytes": 1585232805 + }, + { + "name": "Gp0127639_Filtered Stats", + "description": "Filtered Stats for Gp0127639", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", + "md5_checksum": "b68178eebde030fad0850797adbb2624", + "id": "nmdc:b68178eebde030fad0850797adbb2624", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf46" + }, + "has_input": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "nmdc:82f072d1931154fbc722531d3d0dc41c", + "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "nmdc:81281fef2c0778516a84b3a672cc0230", + "nmdc:86ae054ba9def1126579c8f76db8a07a", + "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "nmdc:848fc10ed4365047cb139a4b40303808", + "nmdc:94e422e0bae86c608fba1c3815e08e92", + "nmdc:c6eb85143a2489921c53f8184d536129" + ], + "was_informed_by": "gold:Gp0127639", + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:27:12+00:00", + "output_data_objects": [ + { + "name": "Gp0127639_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127639", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", + "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", + "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "file_size_bytes": 648 + }, + { + "name": "Gp0127639_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", + "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", + "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", + "file_size_bytes": 588644 + }, + { + "name": "Gp0127639_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127639", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", + "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", + "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "file_size_bytes": 228175 + }, + { + "name": "Gp0127639_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127639", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", + "md5_checksum": "81281fef2c0778516a84b3a672cc0230", + "id": "nmdc:81281fef2c0778516a84b3a672cc0230", + "file_size_bytes": 1468498728 + }, + { + "name": "Gp0127639_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127639", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", + "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", + "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", + "file_size_bytes": 251338 + }, + { + "name": "Gp0127639_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127639", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", + "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", + "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "file_size_bytes": 2322720 + }, + { + "name": "Gp0127639_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127639", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", + "md5_checksum": "848fc10ed4365047cb139a4b40303808", + "id": "nmdc:848fc10ed4365047cb139a4b40303808", + "file_size_bytes": 1168015909 + }, + { + "name": "Gp0127639_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127639", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", + "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", + "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", + "file_size_bytes": 616202 + }, + { + "name": "Gp0127639_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127639", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", + "md5_checksum": "c6eb85143a2489921c53f8184d536129", + "id": "nmdc:c6eb85143a2489921c53f8184d536129", + "file_size_bytes": 3863456 + } + ] + }, + { + "_id": { + "$oid": "61e7199a833bcf838a700d65" + }, + "has_input": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "nmdc:82f072d1931154fbc722531d3d0dc41c", + "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", 
+ "nmdc:81281fef2c0778516a84b3a672cc0230", + "nmdc:86ae054ba9def1126579c8f76db8a07a", + "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "nmdc:848fc10ed4365047cb139a4b40303808", + "nmdc:94e422e0bae86c608fba1c3815e08e92", + "nmdc:c6eb85143a2489921c53f8184d536129" + ], + "was_informed_by": "gold:Gp0127639", + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:27:12+00:00", + "output_data_objects": [ + { + "name": "Gp0127639_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", + "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", + "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "file_size_bytes": 648 + }, + { + "name": "Gp0127639_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", + "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", + "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", + "file_size_bytes": 588644 + }, + { + "name": "Gp0127639_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127639", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", + "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", + "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "file_size_bytes": 228175 + }, + { + "name": "Gp0127639_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127639", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", + "md5_checksum": "81281fef2c0778516a84b3a672cc0230", + "id": "nmdc:81281fef2c0778516a84b3a672cc0230", + "file_size_bytes": 1468498728 + }, + { + "name": "Gp0127639_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127639", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", + "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", + "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", + "file_size_bytes": 251338 + }, + { + "name": "Gp0127639_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127639", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", + "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", + "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "file_size_bytes": 2322720 + }, + { + "name": "Gp0127639_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127639", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", + "md5_checksum": "848fc10ed4365047cb139a4b40303808", + "id": "nmdc:848fc10ed4365047cb139a4b40303808", + "file_size_bytes": 1168015909 + }, + { + "name": "Gp0127639_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127639", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", + "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", + "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", + "file_size_bytes": 616202 + }, + { + "name": "Gp0127639_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127639", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", + "md5_checksum": "c6eb85143a2489921c53f8184d536129", + "id": "nmdc:c6eb85143a2489921c53f8184d536129", + "file_size_bytes": 3863456 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa3" + }, + "has_input": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "ctg_logsum": 317684, + "scaf_logsum": 318786, + "gap_pct": 0.0017, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "nmdc:8f14c016997dd96f70f547df930717be", + "nmdc:5966e5e32744a14549b19b4c92a606a5", + "nmdc:1fcd489b3ae86a76bf297cc19b50392d", + "nmdc:5b90d13539ce840980db101fa7c1df96" + ], + "asm_score": 3.397, + "was_informed_by": "gold:Gp0127639", + "ctg_powsum": 34356, + "scaf_max": 19860, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "scaf_powsum": 34485, + "execution_resource": "NERSC-Cori", + "contigs": 212560, + "name": "Assembly Activity for nmdc:mga09wpw60", + "ctg_max": 19860, + "gc_std": 0.09375, + "contig_bp": 112053293, + "gc_avg": 0.63186, + "started_at_time": "2021-10-11T02:27:08Z", + "scaf_bp": 112055193, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 212379, + "ended_at_time": "2021-10-11T03:27:12+00:00", + "ctg_l50": 538, + "ctg_l90": 298, + "ctg_n50": 55584, + "ctg_n90": 173977, + "scaf_l50": 539, + "scaf_l90": 298, + "scaf_n50": 55395, + "scaf_n90": 173826, + "output_data_objects": [ + { + "name": "Gp0127639_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127639", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_contigs.fna", + "md5_checksum": "2b73310c6eef1ece5bb01f235b22fdbd", + "id": 
"nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "file_size_bytes": 120497476 + }, + { + "name": "Gp0127639_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127639", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_scaffolds.fna", + "md5_checksum": "8f14c016997dd96f70f547df930717be", + "id": "nmdc:8f14c016997dd96f70f547df930717be", + "file_size_bytes": 119857107 + }, + { + "name": "Gp0127639_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_covstats.txt", + "md5_checksum": "5966e5e32744a14549b19b4c92a606a5", + "id": "nmdc:5966e5e32744a14549b19b4c92a606a5", + "file_size_bytes": 16872665 + }, + { + "name": "Gp0127639_Assembled AGP file", + "description": "Assembled AGP file for Gp0127639", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_assembly.agp", + "md5_checksum": "1fcd489b3ae86a76bf297cc19b50392d", + "id": "nmdc:1fcd489b3ae86a76bf297cc19b50392d", + "file_size_bytes": 15768901 + }, + { + "name": "Gp0127639_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127639", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_pairedMapped_sorted.bam", + "md5_checksum": "5b90d13539ce840980db101fa7c1df96", + "id": "nmdc:5b90d13539ce840980db101fa7c1df96", + "file_size_bytes": 1779135536 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b7" + }, + "has_input": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6c09d55cfb8872b30eb1832394f80beb", + "nmdc:2e3cc72d21590667259f6356882ce63b", + 
"nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", + "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", + "nmdc:16bedd944e5e836924c28b006026c348", + "nmdc:8764070f565c50998968e0739420f5cc", + "nmdc:9e6accc90d61ea572819dcdb591e41a7", + "nmdc:32b9518ee41cadb157f3c0f9ec91476c", + "nmdc:432d591bd525ae429e837431d44954f7", + "nmdc:3120d5d5d27d142f898f70a8cc1b076e", + "nmdc:d37ff61fdae942030a1b07e855cf1abd", + "nmdc:56995366ba4186639a8ff4fd4defbd5e" + ], + "was_informed_by": "gold:Gp0127639", + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga09wpw60", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00", + "output_data_objects": [ + { + "name": "Gp0127639_Protein FAA", + "description": "Protein FAA for Gp0127639", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_proteins.faa", + "md5_checksum": "6c09d55cfb8872b30eb1832394f80beb", + "id": "nmdc:6c09d55cfb8872b30eb1832394f80beb", + "file_size_bytes": 67573912 + }, + { + "name": "Gp0127639_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127639", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_structural_annotation.gff", + "md5_checksum": "2e3cc72d21590667259f6356882ce63b", + "id": "nmdc:2e3cc72d21590667259f6356882ce63b", + "file_size_bytes": 2526 + }, + { + "name": "Gp0127639_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127639", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_functional_annotation.gff", + "md5_checksum": "2dee5eaa50c8eeb6e3bc8471501d9964", + "id": "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", + 
"file_size_bytes": 75196016 + }, + { + "name": "Gp0127639_KO TSV file", + "description": "KO TSV file for Gp0127639", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko.tsv", + "md5_checksum": "7ec4cfdd88352d703a2bb64b99bd56c5", + "id": "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", + "file_size_bytes": 8707597 + }, + { + "name": "Gp0127639_EC TSV file", + "description": "EC TSV file for Gp0127639", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ec.tsv", + "md5_checksum": "16bedd944e5e836924c28b006026c348", + "id": "nmdc:16bedd944e5e836924c28b006026c348", + "file_size_bytes": 5769544 + }, + { + "name": "Gp0127639_COG GFF file", + "description": "COG GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cog.gff", + "md5_checksum": "8764070f565c50998968e0739420f5cc", + "id": "nmdc:8764070f565c50998968e0739420f5cc", + "file_size_bytes": 45648468 + }, + { + "name": "Gp0127639_PFAM GFF file", + "description": "PFAM GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_pfam.gff", + "md5_checksum": "9e6accc90d61ea572819dcdb591e41a7", + "id": "nmdc:9e6accc90d61ea572819dcdb591e41a7", + "file_size_bytes": 34995151 + }, + { + "name": "Gp0127639_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_tigrfam.gff", + "md5_checksum": "32b9518ee41cadb157f3c0f9ec91476c", + "id": "nmdc:32b9518ee41cadb157f3c0f9ec91476c", + "file_size_bytes": 4060116 + }, + { + "name": "Gp0127639_SMART GFF file", + "description": "SMART GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_smart.gff", + "md5_checksum": 
"432d591bd525ae429e837431d44954f7", + "id": "nmdc:432d591bd525ae429e837431d44954f7", + "file_size_bytes": 10056742 + }, + { + "name": "Gp0127639_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_supfam.gff", + "md5_checksum": "3120d5d5d27d142f898f70a8cc1b076e", + "id": "nmdc:3120d5d5d27d142f898f70a8cc1b076e", + "file_size_bytes": 56435804 + }, + { + "name": "Gp0127639_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cath_funfam.gff", + "md5_checksum": "d37ff61fdae942030a1b07e855cf1abd", + "id": "nmdc:d37ff61fdae942030a1b07e855cf1abd", + "file_size_bytes": 43456195 + }, + { + "name": "Gp0127639_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko_ec.gff", + "md5_checksum": "56995366ba4186639a8ff4fd4defbd5e", + "id": "nmdc:56995366ba4186639a8ff4fd4defbd5e", + "file_size_bytes": 27657123 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3471b" + }, + "has_input": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "nmdc:5b90d13539ce840980db101fa7c1df96", + "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964" + ], + "too_short_contig_num": 194918, + "part_of": [ + "nmdc:mga09wpw60" + ], + "binned_contig_num": 732, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:820dbad1b0ddd3c728e77aceee09ea28", + "nmdc:24fbfc69ded61dffff95ba2f8475239c", + "nmdc:1837710887027f94b0f25208edb35cbe", + "nmdc:7072cfd6665082a95b2c09a4bc88760c", + "nmdc:b0db190d9d1093ef87a5efb8a600e9ef" + ], + "was_informed_by": "gold:Gp0127639", + "input_contig_num": 212559, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity 
for nmdc:mga09wpw60", + "mags_list": [ + { + "number_of_contig": 85, + "completeness": 18.1, + "bin_name": "bins.1", + "gene_count": 437, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 59, + "completeness": 15.92, + "bin_name": "bins.2", + "gene_count": 343, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.84, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 258, + "completeness": 21.26, + "bin_name": "bins.3", + "gene_count": 1440, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + }, + { + "number_of_contig": 101, + "completeness": 29.13, + "bin_name": "bins.4", + "gene_count": 560, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 116, + "completeness": 1.53, + "bin_name": "bins.5", + "gene_count": 763, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 113, + "completeness": 9.72, + "bin_name": "bins.6", 
+ "gene_count": 531, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 16909, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127639_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.tooShort.fa", + "md5_checksum": "820dbad1b0ddd3c728e77aceee09ea28", + "id": "nmdc:820dbad1b0ddd3c728e77aceee09ea28", + "file_size_bytes": 90173016 + }, + { + "name": "Gp0127639_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.unbinned.fa", + "md5_checksum": "24fbfc69ded61dffff95ba2f8475239c", + "id": "nmdc:24fbfc69ded61dffff95ba2f8475239c", + "file_size_bytes": 27021291 + }, + { + "name": "Gp0127639_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127639", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_checkm_qa.out", + "md5_checksum": "1837710887027f94b0f25208edb35cbe", + "id": "nmdc:1837710887027f94b0f25208edb35cbe", + "file_size_bytes": 1570 + }, + { + "name": "Gp0127639_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127639", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_hqmq_bin.zip", + "md5_checksum": "7072cfd6665082a95b2c09a4bc88760c", + "id": "nmdc:7072cfd6665082a95b2c09a4bc88760c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127639_metabat2 bins", + "description": "metabat2 bins for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_metabat_bin.zip", + "md5_checksum": "b0db190d9d1093ef87a5efb8a600e9ef", + "id": "nmdc:b0db190d9d1093ef87a5efb8a600e9ef", + "file_size_bytes": 1000457 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b59" + }, + "id": "nmdc:omprc-11-p21wp875", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-qpve9v25" + ], + "has_output": [ + "jgi:574fe0a67ded5e3df1ee148d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127642" + ], + 
"downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c874" + }, + "has_input": [ + "nmdc:ac3a54ab71fd4e15763cd3e01c7a91bf" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:603166d1e0da357d356a2029215d76ea", + "nmdc:639d9630c859c9b2f6f7a2eff1e1a863" + ], + "was_informed_by": "gold:Gp0127642", + "input_read_count": 28024960, + "output_read_bases": 4095196321, + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4231768960, + "name": "Read QC Activity for nmdc:mga0cvxk30", + "output_read_count": 27378404, + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T20:50:24+00:00", + "output_data_objects": [ + { + "name": "Gp0127642_Filtered Reads", + "description": "Filtered Reads for Gp0127642", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filtered.fastq.gz", + "md5_checksum": "603166d1e0da357d356a2029215d76ea", + "id": "nmdc:603166d1e0da357d356a2029215d76ea", + "file_size_bytes": 2304174057 + }, + { + "name": "Gp0127642_Filtered Stats", + "description": "Filtered Stats for Gp0127642", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filterStats.txt", + "md5_checksum": "639d9630c859c9b2f6f7a2eff1e1a863", + "id": "nmdc:639d9630c859c9b2f6f7a2eff1e1a863", + "file_size_bytes": 284 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf3b" + }, + "has_input": [ + "nmdc:603166d1e0da357d356a2029215d76ea" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", + "nmdc:0a079e34648ce23b0837dff31e2be5df", + "nmdc:f19bf1723f0f0e9f2158b137d2618b08", + "nmdc:81fc62d01a53a7ab5037829a158f0b64", + 
"nmdc:05cc05eefdcb0d7bac19031619244a4b", + "nmdc:bb92f0d18280f32aacf482a43a841372", + "nmdc:2fddd33160498548fa73e95dfc304d1a", + "nmdc:272e3daee292c6e284026ee95b72d290", + "nmdc:bca8c2988929e7c176ec7b6609445db2" + ], + "was_informed_by": "gold:Gp0127642", + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cvxk30", + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:50:24+00:00", + "output_data_objects": [ + { + "name": "Gp0127642_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", + "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", + "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", + "file_size_bytes": 5303 + }, + { + "name": "Gp0127642_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", + "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", + "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", + "file_size_bytes": 948120 + }, + { + "name": "Gp0127642_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127642", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", + "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", + "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", + "file_size_bytes": 241990 + }, + { + "name": "Gp0127642_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127642", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", + "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", + "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", + "file_size_bytes": 2023464022 + }, + { + "name": "Gp0127642_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127642", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", + "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", + "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", + "file_size_bytes": 257700 + }, + { + "name": "Gp0127642_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127642", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", + "md5_checksum": "bb92f0d18280f32aacf482a43a841372", + "id": "nmdc:bb92f0d18280f32aacf482a43a841372", + "file_size_bytes": 2339227 + }, + { + "name": "Gp0127642_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127642", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", + "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", + "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", + "file_size_bytes": 1630988221 + }, + { + "name": "Gp0127642_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127642", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", + "md5_checksum": "272e3daee292c6e284026ee95b72d290", + "id": "nmdc:272e3daee292c6e284026ee95b72d290", + "file_size_bytes": 659136 + }, + { + "name": "Gp0127642_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127642", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", + "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", + "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", + "file_size_bytes": 4013188 + } + ] + }, + { + "_id": { + "$oid": "61e7199f833bcf838a700f38" + }, + "has_input": [ + "nmdc:603166d1e0da357d356a2029215d76ea" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", + "nmdc:0a079e34648ce23b0837dff31e2be5df", + "nmdc:f19bf1723f0f0e9f2158b137d2618b08", + "nmdc:81fc62d01a53a7ab5037829a158f0b64", + "nmdc:05cc05eefdcb0d7bac19031619244a4b", + "nmdc:bb92f0d18280f32aacf482a43a841372", + "nmdc:2fddd33160498548fa73e95dfc304d1a", + "nmdc:272e3daee292c6e284026ee95b72d290", + "nmdc:bca8c2988929e7c176ec7b6609445db2" + ], + "was_informed_by": "gold:Gp0127642", + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cvxk30", + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:50:24+00:00", + "output_data_objects": [ + { + "name": "Gp0127642_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", + "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", + "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", + "file_size_bytes": 5303 + }, + { + "name": "Gp0127642_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", + "md5_checksum": 
"0a079e34648ce23b0837dff31e2be5df", + "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", + "file_size_bytes": 948120 + }, + { + "name": "Gp0127642_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127642", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", + "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", + "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", + "file_size_bytes": 241990 + }, + { + "name": "Gp0127642_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127642", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", + "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", + "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", + "file_size_bytes": 2023464022 + }, + { + "name": "Gp0127642_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127642", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", + "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", + "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", + "file_size_bytes": 257700 + }, + { + "name": "Gp0127642_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127642", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", + "md5_checksum": "bb92f0d18280f32aacf482a43a841372", + "id": "nmdc:bb92f0d18280f32aacf482a43a841372", + "file_size_bytes": 2339227 + }, + { + "name": "Gp0127642_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127642", + "data_object_type": "Kraken2 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", + "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", + "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", + "file_size_bytes": 1630988221 + }, + { + "name": "Gp0127642_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127642", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", + "md5_checksum": "272e3daee292c6e284026ee95b72d290", + "id": "nmdc:272e3daee292c6e284026ee95b72d290", + "file_size_bytes": 659136 + }, + { + "name": "Gp0127642_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127642", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", + "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", + "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", + "file_size_bytes": 4013188 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa7" + }, + "has_input": [ + "nmdc:603166d1e0da357d356a2029215d76ea" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "ctg_logsum": 50653, + "scaf_logsum": 50816, + "gap_pct": 0.00106, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "nmdc:9a3dfedede65ba1253a84264492e909c", + "nmdc:0772cb4473177c4e0046c7fd9cb65b27", + "nmdc:7d0ccfaeac8981d1300b8c17abed052b", + "nmdc:a5b5801b13f062bc09a1405d0a01e6ac" + ], + "asm_score": 7.947, + "was_informed_by": "gold:Gp0127642", + "ctg_powsum": 5974.26, + "scaf_max": 27286, + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "scaf_powsum": 5993.216, + "execution_resource": "NERSC-Cori", + "contigs": 103206, + "name": "Assembly Activity for nmdc:mga0cvxk30", + "ctg_max": 27286, + "gc_std": 0.1028, + 
"gc_avg": 0.60377, + "contig_bp": 40567169, + "started_at_time": "2021-12-01T21:30:33Z", + "scaf_bp": 40567599, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 103181, + "ended_at_time": "2021-12-02T20:50:24+00:00", + "ctg_l50": 348, + "ctg_l90": 283, + "ctg_n50": 35487, + "ctg_n90": 88775, + "scaf_l50": 348, + "scaf_l90": 283, + "scaf_n50": 35472, + "scaf_n90": 88751, + "output_data_objects": [ + { + "name": "Gp0127642_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127642", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_contigs.fna", + "md5_checksum": "9c2c077dd8f43350b83c1c1ba853bbbc", + "id": "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "file_size_bytes": 44374790 + }, + { + "name": "Gp0127642_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127642", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_scaffolds.fna", + "md5_checksum": "9a3dfedede65ba1253a84264492e909c", + "id": "nmdc:9a3dfedede65ba1253a84264492e909c", + "file_size_bytes": 44064962 + }, + { + "name": "Gp0127642_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_covstats.txt", + "md5_checksum": "0772cb4473177c4e0046c7fd9cb65b27", + "id": "nmdc:0772cb4473177c4e0046c7fd9cb65b27", + "file_size_bytes": 8090415 + }, + { + "name": "Gp0127642_Assembled AGP file", + "description": "Assembled AGP file for Gp0127642", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_assembly.agp", + "md5_checksum": "7d0ccfaeac8981d1300b8c17abed052b", + "id": "nmdc:7d0ccfaeac8981d1300b8c17abed052b", + "file_size_bytes": 7524067 + }, + { + "name": "Gp0127642_Metagenome Alignment BAM file", + 
"description": "Metagenome Alignment BAM file for Gp0127642", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_pairedMapped_sorted.bam", + "md5_checksum": "a5b5801b13f062bc09a1405d0a01e6ac", + "id": "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", + "file_size_bytes": 2461892983 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ba" + }, + "has_input": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", + "nmdc:f442172aba544a550f1e294bc615fd1d", + "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", + "nmdc:63db41425c31ceda578a9e2a801dcb98", + "nmdc:1cf9336281454b1747a86f9877f47ce8", + "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", + "nmdc:157326e95b92fa83ab5755c22acf5837", + "nmdc:f001a06864e30347885e5a76ae89ae92", + "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", + "nmdc:5119eebdfebd43b4af243a61cc8e45eb", + "nmdc:4e6178de376e5e228c8b5c17ce3d0621", + "nmdc:d89f026da3dfb4ee7d4884a47ce5739d" + ], + "was_informed_by": "gold:Gp0127642", + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0cvxk30", + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T20:50:24+00:00", + "output_data_objects": [ + { + "name": "Gp0127642_Protein FAA", + "description": "Protein FAA for Gp0127642", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_proteins.faa", + "md5_checksum": "e6270776fe3cb9f4e8e2958f9d8d6151", + "id": "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", + "file_size_bytes": 26699570 + }, + { + "name": "Gp0127642_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127642", + 
"data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_structural_annotation.gff", + "md5_checksum": "f442172aba544a550f1e294bc615fd1d", + "id": "nmdc:f442172aba544a550f1e294bc615fd1d", + "file_size_bytes": 2505 + }, + { + "name": "Gp0127642_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127642", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_functional_annotation.gff", + "md5_checksum": "c0f7ac45facbbb7b74bb7ce11af11910", + "id": "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", + "file_size_bytes": 32011364 + }, + { + "name": "Gp0127642_KO TSV file", + "description": "KO TSV file for Gp0127642", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko.tsv", + "md5_checksum": "63db41425c31ceda578a9e2a801dcb98", + "id": "nmdc:63db41425c31ceda578a9e2a801dcb98", + "file_size_bytes": 3660508 + }, + { + "name": "Gp0127642_EC TSV file", + "description": "EC TSV file for Gp0127642", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ec.tsv", + "md5_checksum": "1cf9336281454b1747a86f9877f47ce8", + "id": "nmdc:1cf9336281454b1747a86f9877f47ce8", + "file_size_bytes": 2451794 + }, + { + "name": "Gp0127642_COG GFF file", + "description": "COG GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cog.gff", + "md5_checksum": "1cb7ab56a921ed80d21dad5b2d41c139", + "id": "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", + "file_size_bytes": 18356139 + }, + { + "name": "Gp0127642_PFAM GFF file", + "description": "PFAM GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_pfam.gff", + 
"md5_checksum": "157326e95b92fa83ab5755c22acf5837", + "id": "nmdc:157326e95b92fa83ab5755c22acf5837", + "file_size_bytes": 13044512 + }, + { + "name": "Gp0127642_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_tigrfam.gff", + "md5_checksum": "f001a06864e30347885e5a76ae89ae92", + "id": "nmdc:f001a06864e30347885e5a76ae89ae92", + "file_size_bytes": 1280537 + }, + { + "name": "Gp0127642_SMART GFF file", + "description": "SMART GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_smart.gff", + "md5_checksum": "aa1e3207b62ca31a87da28ad4c3e6e92", + "id": "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", + "file_size_bytes": 4029242 + }, + { + "name": "Gp0127642_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_supfam.gff", + "md5_checksum": "5119eebdfebd43b4af243a61cc8e45eb", + "id": "nmdc:5119eebdfebd43b4af243a61cc8e45eb", + "file_size_bytes": 23011352 + }, + { + "name": "Gp0127642_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cath_funfam.gff", + "md5_checksum": "4e6178de376e5e228c8b5c17ce3d0621", + "id": "nmdc:4e6178de376e5e228c8b5c17ce3d0621", + "file_size_bytes": 17039992 + }, + { + "name": "Gp0127642_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko_ec.gff", + "md5_checksum": "d89f026da3dfb4ee7d4884a47ce5739d", + "id": "nmdc:d89f026da3dfb4ee7d4884a47ce5739d", + "file_size_bytes": 11677748 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab3471c" + }, + "has_input": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", + 
"nmdc:c0f7ac45facbbb7b74bb7ce11af11910" + ], + "too_short_contig_num": 101249, + "part_of": [ + "nmdc:mga0cvxk30" + ], + "binned_contig_num": 213, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac59797a394f8e4aa971e5c1d016e23e", + "nmdc:46858bd4b45bdaa4e4344820f3c54b3b" + ], + "was_informed_by": "gold:Gp0127642", + "input_contig_num": 103206, + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0cvxk30", + "mags_list": [ + { + "number_of_contig": 213, + "completeness": 71.17, + "bin_name": "bins.1", + "gene_count": 1914, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 31 + } + ], + "unbinned_contig_num": 1744, + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T20:50:24+00:00", + "output_data_objects": [ + { + "name": "Gp0127642_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127642", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_checkm_qa.out", + "md5_checksum": "ac59797a394f8e4aa971e5c1d016e23e", + "id": "nmdc:ac59797a394f8e4aa971e5c1d016e23e", + "file_size_bytes": 765 + }, + { + "name": "Gp0127642_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127642", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_hqmq_bin.zip", + "md5_checksum": "46858bd4b45bdaa4e4344820f3c54b3b", + "id": 
"nmdc:46858bd4b45bdaa4e4344820f3c54b3b", + "file_size_bytes": 472684 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b5a" + }, + "id": "nmdc:omprc-11-vs67yj43", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-0n5nks24" + ], + "has_output": [ + "jgi:574fde867ded5e3df1ee1420" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127646" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86f" + }, + "has_input": [ + "nmdc:94b1d19ad74cfb1be53ebb45dcf5f70c" + ], + "part_of": [ + "nmdc:mga0dm4q17" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:208a3777ef0b99408f0d5832dee576e0", + "nmdc:8533a56006bdc1841b6fc16e99b6a84a" + ], + "was_informed_by": "gold:Gp0127646", + "input_read_count": 27835800, + "output_read_bases": 3867340900, + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4203205800, + "name": "Read QC Activity for nmdc:mga0dm4q17", + "output_read_count": 25862834, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:12+00:00", + "output_data_objects": [ + { + "name": "Gp0127646_Filtered Reads", + 
"description": "Filtered Reads for Gp0127646", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", + "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", + "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", + "file_size_bytes": 2209739723 + }, + { + "name": "Gp0127646_Filtered Stats", + "description": "Filtered Stats for Gp0127646", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", + "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", + "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", + "file_size_bytes": 291 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf33" + }, + "has_input": [ + "nmdc:208a3777ef0b99408f0d5832dee576e0" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3e0598df41941463bac0fdec5df29f55", + "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", + "nmdc:bc8e157195d042d7207d67b4982fea96", + "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", + "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", + "nmdc:cd10cca62774e66f60d60380ee18132e", + "nmdc:b13ee2ee52d15c3669aecd2e913f2658", + "nmdc:09a2d722810b3d90207bc4cfa626133b", + "nmdc:c3a8d9f48266a43ad74fc581132e2bba" + ], + "was_informed_by": "gold:Gp0127646", + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0dm4q17", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:05:12+00:00", + "output_data_objects": [ + { + "name": "Gp0127646_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", + "md5_checksum": "3e0598df41941463bac0fdec5df29f55", + "id": "nmdc:3e0598df41941463bac0fdec5df29f55", + 
"file_size_bytes": 4650 + }, + { + "name": "Gp0127646_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", + "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", + "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", + "file_size_bytes": 877659 + }, + { + "name": "Gp0127646_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127646", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", + "md5_checksum": "bc8e157195d042d7207d67b4982fea96", + "id": "nmdc:bc8e157195d042d7207d67b4982fea96", + "file_size_bytes": 236676 + }, + { + "name": "Gp0127646_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127646", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", + "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", + "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", + "file_size_bytes": 1901493736 + }, + { + "name": "Gp0127646_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127646", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", + "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", + "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", + "file_size_bytes": 256274 + }, + { + "name": "Gp0127646_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127646", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", + 
"md5_checksum": "cd10cca62774e66f60d60380ee18132e", + "id": "nmdc:cd10cca62774e66f60d60380ee18132e", + "file_size_bytes": 2333722 + }, + { + "name": "Gp0127646_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127646", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", + "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", + "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", + "file_size_bytes": 1534616616 + }, + { + "name": "Gp0127646_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127646", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", + "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", + "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", + "file_size_bytes": 663507 + }, + { + "name": "Gp0127646_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127646", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", + "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", + "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", + "file_size_bytes": 4031909 + } + ] + }, + { + "_id": { + "$oid": "61e7197d833bcf838a7009e9" + }, + "has_input": [ + "nmdc:208a3777ef0b99408f0d5832dee576e0" + ], + "part_of": [ + "nmdc:mga0dm4q17" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3e0598df41941463bac0fdec5df29f55", + "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", + "nmdc:bc8e157195d042d7207d67b4982fea96", + "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", + "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", + "nmdc:cd10cca62774e66f60d60380ee18132e", + "nmdc:b13ee2ee52d15c3669aecd2e913f2658", + 
"nmdc:09a2d722810b3d90207bc4cfa626133b", + "nmdc:c3a8d9f48266a43ad74fc581132e2bba" + ], + "was_informed_by": "gold:Gp0127646", + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0dm4q17", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:05:12+00:00", + "output_data_objects": [ + { + "name": "Gp0127646_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", + "md5_checksum": "3e0598df41941463bac0fdec5df29f55", + "id": "nmdc:3e0598df41941463bac0fdec5df29f55", + "file_size_bytes": 4650 + }, + { + "name": "Gp0127646_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", + "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", + "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", + "file_size_bytes": 877659 + }, + { + "name": "Gp0127646_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127646", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", + "md5_checksum": "bc8e157195d042d7207d67b4982fea96", + "id": "nmdc:bc8e157195d042d7207d67b4982fea96", + "file_size_bytes": 236676 + }, + { + "name": "Gp0127646_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127646", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", + "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", + "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", 
+ "file_size_bytes": 1901493736 + }, + { + "name": "Gp0127646_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127646", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", + "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", + "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", + "file_size_bytes": 256274 + }, + { + "name": "Gp0127646_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127646", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", + "md5_checksum": "cd10cca62774e66f60d60380ee18132e", + "id": "nmdc:cd10cca62774e66f60d60380ee18132e", + "file_size_bytes": 2333722 + }, + { + "name": "Gp0127646_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127646", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", + "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", + "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", + "file_size_bytes": 1534616616 + }, + { + "name": "Gp0127646_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127646", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", + "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", + "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", + "file_size_bytes": 663507 + }, + { + "name": "Gp0127646_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127646", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", + "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", + "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", + "file_size_bytes": 4031909 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9d" + }, + "has_input": [ + "nmdc:208a3777ef0b99408f0d5832dee576e0" + ], + "part_of": [ + "nmdc:mga0dm4q17" + ], + "ctg_logsum": 20856, + "scaf_logsum": 20954, + "gap_pct": 0.00116, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c2301a45b987661e5e6f32eaf6928003", + "nmdc:6233a266773aa722d6a3c2556b0c1cb5", + "nmdc:c5460716df8c1d47e081837c8cc5d281", + "nmdc:9437132a95f356e7cc6513f862f38f81", + "nmdc:0d0ee85be3a079b0eba5bb872c842f7d" + ], + "asm_score": 17.863, + "was_informed_by": "gold:Gp0127646", + "ctg_powsum": 2534.931, + "scaf_max": 88400, + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "scaf_powsum": 2545.156, + "execution_resource": "NERSC-Cori", + "contigs": 81653, + "name": "Assembly Activity for nmdc:mga0dm4q17", + "ctg_max": 88400, + "gc_std": 0.13273, + "contig_bp": 30097213, + "gc_avg": 0.55961, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 30097563, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 81627, + "ended_at_time": "2021-10-11T04:05:12+00:00", + "ctg_l50": 332, + "ctg_l90": 282, + "ctg_n50": 30532, + "ctg_n90": 71638, + "scaf_l50": 332, + "scaf_l90": 282, + "scaf_n50": 30518, + "scaf_n90": 71614, + "scaf_l_gt50k": 150260, + "scaf_n_gt50k": 2, + "scaf_pct_gt50k": 0.49924305, + "output_data_objects": [ + { + "name": "Gp0127646_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127646", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_contigs.fna", + "md5_checksum": "c2301a45b987661e5e6f32eaf6928003", + "id": "nmdc:c2301a45b987661e5e6f32eaf6928003", + "file_size_bytes": 33070670 + 
}, + { + "name": "Gp0127646_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127646", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_scaffolds.fna", + "md5_checksum": "6233a266773aa722d6a3c2556b0c1cb5", + "id": "nmdc:6233a266773aa722d6a3c2556b0c1cb5", + "file_size_bytes": 32825592 + }, + { + "name": "Gp0127646_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_covstats.txt", + "md5_checksum": "c5460716df8c1d47e081837c8cc5d281", + "id": "nmdc:c5460716df8c1d47e081837c8cc5d281", + "file_size_bytes": 6393678 + }, + { + "name": "Gp0127646_Assembled AGP file", + "description": "Assembled AGP file for Gp0127646", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_assembly.agp", + "md5_checksum": "9437132a95f356e7cc6513f862f38f81", + "id": "nmdc:9437132a95f356e7cc6513f862f38f81", + "file_size_bytes": 5942403 + }, + { + "name": "Gp0127646_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127646", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_pairedMapped_sorted.bam", + "md5_checksum": "0d0ee85be3a079b0eba5bb872c842f7d", + "id": "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", + "file_size_bytes": 2346665933 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b2" + }, + "has_input": [ + "nmdc:c2301a45b987661e5e6f32eaf6928003" + ], + "part_of": [ + "nmdc:mga0dm4q17" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", + "nmdc:769c049c4b3301900de0c62666e8c297", + "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", + "nmdc:1b81cc955690e81f18c2bc1533e7ee89", + 
"nmdc:dd94ee1dbd107bf14e8be72b8f546290", + "nmdc:e271f0ef1c44b514304c35a7913751e3", + "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", + "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", + "nmdc:62920faf364dea6a1d028878d49a2989", + "nmdc:757bd3295026410cb03690d4dae95935", + "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", + "nmdc:fc8598d9d6926e6ac8bb9c488016734a" + ], + "was_informed_by": "gold:Gp0127646", + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0dm4q17", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:05:12+00:00", + "output_data_objects": [ + { + "name": "Gp0127646_Protein FAA", + "description": "Protein FAA for Gp0127646", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_proteins.faa", + "md5_checksum": "b907352a805a209c5b7e10f6ce9e3ceb", + "id": "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", + "file_size_bytes": 18886480 + }, + { + "name": "Gp0127646_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127646", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_structural_annotation.gff", + "md5_checksum": "769c049c4b3301900de0c62666e8c297", + "id": "nmdc:769c049c4b3301900de0c62666e8c297", + "file_size_bytes": 2883 + }, + { + "name": "Gp0127646_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127646", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_functional_annotation.gff", + "md5_checksum": "3dcb9f83f3921fc7f3e7a2050584cc77", + "id": "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", + "file_size_bytes": 23048582 + }, + { + "name": "Gp0127646_KO TSV file", + "description": "KO TSV file 
for Gp0127646", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko.tsv", + "md5_checksum": "1b81cc955690e81f18c2bc1533e7ee89", + "id": "nmdc:1b81cc955690e81f18c2bc1533e7ee89", + "file_size_bytes": 2643070 + }, + { + "name": "Gp0127646_EC TSV file", + "description": "EC TSV file for Gp0127646", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ec.tsv", + "md5_checksum": "dd94ee1dbd107bf14e8be72b8f546290", + "id": "nmdc:dd94ee1dbd107bf14e8be72b8f546290", + "file_size_bytes": 1742846 + }, + { + "name": "Gp0127646_COG GFF file", + "description": "COG GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cog.gff", + "md5_checksum": "e271f0ef1c44b514304c35a7913751e3", + "id": "nmdc:e271f0ef1c44b514304c35a7913751e3", + "file_size_bytes": 12090733 + }, + { + "name": "Gp0127646_PFAM GFF file", + "description": "PFAM GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_pfam.gff", + "md5_checksum": "b3d3f1ef308b7555cbea077cc00dbc95", + "id": "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", + "file_size_bytes": 8631888 + }, + { + "name": "Gp0127646_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_tigrfam.gff", + "md5_checksum": "d18d6a67ad7e17514b0c4b502ea69ac0", + "id": "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", + "file_size_bytes": 840759 + }, + { + "name": "Gp0127646_SMART GFF file", + "description": "SMART GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_smart.gff", + "md5_checksum": "62920faf364dea6a1d028878d49a2989", + "id": "nmdc:62920faf364dea6a1d028878d49a2989", + "file_size_bytes": 2684392 + }, + { 
+ "name": "Gp0127646_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_supfam.gff", + "md5_checksum": "757bd3295026410cb03690d4dae95935", + "id": "nmdc:757bd3295026410cb03690d4dae95935", + "file_size_bytes": 15569120 + }, + { + "name": "Gp0127646_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cath_funfam.gff", + "md5_checksum": "19eef79eefc81cbe6d7d4586d8be5d23", + "id": "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", + "file_size_bytes": 11479737 + }, + { + "name": "Gp0127646_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko_ec.gff", + "md5_checksum": "fc8598d9d6926e6ac8bb9c488016734a", + "id": "nmdc:fc8598d9d6926e6ac8bb9c488016734a", + "file_size_bytes": 8425263 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34715" + }, + "has_input": [ + "nmdc:c2301a45b987661e5e6f32eaf6928003", + "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", + "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77" + ], + "too_short_contig_num": 80674, + "part_of": [ + "nmdc:mga0dm4q17" + ], + "binned_contig_num": 20, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:de605dd3ecac26d6a35740c09448b171", + "nmdc:9392ab9668a1c347f010004c2f0cc8db", + "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", + "nmdc:17e9a7763327f2b5d3f841079c2f68d8" + ], + "was_informed_by": "gold:Gp0127646", + "input_contig_num": 81652, + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0dm4q17", + "mags_list": [ + { + "number_of_contig": 20, + "completeness": 1.36, + "bin_name": "bins.1", + 
"gene_count": 275, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 10 + } + ], + "unbinned_contig_num": 958, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:12+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127646_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.tooShort.fa", + "md5_checksum": "de605dd3ecac26d6a35740c09448b171", + "id": "nmdc:de605dd3ecac26d6a35740c09448b171", + "file_size_bytes": 31210054 + }, + { + "name": "Gp0127646_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.unbinned.fa", + "md5_checksum": "9392ab9668a1c347f010004c2f0cc8db", + "id": "nmdc:9392ab9668a1c347f010004c2f0cc8db", + "file_size_bytes": 1595698 + }, + { + "name": "Gp0127647_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127647", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", + "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", + "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "file_size_bytes": 775 + }, + { + "name": "Gp0127646_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127646", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_hqmq_bin.zip", + "md5_checksum": "d75d0006d0009e7e14f2ad8044a3cbfb", + "id": "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", + "file_size_bytes": 182 + }, + { + "name": "Gp0127646_metabat2 bins", + "description": "metabat2 bins for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_metabat_bin.zip", + "md5_checksum": "17e9a7763327f2b5d3f841079c2f68d8", + "id": "nmdc:17e9a7763327f2b5d3f841079c2f68d8", + "file_size_bytes": 82006 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b5b" + }, + "id": "nmdc:omprc-11-nhf5m035", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-b7nrtg75" + ], + "has_output": [ + "jgi:574fe0ac7ded5e3df1ee1491" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127648" 
+ ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c870" + }, + "has_input": [ + "nmdc:22bf7ba401619da2a191e7b30544a8ac" + ], + "part_of": [ + "nmdc:mga0andh11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", + "nmdc:2208c88cac6b941799d4492dbf5f0887" + ], + "was_informed_by": "gold:Gp0127648", + "input_read_count": 28064750, + "output_read_bases": 3953713958, + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4237777250, + "name": "Read QC Activity for nmdc:mga0andh11", + "output_read_count": 26438892, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:13:04+00:00", + "output_data_objects": [ + { + "name": "Gp0127648_Filtered Reads", + "description": "Filtered Reads for Gp0127648", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filtered.fastq.gz", + "md5_checksum": "fcc3a92dd2b6ab6045f4be27da6f2cdd", + "id": "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", + "file_size_bytes": 2191252492 + }, + { + "name": "Gp0127648_Filtered Stats", + "description": "Filtered Stats for Gp0127648", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filterStats.txt", + "md5_checksum": "2208c88cac6b941799d4492dbf5f0887", + "id": "nmdc:2208c88cac6b941799d4492dbf5f0887", + "file_size_bytes": 289 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf39" + }, + "has_input": [ + "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "nmdc:1357df297d8d8a872b335e0c3222d102", + "nmdc:5b510e336e60b6120b43e9b6420a074e", + 
"nmdc:33bf814280051c220e0c4a06f7935728", + "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "nmdc:0efb0ad19234056d7e2e3726dead3622", + "nmdc:222bac312efdd6c86d2475ad224b7907", + "nmdc:baaca868b1fed932b463e489708dd741", + "nmdc:b549d169e5b0693152555373a6d8ee75" + ], + "was_informed_by": "gold:Gp0127648", + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0andh11", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:13:04+00:00", + "output_data_objects": [ + { + "name": "Gp0127648_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", + "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", + "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "file_size_bytes": 3323 + }, + { + "name": "Gp0127648_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", + "md5_checksum": "1357df297d8d8a872b335e0c3222d102", + "id": "nmdc:1357df297d8d8a872b335e0c3222d102", + "file_size_bytes": 782039 + }, + { + "name": "Gp0127648_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127648", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", + "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", + "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", + "file_size_bytes": 236971 + }, + { + "name": "Gp0127648_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127648", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", + "md5_checksum": "33bf814280051c220e0c4a06f7935728", + "id": "nmdc:33bf814280051c220e0c4a06f7935728", + "file_size_bytes": 1945479328 + }, + { + "name": "Gp0127648_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127648", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", + "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", + "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "file_size_bytes": 255338 + }, + { + "name": "Gp0127648_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127648", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", + "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", + "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", + "file_size_bytes": 2333371 + }, + { + "name": "Gp0127648_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127648", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", + "md5_checksum": "222bac312efdd6c86d2475ad224b7907", + "id": "nmdc:222bac312efdd6c86d2475ad224b7907", + "file_size_bytes": 1562011343 + }, + { + "name": "Gp0127648_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127648", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", + "md5_checksum": "baaca868b1fed932b463e489708dd741", + "id": "nmdc:baaca868b1fed932b463e489708dd741", + "file_size_bytes": 647859 + }, + { + "name": "Gp0127648_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127648", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", + "md5_checksum": "b549d169e5b0693152555373a6d8ee75", + "id": "nmdc:b549d169e5b0693152555373a6d8ee75", + "file_size_bytes": 3952548 + } + ] + }, + { + "_id": { + "$oid": "61e7199b833bcf838a700dd2" + }, + "has_input": [ + "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" + ], + "part_of": [ + "nmdc:mga0andh11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "nmdc:1357df297d8d8a872b335e0c3222d102", + "nmdc:5b510e336e60b6120b43e9b6420a074e", + "nmdc:33bf814280051c220e0c4a06f7935728", + "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "nmdc:0efb0ad19234056d7e2e3726dead3622", + "nmdc:222bac312efdd6c86d2475ad224b7907", + "nmdc:baaca868b1fed932b463e489708dd741", + "nmdc:b549d169e5b0693152555373a6d8ee75" + ], + "was_informed_by": "gold:Gp0127648", + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0andh11", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:13:04+00:00", + "output_data_objects": [ + { + "name": "Gp0127648_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", + "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", + "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "file_size_bytes": 3323 + }, + { + "name": "Gp0127648_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", + "md5_checksum": 
"1357df297d8d8a872b335e0c3222d102", + "id": "nmdc:1357df297d8d8a872b335e0c3222d102", + "file_size_bytes": 782039 + }, + { + "name": "Gp0127648_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127648", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", + "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", + "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", + "file_size_bytes": 236971 + }, + { + "name": "Gp0127648_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127648", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", + "md5_checksum": "33bf814280051c220e0c4a06f7935728", + "id": "nmdc:33bf814280051c220e0c4a06f7935728", + "file_size_bytes": 1945479328 + }, + { + "name": "Gp0127648_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127648", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", + "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", + "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "file_size_bytes": 255338 + }, + { + "name": "Gp0127648_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127648", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", + "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", + "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", + "file_size_bytes": 2333371 + }, + { + "name": "Gp0127648_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127648", + "data_object_type": "Kraken2 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", + "md5_checksum": "222bac312efdd6c86d2475ad224b7907", + "id": "nmdc:222bac312efdd6c86d2475ad224b7907", + "file_size_bytes": 1562011343 + }, + { + "name": "Gp0127648_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127648", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", + "md5_checksum": "baaca868b1fed932b463e489708dd741", + "id": "nmdc:baaca868b1fed932b463e489708dd741", + "file_size_bytes": 647859 + }, + { + "name": "Gp0127648_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127648", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", + "md5_checksum": "b549d169e5b0693152555373a6d8ee75", + "id": "nmdc:b549d169e5b0693152555373a6d8ee75", + "file_size_bytes": 3952548 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa5" + }, + "has_input": [ + "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" + ], + "part_of": [ + "nmdc:mga0andh11" + ], + "ctg_logsum": 91193, + "scaf_logsum": 91521, + "gap_pct": 0.0011, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ca10f7bae0565946414188c9121ee338", + "nmdc:cf23062373806986b70244b1fabbd17b", + "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", + "nmdc:303d7282e6f91afaa9564c65107d4086", + "nmdc:4a6ffadb01b62dd73278429808c1a39a" + ], + "asm_score": 4.996, + "was_informed_by": "gold:Gp0127648", + "ctg_powsum": 10170, + "scaf_max": 23974, + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "scaf_powsum": 10208, + "execution_resource": "NERSC-Cori", + "contigs": 147340, + "name": "Assembly Activity for nmdc:mga0andh11", + "ctg_max": 23974, + "gc_std": 0.0855, + 
"contig_bp": 61886959, + "gc_avg": 0.61759, + "started_at_time": "2021-10-11T02:23:29Z", + "scaf_bp": 61887639, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 147272, + "ended_at_time": "2021-10-11T04:13:04+00:00", + "ctg_l50": 381, + "ctg_l90": 285, + "ctg_n50": 47493, + "ctg_n90": 126039, + "scaf_l50": 381, + "scaf_l90": 285, + "scaf_n50": 47464, + "scaf_n90": 125972, + "output_data_objects": [ + { + "name": "Gp0127648_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127648", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_contigs.fna", + "md5_checksum": "ca10f7bae0565946414188c9121ee338", + "id": "nmdc:ca10f7bae0565946414188c9121ee338", + "file_size_bytes": 67439267 + }, + { + "name": "Gp0127648_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127648", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_scaffolds.fna", + "md5_checksum": "cf23062373806986b70244b1fabbd17b", + "id": "nmdc:cf23062373806986b70244b1fabbd17b", + "file_size_bytes": 66996134 + }, + { + "name": "Gp0127648_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_covstats.txt", + "md5_checksum": "99b2c3c91b299b9426cca9dfb10b0cea", + "id": "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", + "file_size_bytes": 11610674 + }, + { + "name": "Gp0127648_Assembled AGP file", + "description": "Assembled AGP file for Gp0127648", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_assembly.agp", + "md5_checksum": "303d7282e6f91afaa9564c65107d4086", + "id": "nmdc:303d7282e6f91afaa9564c65107d4086", + "file_size_bytes": 10842402 + }, + { + "name": "Gp0127648_Metagenome Alignment BAM file", + 
"description": "Metagenome Alignment BAM file for Gp0127648", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_pairedMapped_sorted.bam", + "md5_checksum": "4a6ffadb01b62dd73278429808c1a39a", + "id": "nmdc:4a6ffadb01b62dd73278429808c1a39a", + "file_size_bytes": 2362185094 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b8" + }, + "has_input": [ + "nmdc:ca10f7bae0565946414188c9121ee338" + ], + "part_of": [ + "nmdc:mga0andh11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", + "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", + "nmdc:600011ab7e39465d3f9f28d5d93a4248", + "nmdc:0c8d98b369900cd19da39235e3eae6db", + "nmdc:16c37f8c4f74e7e81b7900536da55e39", + "nmdc:a7fc228cd8d224bbf2843ba6a6648480", + "nmdc:a57d9d86c20cfd13ddc56027110485ba", + "nmdc:6a4be27e2e7454941b73aa843471f25d", + "nmdc:be3d2a77be3ccd810d679f03204f8bac", + "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", + "nmdc:81ff9f257ffe63ca5d04db9e767620b1", + "nmdc:8768f37ff001a86a25ae34c7deee9d9a" + ], + "was_informed_by": "gold:Gp0127648", + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0andh11", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:13:04+00:00", + "output_data_objects": [ + { + "name": "Gp0127648_Protein FAA", + "description": "Protein FAA for Gp0127648", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_proteins.faa", + "md5_checksum": "c4a719f3a899f7aa760f627f7b1ae6e7", + "id": "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", + "file_size_bytes": 40118426 + }, + { + "name": "Gp0127648_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127648", + 
"data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_structural_annotation.gff", + "md5_checksum": "80ab4116b1cdfbc3e4c4d06e5990d735", + "id": "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", + "file_size_bytes": 2507 + }, + { + "name": "Gp0127648_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127648", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_functional_annotation.gff", + "md5_checksum": "600011ab7e39465d3f9f28d5d93a4248", + "id": "nmdc:600011ab7e39465d3f9f28d5d93a4248", + "file_size_bytes": 47178055 + }, + { + "name": "Gp0127648_KO TSV file", + "description": "KO TSV file for Gp0127648", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko.tsv", + "md5_checksum": "0c8d98b369900cd19da39235e3eae6db", + "id": "nmdc:0c8d98b369900cd19da39235e3eae6db", + "file_size_bytes": 5498487 + }, + { + "name": "Gp0127648_EC TSV file", + "description": "EC TSV file for Gp0127648", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ec.tsv", + "md5_checksum": "16c37f8c4f74e7e81b7900536da55e39", + "id": "nmdc:16c37f8c4f74e7e81b7900536da55e39", + "file_size_bytes": 3650457 + }, + { + "name": "Gp0127648_COG GFF file", + "description": "COG GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cog.gff", + "md5_checksum": "a7fc228cd8d224bbf2843ba6a6648480", + "id": "nmdc:a7fc228cd8d224bbf2843ba6a6648480", + "file_size_bytes": 27226505 + }, + { + "name": "Gp0127648_PFAM GFF file", + "description": "PFAM GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_pfam.gff", + 
"md5_checksum": "a57d9d86c20cfd13ddc56027110485ba", + "id": "nmdc:a57d9d86c20cfd13ddc56027110485ba", + "file_size_bytes": 19896169 + }, + { + "name": "Gp0127648_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_tigrfam.gff", + "md5_checksum": "6a4be27e2e7454941b73aa843471f25d", + "id": "nmdc:6a4be27e2e7454941b73aa843471f25d", + "file_size_bytes": 2105656 + }, + { + "name": "Gp0127648_SMART GFF file", + "description": "SMART GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_smart.gff", + "md5_checksum": "be3d2a77be3ccd810d679f03204f8bac", + "id": "nmdc:be3d2a77be3ccd810d679f03204f8bac", + "file_size_bytes": 6062323 + }, + { + "name": "Gp0127648_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_supfam.gff", + "md5_checksum": "eb5ac02ce17f687c5ccf5a64548c559e", + "id": "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", + "file_size_bytes": 33896425 + }, + { + "name": "Gp0127648_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cath_funfam.gff", + "md5_checksum": "81ff9f257ffe63ca5d04db9e767620b1", + "id": "nmdc:81ff9f257ffe63ca5d04db9e767620b1", + "file_size_bytes": 25515156 + }, + { + "name": "Gp0127648_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko_ec.gff", + "md5_checksum": "8768f37ff001a86a25ae34c7deee9d9a", + "id": "nmdc:8768f37ff001a86a25ae34c7deee9d9a", + "file_size_bytes": 17491444 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34718" + }, + "has_input": [ + "nmdc:ca10f7bae0565946414188c9121ee338", + "nmdc:4a6ffadb01b62dd73278429808c1a39a", + 
"nmdc:600011ab7e39465d3f9f28d5d93a4248" + ], + "too_short_contig_num": 142847, + "part_of": [ + "nmdc:mga0andh11" + ], + "binned_contig_num": 329, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", + "nmdc:fc8454a790709b36d7ca96cd99359d26", + "nmdc:942bd7c28c52e6301bf97dab0ea2852a", + "nmdc:82ebf9065be9715e1230a50bf7a02197", + "nmdc:897536007e7e3525457df5d3baddd593" + ], + "was_informed_by": "gold:Gp0127648", + "input_contig_num": 147340, + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0andh11", + "mags_list": [ + { + "number_of_contig": 255, + "completeness": 70.91, + "bin_name": "bins.1", + "gene_count": 1857, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 1, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.94, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 28 + }, + { + "number_of_contig": 74, + "completeness": 19.91, + "bin_name": "bins.2", + "gene_count": 380, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 4164, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:13:04+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127648_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.tooShort.fa", + "md5_checksum": "8b67e5038c55083e2aa8e19c5d05fef8", + "id": "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", + "file_size_bytes": 58962192 + }, + { + "name": "Gp0127648_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.unbinned.fa", + "md5_checksum": "fc8454a790709b36d7ca96cd99359d26", + "id": "nmdc:fc8454a790709b36d7ca96cd99359d26", + "file_size_bytes": 6656731 + }, + { + "name": "Gp0127648_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127648", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_checkm_qa.out", + "md5_checksum": "942bd7c28c52e6301bf97dab0ea2852a", + "id": "nmdc:942bd7c28c52e6301bf97dab0ea2852a", + "file_size_bytes": 930 + }, + { + "name": "Gp0127648_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127648", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_hqmq_bin.zip", + "md5_checksum": "82ebf9065be9715e1230a50bf7a02197", + "id": "nmdc:82ebf9065be9715e1230a50bf7a02197", + "file_size_bytes": 466157 + }, + { + "name": "Gp0127648_metabat2 bins", + "description": "metabat2 bins for Gp0127648", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_metabat_bin.zip", + "md5_checksum": "897536007e7e3525457df5d3baddd593", + "id": "nmdc:897536007e7e3525457df5d3baddd593", + "file_size_bytes": 90255 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b5c" + }, + "id": "nmdc:omprc-11-w3v30q48", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-q44pjf87" + ], + "has_output": [ + "jgi:574fde8a7ded5e3df1ee1422" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127647" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86e" + }, + "has_input": [ + "nmdc:34b881e1c01cbdc1f8dc1b1fc07e46a7" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c082eff434fe4863c0e29c79b759d100", + "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b" + ], + "was_informed_by": "gold:Gp0127647", + "input_read_count": 24906858, + "output_read_bases": 3608754154, + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3760935558, + "name": "Read QC Activity for nmdc:mga0g0e588", + "output_read_count": 24128544, + "started_at_time": 
"2021-10-11T02:24:27Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:38:33+00:00", + "output_data_objects": [ + { + "name": "Gp0127647_Filtered Reads", + "description": "Filtered Reads for Gp0127647", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", + "md5_checksum": "c082eff434fe4863c0e29c79b759d100", + "id": "nmdc:c082eff434fe4863c0e29c79b759d100", + "file_size_bytes": 2052448806 + }, + { + "name": "Gp0127647_Filtered Stats", + "description": "Filtered Stats for Gp0127647", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", + "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", + "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", + "file_size_bytes": 282 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf32" + }, + "has_input": [ + "nmdc:c082eff434fe4863c0e29c79b759d100" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e1438bf8076daf46f3d782d8f9656b4", + "nmdc:cfd63309cd38a293615ddce5e8ea6402", + "nmdc:7e353b7bfb1586773fa00b515dffe6ec", + "nmdc:6667be33e7867ca2aabfa5d663e2970a", + "nmdc:7ee0b0b21444ee06752e6b9c32f476af", + "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", + "nmdc:45617f93e5f072fbad25a0308ead6c3d", + "nmdc:460e7594fcd06678df1b9c5e5075cb4d", + "nmdc:ab80fc324c9206a41a66d64227a97179" + ], + "was_informed_by": "gold:Gp0127647", + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0g0e588", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:38:33+00:00", + "output_data_objects": [ + { + "name": "Gp0127647_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127647", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", + "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", + "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", + "file_size_bytes": 4666 + }, + { + "name": "Gp0127647_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", + "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", + "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", + "file_size_bytes": 786018 + }, + { + "name": "Gp0127647_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127647", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", + "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", + "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", + "file_size_bytes": 237895 + }, + { + "name": "Gp0127647_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127647", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", + "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", + "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", + "file_size_bytes": 1767305277 + }, + { + "name": "Gp0127647_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127647", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", + "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", + "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", + "file_size_bytes": 254858 + }, + { + "name": "Gp0127647_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127647", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", + "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", + "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", + "file_size_bytes": 2332396 + }, + { + "name": "Gp0127647_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127647", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", + "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", + "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", + "file_size_bytes": 1419938277 + }, + { + "name": "Gp0127647_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127647", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", + "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", + "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", + "file_size_bytes": 661837 + }, + { + "name": "Gp0127647_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127647", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", + "md5_checksum": "ab80fc324c9206a41a66d64227a97179", + "id": "nmdc:ab80fc324c9206a41a66d64227a97179", + "file_size_bytes": 4028822 + } + ] + }, + { + "_id": { + "$oid": "61e7197e833bcf838a700a51" + }, + "has_input": [ + "nmdc:c082eff434fe4863c0e29c79b759d100" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e1438bf8076daf46f3d782d8f9656b4", + "nmdc:cfd63309cd38a293615ddce5e8ea6402", + "nmdc:7e353b7bfb1586773fa00b515dffe6ec", 
+ "nmdc:6667be33e7867ca2aabfa5d663e2970a", + "nmdc:7ee0b0b21444ee06752e6b9c32f476af", + "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", + "nmdc:45617f93e5f072fbad25a0308ead6c3d", + "nmdc:460e7594fcd06678df1b9c5e5075cb4d", + "nmdc:ab80fc324c9206a41a66d64227a97179" + ], + "was_informed_by": "gold:Gp0127647", + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0g0e588", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:38:33+00:00", + "output_data_objects": [ + { + "name": "Gp0127647_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", + "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", + "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", + "file_size_bytes": 4666 + }, + { + "name": "Gp0127647_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", + "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", + "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", + "file_size_bytes": 786018 + }, + { + "name": "Gp0127647_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127647", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", + "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", + "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", + "file_size_bytes": 237895 + }, + { + "name": "Gp0127647_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127647", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", + "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", + "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", + "file_size_bytes": 1767305277 + }, + { + "name": "Gp0127647_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127647", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", + "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", + "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", + "file_size_bytes": 254858 + }, + { + "name": "Gp0127647_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127647", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", + "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", + "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", + "file_size_bytes": 2332396 + }, + { + "name": "Gp0127647_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127647", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", + "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", + "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", + "file_size_bytes": 1419938277 + }, + { + "name": "Gp0127647_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127647", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", + "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", + "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", + "file_size_bytes": 661837 + }, + { + "name": "Gp0127647_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127647", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", + "md5_checksum": "ab80fc324c9206a41a66d64227a97179", + "id": "nmdc:ab80fc324c9206a41a66d64227a97179", + "file_size_bytes": 4028822 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa0" + }, + "has_input": [ + "nmdc:c082eff434fe4863c0e29c79b759d100" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "ctg_logsum": 37666, + "scaf_logsum": 37899, + "gap_pct": 0.00092, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05952c056a6db782ba77c6369206838a", + "nmdc:6fa8f2d4236fda4f628436ed85094e3b", + "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", + "nmdc:fee22437c76dc343846f41e1be538b9d", + "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc" + ], + "asm_score": 14.664, + "was_informed_by": "gold:Gp0127647", + "ctg_powsum": 4336.355, + "scaf_max": 96788, + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "scaf_powsum": 4362.772, + "execution_resource": "NERSC-Cori", + "contigs": 97351, + "name": "Assembly Activity for nmdc:mga0g0e588", + "ctg_max": 96788, + "gc_std": 0.13435, + "contig_bp": 38110297, + "gc_avg": 0.5552, + "started_at_time": "2021-10-11T02:24:27Z", + "scaf_bp": 38110647, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 97316, + "ended_at_time": "2021-10-11T03:38:33+00:00", + "ctg_l50": 353, + "ctg_l90": 283, + "ctg_n50": 34144, + "ctg_n90": 85387, + "scaf_l50": 353, + "scaf_l90": 283, + "scaf_n50": 34125, + "scaf_n90": 85353, + "scaf_l_gt50k": 153917, + "scaf_n_gt50k": 2, + "scaf_pct_gt50k": 0.40386876, + "output_data_objects": [ + { + "name": "Gp0127647_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127647", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_contigs.fna", + 
"md5_checksum": "05952c056a6db782ba77c6369206838a", + "id": "nmdc:05952c056a6db782ba77c6369206838a", + "file_size_bytes": 41696500 + }, + { + "name": "Gp0127647_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127647", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_scaffolds.fna", + "md5_checksum": "6fa8f2d4236fda4f628436ed85094e3b", + "id": "nmdc:6fa8f2d4236fda4f628436ed85094e3b", + "file_size_bytes": 41403892 + }, + { + "name": "Gp0127647_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_covstats.txt", + "md5_checksum": "82be5b6248eb4b0bfef1c9afa5c5c0bc", + "id": "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", + "file_size_bytes": 7629542 + }, + { + "name": "Gp0127647_Assembled AGP file", + "description": "Assembled AGP file for Gp0127647", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_assembly.agp", + "md5_checksum": "fee22437c76dc343846f41e1be538b9d", + "id": "nmdc:fee22437c76dc343846f41e1be538b9d", + "file_size_bytes": 7091204 + }, + { + "name": "Gp0127647_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127647", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_pairedMapped_sorted.bam", + "md5_checksum": "7fc9fd7844b6ce48869a0ad5216da4dc", + "id": "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", + "file_size_bytes": 2190560397 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b5" + }, + "has_input": [ + "nmdc:05952c056a6db782ba77c6369206838a" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:b95b8538748c921fac6c93ba55d43e2c", + "nmdc:9c63632766a4946bc76829a7dafe49c0", + "nmdc:0c5e791c8170181aa3e43d710e7c55eb", + "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", + "nmdc:d770a8c872a3a359bf3482e564c56988", + "nmdc:cdecaf6cff3fc2d559cc3313599b137b", + "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", + "nmdc:809e6d246bd10968d4da074db08216d9", + "nmdc:546d11411d30ab337a215d0094fc36b6", + "nmdc:6eb654de91a99eb4e01e1bf9513a6208", + "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", + "nmdc:455f95c7c15739b2fddc6f62b03253ed" + ], + "was_informed_by": "gold:Gp0127647", + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0g0e588", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:38:33+00:00", + "output_data_objects": [ + { + "name": "Gp0127647_Protein FAA", + "description": "Protein FAA for Gp0127647", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_proteins.faa", + "md5_checksum": "b95b8538748c921fac6c93ba55d43e2c", + "id": "nmdc:b95b8538748c921fac6c93ba55d43e2c", + "file_size_bytes": 23580407 + }, + { + "name": "Gp0127647_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127647", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_structural_annotation.gff", + "md5_checksum": "9c63632766a4946bc76829a7dafe49c0", + "id": "nmdc:9c63632766a4946bc76829a7dafe49c0", + "file_size_bytes": 2925 + }, + { + "name": "Gp0127647_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127647", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_functional_annotation.gff", + "md5_checksum": 
"0c5e791c8170181aa3e43d710e7c55eb", + "id": "nmdc:0c5e791c8170181aa3e43d710e7c55eb", + "file_size_bytes": 28355659 + }, + { + "name": "Gp0127647_KO TSV file", + "description": "KO TSV file for Gp0127647", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko.tsv", + "md5_checksum": "358cb8682dd2d5c1b7a691e9f7734acc", + "id": "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", + "file_size_bytes": 3251676 + }, + { + "name": "Gp0127647_EC TSV file", + "description": "EC TSV file for Gp0127647", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ec.tsv", + "md5_checksum": "d770a8c872a3a359bf3482e564c56988", + "id": "nmdc:d770a8c872a3a359bf3482e564c56988", + "file_size_bytes": 2134531 + }, + { + "name": "Gp0127647_COG GFF file", + "description": "COG GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cog.gff", + "md5_checksum": "cdecaf6cff3fc2d559cc3313599b137b", + "id": "nmdc:cdecaf6cff3fc2d559cc3313599b137b", + "file_size_bytes": 15119260 + }, + { + "name": "Gp0127647_PFAM GFF file", + "description": "PFAM GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_pfam.gff", + "md5_checksum": "7dedc14d5645ae32f913d8f823ba5aa3", + "id": "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", + "file_size_bytes": 11013734 + }, + { + "name": "Gp0127647_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_tigrfam.gff", + "md5_checksum": "809e6d246bd10968d4da074db08216d9", + "id": "nmdc:809e6d246bd10968d4da074db08216d9", + "file_size_bytes": 1131416 + }, + { + "name": "Gp0127647_SMART GFF file", + "description": "SMART GFF file for Gp0127647", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_smart.gff", + "md5_checksum": "546d11411d30ab337a215d0094fc36b6", + "id": "nmdc:546d11411d30ab337a215d0094fc36b6", + "file_size_bytes": 3424877 + }, + { + "name": "Gp0127647_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_supfam.gff", + "md5_checksum": "6eb654de91a99eb4e01e1bf9513a6208", + "id": "nmdc:6eb654de91a99eb4e01e1bf9513a6208", + "file_size_bytes": 19463761 + }, + { + "name": "Gp0127647_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cath_funfam.gff", + "md5_checksum": "a8ae7ed318e7c170aeed508f331ce5b2", + "id": "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", + "file_size_bytes": 14536820 + }, + { + "name": "Gp0127647_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko_ec.gff", + "md5_checksum": "455f95c7c15739b2fddc6f62b03253ed", + "id": "nmdc:455f95c7c15739b2fddc6f62b03253ed", + "file_size_bytes": 10367039 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34716" + }, + "has_input": [ + "nmdc:05952c056a6db782ba77c6369206838a", + "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", + "nmdc:0c5e791c8170181aa3e43d710e7c55eb" + ], + "too_short_contig_num": 95291, + "part_of": [ + "nmdc:mga0g0e588" + ], + "binned_contig_num": 20, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", + "nmdc:40c0cbc75e2b698572b8b94d91fdc236", + "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "nmdc:03b448db547a556e988a0d4948dab424", + "nmdc:6e92868d1912cb8f5b32fbf507721d16" + ], + "was_informed_by": "gold:Gp0127647", + "input_contig_num": 97351, + "id": 
"nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0g0e588", + "mags_list": [ + { + "number_of_contig": 20, + "completeness": 1.36, + "bin_name": "bins.1", + "gene_count": 310, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 14 + } + ], + "unbinned_contig_num": 2040, + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:38:33+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127647_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.tooShort.fa", + "md5_checksum": "8ec4227eca7ea06fed4e866c4de4a5c9", + "id": "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", + "file_size_bytes": 38197270 + }, + { + "name": "Gp0127647_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.unbinned.fa", + "md5_checksum": "40c0cbc75e2b698572b8b94d91fdc236", + "id": "nmdc:40c0cbc75e2b698572b8b94d91fdc236", + "file_size_bytes": 3202231 + }, + { + "name": "Gp0127647_metabat2 bin checkm quality 
assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127647", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", + "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", + "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "file_size_bytes": 775 + }, + { + "name": "Gp0127647_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127647", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_hqmq_bin.zip", + "md5_checksum": "03b448db547a556e988a0d4948dab424", + "id": "nmdc:03b448db547a556e988a0d4948dab424", + "file_size_bytes": 182 + }, + { + "name": "Gp0127647_metabat2 bins", + "description": "metabat2 bins for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_metabat_bin.zip", + "md5_checksum": "6e92868d1912cb8f5b32fbf507721d16", + "id": "nmdc:6e92868d1912cb8f5b32fbf507721d16", + "file_size_bytes": 91931 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b5d" + }, + "id": "nmdc:omprc-11-vykcbs96", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-ffqcqd73" + ], + "has_output": [ + "jgi:574fde6e7ded5e3df1ee140d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + 
"processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127645" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86d" + }, + "has_input": [ + "nmdc:5e7fc22a1527c7ff74e245bbb352fa91" + ], + "part_of": [ + "nmdc:mga0jbfx89" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:034df323b47f010f27e7c032d445a891", + "nmdc:ca137bf5e2df6541425f22b5d1fec492" + ], + "was_informed_by": "gold:Gp0127645", + "input_read_count": 24139032, + "output_read_bases": 3475317024, + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3644993832, + "name": "Read QC Activity for nmdc:mga0jbfx89", + "output_read_count": 23262948, + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:07:11+00:00", + "output_data_objects": [ + { + "name": "Gp0127645_Filtered Reads", + "description": "Filtered Reads for Gp0127645", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filtered.fastq.gz", + "md5_checksum": "034df323b47f010f27e7c032d445a891", + "id": "nmdc:034df323b47f010f27e7c032d445a891", + "file_size_bytes": 1909192845 + }, + { + "name": "Gp0127645_Filtered Stats", + "description": "Filtered Stats for Gp0127645", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filterStats.txt", + "md5_checksum": "ca137bf5e2df6541425f22b5d1fec492", + "id": "nmdc:ca137bf5e2df6541425f22b5d1fec492", + "file_size_bytes": 283 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf43" + }, + "has_input": [ + "nmdc:034df323b47f010f27e7c032d445a891" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:694374188ba4372344536fa26a2282b8", + "nmdc:e11dfa7178e8c426c7c930b57aa40377", + "nmdc:46e203465faf61780fad8f626e9ab623", + "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", + "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", + "nmdc:4299b438a815becc8beed40fcb803e9f", + "nmdc:4ae4dbd13c7338df5c00555bc6755947", + "nmdc:2be07eb38d408077a55ecb48e123f7f8", + "nmdc:f318581f0df6e04b7ae2384f9237da06" + ], + "was_informed_by": "gold:Gp0127645", + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0jbfx89", + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:07:11+00:00", + "output_data_objects": [ + { + "name": "Gp0127645_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", + "md5_checksum": "694374188ba4372344536fa26a2282b8", + "id": "nmdc:694374188ba4372344536fa26a2282b8", + "file_size_bytes": 3780 + }, + { + "name": "Gp0127645_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", + "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", + "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", + "file_size_bytes": 822292 + }, + { + "name": "Gp0127645_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127645", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", + "md5_checksum": "46e203465faf61780fad8f626e9ab623", + "id": "nmdc:46e203465faf61780fad8f626e9ab623", + "file_size_bytes": 236496 + }, + { + "name": "Gp0127645_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for 
Gp0127645", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", + "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", + "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", + "file_size_bytes": 1699052782 + }, + { + "name": "Gp0127645_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127645", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", + "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", + "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", + "file_size_bytes": 256209 + }, + { + "name": "Gp0127645_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127645", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", + "md5_checksum": "4299b438a815becc8beed40fcb803e9f", + "id": "nmdc:4299b438a815becc8beed40fcb803e9f", + "file_size_bytes": 2336400 + }, + { + "name": "Gp0127645_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127645", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", + "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", + "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", + "file_size_bytes": 1359323947 + }, + { + "name": "Gp0127645_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127645", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", + "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", + "id": 
"nmdc:2be07eb38d408077a55ecb48e123f7f8", + "file_size_bytes": 651624 + }, + { + "name": "Gp0127645_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127645", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", + "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", + "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", + "file_size_bytes": 3973557 + } + ] + }, + { + "_id": { + "$oid": "61e7197f833bcf838a700ac3" + }, + "has_input": [ + "nmdc:034df323b47f010f27e7c032d445a891" + ], + "part_of": [ + "nmdc:mga0jbfx89" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:694374188ba4372344536fa26a2282b8", + "nmdc:e11dfa7178e8c426c7c930b57aa40377", + "nmdc:46e203465faf61780fad8f626e9ab623", + "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", + "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", + "nmdc:4299b438a815becc8beed40fcb803e9f", + "nmdc:4ae4dbd13c7338df5c00555bc6755947", + "nmdc:2be07eb38d408077a55ecb48e123f7f8", + "nmdc:f318581f0df6e04b7ae2384f9237da06" + ], + "was_informed_by": "gold:Gp0127645", + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0jbfx89", + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:07:11+00:00", + "output_data_objects": [ + { + "name": "Gp0127645_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", + "md5_checksum": "694374188ba4372344536fa26a2282b8", + "id": "nmdc:694374188ba4372344536fa26a2282b8", + "file_size_bytes": 3780 + }, + { + "name": "Gp0127645_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127645", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", + "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", + "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", + "file_size_bytes": 822292 + }, + { + "name": "Gp0127645_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127645", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", + "md5_checksum": "46e203465faf61780fad8f626e9ab623", + "id": "nmdc:46e203465faf61780fad8f626e9ab623", + "file_size_bytes": 236496 + }, + { + "name": "Gp0127645_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127645", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", + "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", + "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", + "file_size_bytes": 1699052782 + }, + { + "name": "Gp0127645_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127645", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", + "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", + "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", + "file_size_bytes": 256209 + }, + { + "name": "Gp0127645_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127645", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", + "md5_checksum": "4299b438a815becc8beed40fcb803e9f", + "id": "nmdc:4299b438a815becc8beed40fcb803e9f", + "file_size_bytes": 2336400 + }, + { + "name": 
"Gp0127645_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127645", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", + "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", + "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", + "file_size_bytes": 1359323947 + }, + { + "name": "Gp0127645_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127645", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", + "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", + "id": "nmdc:2be07eb38d408077a55ecb48e123f7f8", + "file_size_bytes": 651624 + }, + { + "name": "Gp0127645_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127645", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", + "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", + "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", + "file_size_bytes": 3973557 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa4" + }, + "has_input": [ + "nmdc:034df323b47f010f27e7c032d445a891" + ], + "part_of": [ + "nmdc:mga0jbfx89" + ], + "ctg_logsum": 65663, + "scaf_logsum": 65979, + "gap_pct": 0.00097, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "nmdc:7891adab80c63d98169e3cb7b4331f1e", + "nmdc:d883460ae5f8cbabc3d437e745935040", + "nmdc:f36166196caa529e09f3b93e17db3acc", + "nmdc:08a13111a5314ec4c8dbaa59790dc2f1" + ], + "asm_score": 2.823, + "was_informed_by": "gold:Gp0127645", + "ctg_powsum": 6960.932, + "scaf_max": 6924, + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "scaf_powsum": 6995.401, + 
"execution_resource": "NERSC-Cori", + "contigs": 106865, + "name": "Assembly Activity for nmdc:mga0jbfx89", + "ctg_max": 6924, + "gc_std": 0.12472, + "contig_bp": 45473855, + "gc_avg": 0.58373, + "started_at_time": "2021-10-11T02:24:42Z", + "scaf_bp": 45474295, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 106821, + "ended_at_time": "2021-10-11T04:07:11+00:00", + "ctg_l50": 395, + "ctg_l90": 284, + "ctg_n50": 33845, + "ctg_n90": 92046, + "scaf_l50": 395, + "scaf_l90": 284, + "scaf_n50": 33825, + "scaf_n90": 92004, + "output_data_objects": [ + { + "name": "Gp0127645_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127645", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_contigs.fna", + "md5_checksum": "3685fdcfffdf34d2802c692dc0515e33", + "id": "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "file_size_bytes": 49479236 + }, + { + "name": "Gp0127645_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127645", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_scaffolds.fna", + "md5_checksum": "7891adab80c63d98169e3cb7b4331f1e", + "id": "nmdc:7891adab80c63d98169e3cb7b4331f1e", + "file_size_bytes": 49157929 + }, + { + "name": "Gp0127645_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_covstats.txt", + "md5_checksum": "d883460ae5f8cbabc3d437e745935040", + "id": "nmdc:d883460ae5f8cbabc3d437e745935040", + "file_size_bytes": 8394481 + }, + { + "name": "Gp0127645_Assembled AGP file", + "description": "Assembled AGP file for Gp0127645", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_assembly.agp", + "md5_checksum": 
"f36166196caa529e09f3b93e17db3acc", + "id": "nmdc:f36166196caa529e09f3b93e17db3acc", + "file_size_bytes": 7804199 + }, + { + "name": "Gp0127645_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127645", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_pairedMapped_sorted.bam", + "md5_checksum": "08a13111a5314ec4c8dbaa59790dc2f1", + "id": "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", + "file_size_bytes": 2047004915 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b6" + }, + "has_input": [ + "nmdc:3685fdcfffdf34d2802c692dc0515e33" + ], + "part_of": [ + "nmdc:mga0jbfx89" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", + "nmdc:851584f7bcec80cddec4b113fe6cfcea", + "nmdc:d0280881c70c54946d9b5170e62b904b", + "nmdc:7c1894478af7b8205bb4760acb93c353", + "nmdc:ac413560dfdbcea1f0697391b593c552", + "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", + "nmdc:bbfcd35137b7cb018945a531704805eb", + "nmdc:c1c10952c472a97fb7de8bc7dbce564b", + "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", + "nmdc:a701026580285ca67816cb9a2f272ca6", + "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", + "nmdc:47c0e39e60bd4d688a29ede2af2cee35" + ], + "was_informed_by": "gold:Gp0127645", + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0jbfx89", + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:07:11+00:00", + "output_data_objects": [ + { + "name": "Gp0127645_Protein FAA", + "description": "Protein FAA for Gp0127645", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_proteins.faa", + "md5_checksum": "b14fecfaa99eaad42128e409aa7ae3ec", + "id": 
"nmdc:b14fecfaa99eaad42128e409aa7ae3ec", + "file_size_bytes": 29015561 + }, + { + "name": "Gp0127645_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127645", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_structural_annotation.gff", + "md5_checksum": "851584f7bcec80cddec4b113fe6cfcea", + "id": "nmdc:851584f7bcec80cddec4b113fe6cfcea", + "file_size_bytes": 2506 + }, + { + "name": "Gp0127645_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127645", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_functional_annotation.gff", + "md5_checksum": "d0280881c70c54946d9b5170e62b904b", + "id": "nmdc:d0280881c70c54946d9b5170e62b904b", + "file_size_bytes": 34124039 + }, + { + "name": "Gp0127645_KO TSV file", + "description": "KO TSV file for Gp0127645", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko.tsv", + "md5_checksum": "7c1894478af7b8205bb4760acb93c353", + "id": "nmdc:7c1894478af7b8205bb4760acb93c353", + "file_size_bytes": 3942110 + }, + { + "name": "Gp0127645_EC TSV file", + "description": "EC TSV file for Gp0127645", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ec.tsv", + "md5_checksum": "ac413560dfdbcea1f0697391b593c552", + "id": "nmdc:ac413560dfdbcea1f0697391b593c552", + "file_size_bytes": 2691460 + }, + { + "name": "Gp0127645_COG GFF file", + "description": "COG GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cog.gff", + "md5_checksum": "80f846ff418e4758f4c6b9a96ba2b8ca", + "id": "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", + "file_size_bytes": 19597211 
+ }, + { + "name": "Gp0127645_PFAM GFF file", + "description": "PFAM GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_pfam.gff", + "md5_checksum": "bbfcd35137b7cb018945a531704805eb", + "id": "nmdc:bbfcd35137b7cb018945a531704805eb", + "file_size_bytes": 14110039 + }, + { + "name": "Gp0127645_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_tigrfam.gff", + "md5_checksum": "c1c10952c472a97fb7de8bc7dbce564b", + "id": "nmdc:c1c10952c472a97fb7de8bc7dbce564b", + "file_size_bytes": 1502814 + }, + { + "name": "Gp0127645_SMART GFF file", + "description": "SMART GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_smart.gff", + "md5_checksum": "b86dba5a29f4ca25cec7c0590e0b4771", + "id": "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", + "file_size_bytes": 4354176 + }, + { + "name": "Gp0127645_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_supfam.gff", + "md5_checksum": "a701026580285ca67816cb9a2f272ca6", + "id": "nmdc:a701026580285ca67816cb9a2f272ca6", + "file_size_bytes": 24911282 + }, + { + "name": "Gp0127645_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cath_funfam.gff", + "md5_checksum": "5ce71fa6aebdb4fb9f843e89ab53ca9b", + "id": "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", + "file_size_bytes": 18832113 + }, + { + "name": "Gp0127645_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko_ec.gff", + "md5_checksum": "47c0e39e60bd4d688a29ede2af2cee35", + "id": "nmdc:47c0e39e60bd4d688a29ede2af2cee35", + 
"file_size_bytes": 12581509 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34717" + }, + "has_input": [ + "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", + "nmdc:d0280881c70c54946d9b5170e62b904b" + ], + "too_short_contig_num": 102729, + "part_of": [ + "nmdc:mga0jbfx89" + ], + "binned_contig_num": 61, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:47d1233f5afdd7b00790ac2ca8be778a", + "nmdc:637bc2394dcb4869149370683ccc9e61", + "nmdc:a8e49a136701e388199a72f02bb6d288", + "nmdc:b0d2597d04809508e9dd0bcb48c7edad", + "nmdc:106983a66b58a2d07f0592d9379ad635" + ], + "was_informed_by": "gold:Gp0127645", + "input_contig_num": 106865, + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0jbfx89", + "mags_list": [ + { + "number_of_contig": 61, + "completeness": 18.77, + "bin_name": "bins.1", + "gene_count": 307, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + } + ], + "unbinned_contig_num": 4075, + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:07:11+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127645_tooShort (< 3kb) filtered contigs 
fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.tooShort.fa", + "md5_checksum": "47d1233f5afdd7b00790ac2ca8be778a", + "id": "nmdc:47d1233f5afdd7b00790ac2ca8be778a", + "file_size_bytes": 43078346 + }, + { + "name": "Gp0127645_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.unbinned.fa", + "md5_checksum": "637bc2394dcb4869149370683ccc9e61", + "id": "nmdc:637bc2394dcb4869149370683ccc9e61", + "file_size_bytes": 6153132 + }, + { + "name": "Gp0127645_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127645", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_checkm_qa.out", + "md5_checksum": "a8e49a136701e388199a72f02bb6d288", + "id": "nmdc:a8e49a136701e388199a72f02bb6d288", + "file_size_bytes": 765 + }, + { + "name": "Gp0127645_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127645", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_hqmq_bin.zip", + "md5_checksum": "b0d2597d04809508e9dd0bcb48c7edad", + "id": "nmdc:b0d2597d04809508e9dd0bcb48c7edad", + "file_size_bytes": 182 + }, + { + "name": "Gp0127645_metabat2 bins", + "description": "metabat2 bins for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_metabat_bin.zip", + "md5_checksum": "106983a66b58a2d07f0592d9379ad635", + "id": "nmdc:106983a66b58a2d07f0592d9379ad635", + "file_size_bytes": 76018 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b5e" + }, + "id": "nmdc:omprc-11-dw7shd52", + 
"name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-7fedgs13" + ], + "has_output": [ + "jgi:574fde8c7ded5e3df1ee1424" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127649" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c885" + }, + "has_input": [ + "nmdc:5895de3040f750a5ce1b5238158fd51c" + ], + "part_of": [ + "nmdc:mga0j4fe07" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "nmdc:25a7ff469ffae5906d6ade4d74cab88f" + ], + "was_informed_by": "gold:Gp0127649", + "input_read_count": 24889788, + "output_read_bases": 3558782964, + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3758357988, + "name": "Read QC Activity for nmdc:mga0j4fe07", + "output_read_count": 23803802, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:38:32+00:00", + "output_data_objects": [ + { + "name": "Gp0127649_Filtered Reads", + "description": "Filtered Reads for Gp0127649", + "data_object_type": "Filtered Sequencing Reads", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", + "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "file_size_bytes": 1967546513 + }, + { + "name": "Gp0127649_Filtered Stats", + "description": "Filtered Stats for Gp0127649", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", + "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", + "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", + "file_size_bytes": 283 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf48" + }, + "has_input": [ + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "nmdc:8c1683fa4041bd10711aa3beb4735811", + "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "nmdc:0d07551972f3230ec2ef4a0e04929b97" + ], + "was_informed_by": "gold:Gp0127649", + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:38:32+00:00", + "output_data_objects": [ + { + "name": "Gp0127649_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", + "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", + "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "file_size_bytes": 2079 + }, + { + "name": "Gp0127649_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report 
for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", + "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", + "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "file_size_bytes": 642861 + }, + { + "name": "Gp0127649_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127649", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", + "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", + "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", + "file_size_bytes": 230792 + }, + { + "name": "Gp0127649_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127649", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", + "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", + "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "file_size_bytes": 1743695420 + }, + { + "name": "Gp0127649_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127649", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", + "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", + "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "file_size_bytes": 254036 + }, + { + "name": "Gp0127649_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127649", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", + "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", + "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "file_size_bytes": 2332943 + }, 
+ { + "name": "Gp0127649_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127649", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", + "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", + "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "file_size_bytes": 1387669799 + }, + { + "name": "Gp0127649_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127649", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", + "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", + "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "file_size_bytes": 637131 + }, + { + "name": "Gp0127649_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127649", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", + "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", + "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", + "file_size_bytes": 3976407 + } + ] + }, + { + "_id": { + "$oid": "61e719f8833bcf838a7018c7" + }, + "has_input": [ + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" + ], + "part_of": [ + "nmdc:mga0j4fe07" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "nmdc:8c1683fa4041bd10711aa3beb4735811", + "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "nmdc:0d07551972f3230ec2ef4a0e04929b97" + ], + "was_informed_by": "gold:Gp0127649", + "id": 
"nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:38:32+00:00", + "output_data_objects": [ + { + "name": "Gp0127649_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", + "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", + "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "file_size_bytes": 2079 + }, + { + "name": "Gp0127649_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", + "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", + "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "file_size_bytes": 642861 + }, + { + "name": "Gp0127649_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127649", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", + "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", + "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", + "file_size_bytes": 230792 + }, + { + "name": "Gp0127649_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127649", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", + "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", + "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "file_size_bytes": 1743695420 + }, + { + "name": "Gp0127649_Centrifuge TSV report", + "description": "Centrifuge TSV report for 
Gp0127649", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", + "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", + "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "file_size_bytes": 254036 + }, + { + "name": "Gp0127649_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127649", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", + "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", + "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "file_size_bytes": 2332943 + }, + { + "name": "Gp0127649_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127649", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", + "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", + "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "file_size_bytes": 1387669799 + }, + { + "name": "Gp0127649_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127649", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", + "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", + "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "file_size_bytes": 637131 + }, + { + "name": "Gp0127649_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127649", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", + "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", + "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", + 
"file_size_bytes": 3976407 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb8" + }, + "has_input": [ + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" + ], + "part_of": [ + "nmdc:mga0j4fe07" + ], + "ctg_logsum": 157844, + "scaf_logsum": 158661, + "gap_pct": 0.00147, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5ada15f24d3de4a96521532a4ced6018", + "nmdc:fc32ae27239661670605b59c395dd770", + "nmdc:d6e996af3275c4cdd3e51376517e2b6b", + "nmdc:f52600933fc5a09f7cead5c065d6b100", + "nmdc:5d9826a5f5164cfe20bfc1343144c96f" + ], + "asm_score": 3.279, + "was_informed_by": "gold:Gp0127649", + "ctg_powsum": 16877, + "scaf_max": 28201, + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "scaf_powsum": 16967, + "execution_resource": "NERSC-Cori", + "contigs": 190009, + "name": "Assembly Activity for nmdc:mga0j4fe07", + "ctg_max": 28201, + "gc_std": 0.09385, + "contig_bp": 87528185, + "gc_avg": 0.62766, + "started_at_time": "2021-10-11T02:23:29Z", + "scaf_bp": 87529475, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 189880, + "ended_at_time": "2021-10-11T03:38:32+00:00", + "ctg_l50": 440, + "ctg_l90": 289, + "ctg_n50": 57445, + "ctg_n90": 160942, + "scaf_l50": 440, + "scaf_l90": 289, + "scaf_n50": 57416, + "scaf_n90": 160823, + "output_data_objects": [ + { + "name": "Gp0127649_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127649", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_contigs.fna", + "md5_checksum": "5ada15f24d3de4a96521532a4ced6018", + "id": "nmdc:5ada15f24d3de4a96521532a4ced6018", + "file_size_bytes": 94852732 + }, + { + "name": "Gp0127649_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127649", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_scaffolds.fna", + "md5_checksum": 
"fc32ae27239661670605b59c395dd770", + "id": "nmdc:fc32ae27239661670605b59c395dd770", + "file_size_bytes": 94280572 + }, + { + "name": "Gp0127649_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_covstats.txt", + "md5_checksum": "d6e996af3275c4cdd3e51376517e2b6b", + "id": "nmdc:d6e996af3275c4cdd3e51376517e2b6b", + "file_size_bytes": 15029734 + }, + { + "name": "Gp0127649_Assembled AGP file", + "description": "Assembled AGP file for Gp0127649", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_assembly.agp", + "md5_checksum": "f52600933fc5a09f7cead5c065d6b100", + "id": "nmdc:f52600933fc5a09f7cead5c065d6b100", + "file_size_bytes": 14057243 + }, + { + "name": "Gp0127649_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127649", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_pairedMapped_sorted.bam", + "md5_checksum": "5d9826a5f5164cfe20bfc1343144c96f", + "id": "nmdc:5d9826a5f5164cfe20bfc1343144c96f", + "file_size_bytes": 2147322298 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ca" + }, + "has_input": [ + "nmdc:5ada15f24d3de4a96521532a4ced6018" + ], + "part_of": [ + "nmdc:mga0j4fe07" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:4e5d87bb4bb3198f5b9955622a781376", + "nmdc:40f79a8b021a3de27c464087fad9f092", + "nmdc:aba74592cf7aa507179e9544c008a0ec", + "nmdc:29500fc3a86f2767cc3752ba02fa0a05", + "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", + "nmdc:66bd5f2b62818742c6df5c39d1952a99", + "nmdc:e60c77fb34f71861ceacf988074949af", + "nmdc:3738ab59fb56002a9f38d95b101957bd", + "nmdc:2f34c5db7846cbf37add471c0dbca951", + "nmdc:fa7f659afca037861ae65e08092f2d83", + 
"nmdc:9ee627031c0b425974fa1aa4d695d4ae", + "nmdc:2fc423fd55e34d3400c9a6924df67633" + ], + "was_informed_by": "gold:Gp0127649", + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0j4fe07", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:38:32+00:00", + "output_data_objects": [ + { + "name": "Gp0127649_Protein FAA", + "description": "Protein FAA for Gp0127649", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_proteins.faa", + "md5_checksum": "4e5d87bb4bb3198f5b9955622a781376", + "id": "nmdc:4e5d87bb4bb3198f5b9955622a781376", + "file_size_bytes": 55301156 + }, + { + "name": "Gp0127649_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127649", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_structural_annotation.gff", + "md5_checksum": "40f79a8b021a3de27c464087fad9f092", + "id": "nmdc:40f79a8b021a3de27c464087fad9f092", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127649_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127649", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_functional_annotation.gff", + "md5_checksum": "aba74592cf7aa507179e9544c008a0ec", + "id": "nmdc:aba74592cf7aa507179e9544c008a0ec", + "file_size_bytes": 63464973 + }, + { + "name": "Gp0127649_KO TSV file", + "description": "KO TSV file for Gp0127649", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko.tsv", + "md5_checksum": "29500fc3a86f2767cc3752ba02fa0a05", + "id": 
"nmdc:29500fc3a86f2767cc3752ba02fa0a05", + "file_size_bytes": 7317450 + }, + { + "name": "Gp0127649_EC TSV file", + "description": "EC TSV file for Gp0127649", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ec.tsv", + "md5_checksum": "ba8fedc9b57d401ad0cc2b329038c5a9", + "id": "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", + "file_size_bytes": 4888576 + }, + { + "name": "Gp0127649_COG GFF file", + "description": "COG GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cog.gff", + "md5_checksum": "66bd5f2b62818742c6df5c39d1952a99", + "id": "nmdc:66bd5f2b62818742c6df5c39d1952a99", + "file_size_bytes": 36960882 + }, + { + "name": "Gp0127649_PFAM GFF file", + "description": "PFAM GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_pfam.gff", + "md5_checksum": "e60c77fb34f71861ceacf988074949af", + "id": "nmdc:e60c77fb34f71861ceacf988074949af", + "file_size_bytes": 27535342 + }, + { + "name": "Gp0127649_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_tigrfam.gff", + "md5_checksum": "3738ab59fb56002a9f38d95b101957bd", + "id": "nmdc:3738ab59fb56002a9f38d95b101957bd", + "file_size_bytes": 2999247 + }, + { + "name": "Gp0127649_SMART GFF file", + "description": "SMART GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_smart.gff", + "md5_checksum": "2f34c5db7846cbf37add471c0dbca951", + "id": "nmdc:2f34c5db7846cbf37add471c0dbca951", + "file_size_bytes": 8199823 + }, + { + "name": "Gp0127649_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_supfam.gff", + "md5_checksum": 
"fa7f659afca037861ae65e08092f2d83", + "id": "nmdc:fa7f659afca037861ae65e08092f2d83", + "file_size_bytes": 46114509 + }, + { + "name": "Gp0127649_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cath_funfam.gff", + "md5_checksum": "9ee627031c0b425974fa1aa4d695d4ae", + "id": "nmdc:9ee627031c0b425974fa1aa4d695d4ae", + "file_size_bytes": 34807554 + }, + { + "name": "Gp0127649_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko_ec.gff", + "md5_checksum": "2fc423fd55e34d3400c9a6924df67633", + "id": "nmdc:2fc423fd55e34d3400c9a6924df67633", + "file_size_bytes": 23276630 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34729" + }, + "has_input": [ + "nmdc:5ada15f24d3de4a96521532a4ced6018", + "nmdc:5d9826a5f5164cfe20bfc1343144c96f", + "nmdc:aba74592cf7aa507179e9544c008a0ec" + ], + "too_short_contig_num": 180499, + "part_of": [ + "nmdc:mga0j4fe07" + ], + "binned_contig_num": 211, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:f84d25fee16a4dece54f5580d893ecaa", + "nmdc:ed61fb0056b08bc82f4545c49b744c2a", + "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", + "nmdc:40273505b8b3dddd3ee5cb5c83871067", + "nmdc:b767f2b59d0fd9e650914e140cacf104" + ], + "was_informed_by": "gold:Gp0127649", + "input_contig_num": 190009, + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0j4fe07", + "mags_list": [ + { + "number_of_contig": 64, + "completeness": 16.46, + "bin_name": "bins.1", + "gene_count": 305, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.47, + "gtdbtk_class": "", + "gtdbtk_phylum": "", 
+ "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 147, + "completeness": 19.16, + "bin_name": "bins.2", + "gene_count": 744, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + } + ], + "unbinned_contig_num": 9299, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:38:32+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127649_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.tooShort.fa", + "md5_checksum": "f84d25fee16a4dece54f5580d893ecaa", + "id": "nmdc:f84d25fee16a4dece54f5580d893ecaa", + "file_size_bytes": 79592416 + }, + { + "name": "Gp0127649_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.unbinned.fa", + "md5_checksum": "ed61fb0056b08bc82f4545c49b744c2a", + "id": "nmdc:ed61fb0056b08bc82f4545c49b744c2a", + "file_size_bytes": 14383032 + }, + { + "name": "Gp0127649_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment 
result for Gp0127649", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_checkm_qa.out", + "md5_checksum": "8d4bce832a16affbcc3efeb8364e8eaa", + "id": "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", + "file_size_bytes": 942 + }, + { + "name": "Gp0127649_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127649", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_hqmq_bin.zip", + "md5_checksum": "40273505b8b3dddd3ee5cb5c83871067", + "id": "nmdc:40273505b8b3dddd3ee5cb5c83871067", + "file_size_bytes": 182 + }, + { + "name": "Gp0127649_metabat2 bins", + "description": "metabat2 bins for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_metabat_bin.zip", + "md5_checksum": "b767f2b59d0fd9e650914e140cacf104", + "id": "nmdc:b767f2b59d0fd9e650914e140cacf104", + "file_size_bytes": 269239 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b5f" + }, + "id": "nmdc:omprc-11-j43hz774", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-xngecc18" + ], + "has_output": [ + "jgi:574fe0af7ded5e3df1ee1493" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": 
"nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127652" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87e" + }, + "has_input": [ + "nmdc:b0548475f69b48e2d150cb90ae27f2c6" + ], + "part_of": [ + "nmdc:mga0mfxf90" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:60f03b815160b29125c2bd0776a330bf", + "nmdc:c40fa552711f6b19130b2a559f2d4cdc" + ], + "was_informed_by": "gold:Gp0127652", + "input_read_count": 26604768, + "output_read_bases": 3697162034, + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4017319968, + "name": "Read QC Activity for nmdc:mga0mfxf90", + "output_read_count": 24717950, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:21+00:00", + "output_data_objects": [ + { + "name": "Gp0127652_Filtered Reads", + "description": "Filtered Reads for Gp0127652", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filtered.fastq.gz", + "md5_checksum": "60f03b815160b29125c2bd0776a330bf", + "id": "nmdc:60f03b815160b29125c2bd0776a330bf", + "file_size_bytes": 2019434951 + }, + { + "name": "Gp0127652_Filtered Stats", + "description": "Filtered Stats for Gp0127652", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filterStats.txt", + "md5_checksum": "c40fa552711f6b19130b2a559f2d4cdc", + "id": "nmdc:c40fa552711f6b19130b2a559f2d4cdc", + "file_size_bytes": 290 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf40" + }, + "has_input": [ + "nmdc:60f03b815160b29125c2bd0776a330bf" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:70f29a321c925cfc0e2003515f708400", + 
"nmdc:93d5419c0b31e0696ab8ffef477945fb", + "nmdc:9cd3b2939adabd809741ae6a84260266", + "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "nmdc:1df4b479c887b43319d89cc80dc35239" + ], + "was_informed_by": "gold:Gp0127652", + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:45:21+00:00", + "output_data_objects": [ + { + "name": "Gp0127652_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", + "md5_checksum": "70f29a321c925cfc0e2003515f708400", + "id": "nmdc:70f29a321c925cfc0e2003515f708400", + "file_size_bytes": 1524 + }, + { + "name": "Gp0127652_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", + "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", + "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "file_size_bytes": 670250 + }, + { + "name": "Gp0127652_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127652", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", + "md5_checksum": "9cd3b2939adabd809741ae6a84260266", + "id": "nmdc:9cd3b2939adabd809741ae6a84260266", + "file_size_bytes": 229949 + }, + { + "name": "Gp0127652_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127652", + "data_object_type": "Centrifuge 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", + "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", + "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "file_size_bytes": 1814515284 + }, + { + "name": "Gp0127652_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127652", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", + "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", + "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127652_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127652", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", + "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", + "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "file_size_bytes": 2330558 + }, + { + "name": "Gp0127652_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127652", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", + "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", + "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "file_size_bytes": 1445957300 + }, + { + "name": "Gp0127652_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127652", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", + "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", + "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "file_size_bytes": 639677 + }, 
+ { + "name": "Gp0127652_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127652", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", + "md5_checksum": "1df4b479c887b43319d89cc80dc35239", + "id": "nmdc:1df4b479c887b43319d89cc80dc35239", + "file_size_bytes": 3991377 + } + ] + }, + { + "_id": { + "$oid": "61e719de833bcf838a7015b2" + }, + "has_input": [ + "nmdc:60f03b815160b29125c2bd0776a330bf" + ], + "part_of": [ + "nmdc:mga0mfxf90" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:70f29a321c925cfc0e2003515f708400", + "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "nmdc:9cd3b2939adabd809741ae6a84260266", + "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "nmdc:1df4b479c887b43319d89cc80dc35239" + ], + "was_informed_by": "gold:Gp0127652", + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:45:21+00:00", + "output_data_objects": [ + { + "name": "Gp0127652_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", + "md5_checksum": "70f29a321c925cfc0e2003515f708400", + "id": "nmdc:70f29a321c925cfc0e2003515f708400", + "file_size_bytes": 1524 + }, + { + "name": "Gp0127652_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", + 
"md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", + "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "file_size_bytes": 670250 + }, + { + "name": "Gp0127652_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127652", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", + "md5_checksum": "9cd3b2939adabd809741ae6a84260266", + "id": "nmdc:9cd3b2939adabd809741ae6a84260266", + "file_size_bytes": 229949 + }, + { + "name": "Gp0127652_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127652", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", + "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", + "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "file_size_bytes": 1814515284 + }, + { + "name": "Gp0127652_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127652", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", + "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", + "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127652_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127652", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", + "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", + "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "file_size_bytes": 2330558 + }, + { + "name": "Gp0127652_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127652", + 
"data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", + "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", + "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "file_size_bytes": 1445957300 + }, + { + "name": "Gp0127652_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127652", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", + "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", + "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "file_size_bytes": 639677 + }, + { + "name": "Gp0127652_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127652", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", + "md5_checksum": "1df4b479c887b43319d89cc80dc35239", + "id": "nmdc:1df4b479c887b43319d89cc80dc35239", + "file_size_bytes": 3991377 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb1" + }, + "has_input": [ + "nmdc:60f03b815160b29125c2bd0776a330bf" + ], + "part_of": [ + "nmdc:mga0mfxf90" + ], + "ctg_logsum": 293195, + "scaf_logsum": 294510, + "gap_pct": 0.0019, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a550eb6e614b375c1089ab816163ea63", + "nmdc:9f194d271c352af3f68f2afeb1dbd499", + "nmdc:b0a79069110825cfe5525a8fc4f02cb6", + "nmdc:f54e8bda482b1cb8bc8e121ee5f39e07", + "nmdc:c8c5056ee57126695073137d0c1d3d04" + ], + "asm_score": 3.266, + "was_informed_by": "gold:Gp0127652", + "ctg_powsum": 31744, + "scaf_max": 16883, + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "scaf_powsum": 31903, + "execution_resource": "NERSC-Cori", + "contigs": 216252, + "name": "Assembly Activity for nmdc:mga0mfxf90", + "ctg_max": 16883, 
+ "gc_std": 0.09516, + "contig_bp": 108575090, + "gc_avg": 0.63494, + "started_at_time": "2021-10-11T02:27:08Z", + "scaf_bp": 108577150, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 216046, + "ended_at_time": "2021-10-11T04:45:21+00:00", + "ctg_l50": 493, + "ctg_l90": 290, + "ctg_n50": 57034, + "ctg_n90": 179762, + "scaf_l50": 493, + "scaf_l90": 290, + "scaf_n50": 56962, + "scaf_n90": 179563, + "output_data_objects": [ + { + "name": "Gp0127652_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127652", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_contigs.fna", + "md5_checksum": "a550eb6e614b375c1089ab816163ea63", + "id": "nmdc:a550eb6e614b375c1089ab816163ea63", + "file_size_bytes": 117075841 + }, + { + "name": "Gp0127652_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127652", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_scaffolds.fna", + "md5_checksum": "9f194d271c352af3f68f2afeb1dbd499", + "id": "nmdc:9f194d271c352af3f68f2afeb1dbd499", + "file_size_bytes": 116423675 + }, + { + "name": "Gp0127652_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_covstats.txt", + "md5_checksum": "b0a79069110825cfe5525a8fc4f02cb6", + "id": "nmdc:b0a79069110825cfe5525a8fc4f02cb6", + "file_size_bytes": 17141637 + }, + { + "name": "Gp0127652_Assembled AGP file", + "description": "Assembled AGP file for Gp0127652", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_assembly.agp", + "md5_checksum": "f54e8bda482b1cb8bc8e121ee5f39e07", + "id": "nmdc:f54e8bda482b1cb8bc8e121ee5f39e07", + "file_size_bytes": 16044279 + }, + { + "name": 
"Gp0127652_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127652", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_pairedMapped_sorted.bam", + "md5_checksum": "c8c5056ee57126695073137d0c1d3d04", + "id": "nmdc:c8c5056ee57126695073137d0c1d3d04", + "file_size_bytes": 2224050507 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c3" + }, + "has_input": [ + "nmdc:a550eb6e614b375c1089ab816163ea63" + ], + "part_of": [ + "nmdc:mga0mfxf90" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", + "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", + "nmdc:863dc502676573c59ce69b1ff786042a", + "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", + "nmdc:a826d96e791f69ff7759d57f44a8a510", + "nmdc:58e310990be01a574eef05b3f5dd1495", + "nmdc:28ce5c4c605a1c4538ce63987252c0ad", + "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", + "nmdc:6342c9c98e297d2e39a2144c7ca0191b", + "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", + "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", + "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08" + ], + "was_informed_by": "gold:Gp0127652", + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0mfxf90", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:45:21+00:00", + "output_data_objects": [ + { + "name": "Gp0127652_Protein FAA", + "description": "Protein FAA for Gp0127652", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_proteins.faa", + "md5_checksum": "096c54bce5ec1cc5d41ac64553e42cb3", + "id": "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", + "file_size_bytes": 66555768 + }, + { + "name": "Gp0127652_Structural annotation GFF file", + "description": 
"Structural annotation GFF file for Gp0127652", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_structural_annotation.gff", + "md5_checksum": "ac8cd253a39e6e5fe0a0930f3bf6888a", + "id": "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127652_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127652", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_functional_annotation.gff", + "md5_checksum": "863dc502676573c59ce69b1ff786042a", + "id": "nmdc:863dc502676573c59ce69b1ff786042a", + "file_size_bytes": 74520486 + }, + { + "name": "Gp0127652_KO TSV file", + "description": "KO TSV file for Gp0127652", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko.tsv", + "md5_checksum": "28ed2a9e345d0e542127fd1dc2173ae7", + "id": "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", + "file_size_bytes": 8379185 + }, + { + "name": "Gp0127652_EC TSV file", + "description": "EC TSV file for Gp0127652", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ec.tsv", + "md5_checksum": "a826d96e791f69ff7759d57f44a8a510", + "id": "nmdc:a826d96e791f69ff7759d57f44a8a510", + "file_size_bytes": 5555311 + }, + { + "name": "Gp0127652_COG GFF file", + "description": "COG GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cog.gff", + "md5_checksum": "58e310990be01a574eef05b3f5dd1495", + "id": "nmdc:58e310990be01a574eef05b3f5dd1495", + "file_size_bytes": 43385646 + }, + { + "name": "Gp0127652_PFAM GFF file", + "description": "PFAM GFF file for Gp0127652", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_pfam.gff", + "md5_checksum": "28ce5c4c605a1c4538ce63987252c0ad", + "id": "nmdc:28ce5c4c605a1c4538ce63987252c0ad", + "file_size_bytes": 33061709 + }, + { + "name": "Gp0127652_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_tigrfam.gff", + "md5_checksum": "6de9ddf0b07c9bcf1409aceb7ee2f941", + "id": "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", + "file_size_bytes": 3665042 + }, + { + "name": "Gp0127652_SMART GFF file", + "description": "SMART GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_smart.gff", + "md5_checksum": "6342c9c98e297d2e39a2144c7ca0191b", + "id": "nmdc:6342c9c98e297d2e39a2144c7ca0191b", + "file_size_bytes": 9667737 + }, + { + "name": "Gp0127652_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_supfam.gff", + "md5_checksum": "d20aa781d3ad6b0face7cc9c412bc3f7", + "id": "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", + "file_size_bytes": 54593577 + }, + { + "name": "Gp0127652_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cath_funfam.gff", + "md5_checksum": "db2e4b8f6cc1e8dc934e14b93589805a", + "id": "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", + "file_size_bytes": 41409254 + }, + { + "name": "Gp0127652_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko_ec.gff", + "md5_checksum": "f51f9d679d1b045f4ebc61dab7fc2f08", + "id": "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08", + "file_size_bytes": 26617726 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34727" + }, + "has_input": [ + 
"nmdc:a550eb6e614b375c1089ab816163ea63", + "nmdc:c8c5056ee57126695073137d0c1d3d04", + "nmdc:863dc502676573c59ce69b1ff786042a" + ], + "too_short_contig_num": 200309, + "part_of": [ + "nmdc:mga0mfxf90" + ], + "binned_contig_num": 835, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:4371932b5834f2deadb2fbfc42b056f7", + "nmdc:5a8d8441e6e472837809ee31d517d32a", + "nmdc:16016a7b2388048eec469f73395bc478", + "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", + "nmdc:21467369d04671628ae67afbaf1d2076" + ], + "was_informed_by": "gold:Gp0127652", + "input_contig_num": 216252, + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0mfxf90", + "mags_list": [ + { + "number_of_contig": 233, + "completeness": 12.16, + "bin_name": "bins.1", + "gene_count": 1133, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 349, + "completeness": 45.68, + "bin_name": "bins.2", + "gene_count": 1809, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 106, + "completeness": 17.54, + "bin_name": "bins.3", + "gene_count": 552, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 147, + "completeness": 
14.66, + "bin_name": "bins.4", + "gene_count": 668, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 15108, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:21+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127652_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.tooShort.fa", + "md5_checksum": "4371932b5834f2deadb2fbfc42b056f7", + "id": "nmdc:4371932b5834f2deadb2fbfc42b056f7", + "file_size_bytes": 89154072 + }, + { + "name": "Gp0127652_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.unbinned.fa", + "md5_checksum": "5a8d8441e6e472837809ee31d517d32a", + "id": "nmdc:5a8d8441e6e472837809ee31d517d32a", + "file_size_bytes": 24514353 + }, + { + "name": "Gp0127652_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127652", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_checkm_qa.out", + "md5_checksum": "16016a7b2388048eec469f73395bc478", + "id": "nmdc:16016a7b2388048eec469f73395bc478", + "file_size_bytes": 1320 + }, + { + "name": "Gp0127652_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127652", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_hqmq_bin.zip", + "md5_checksum": "1e604f9f29f74c6169c4d27f839bb7b0", + "id": "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", + "file_size_bytes": 182 + }, + { + "name": "Gp0127652_metabat2 bins", + "description": "metabat2 bins for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_metabat_bin.zip", + "md5_checksum": "21467369d04671628ae67afbaf1d2076", + "id": "nmdc:21467369d04671628ae67afbaf1d2076", + "file_size_bytes": 1013750 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b60" + }, + "id": "nmdc:omprc-11-kgxpef29", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-tpk9x619" + ], + "has_output": [ + "jgi:574fe0b17ded5e3df1ee1494" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + 
"gold:Gp0127654" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87f" + }, + "has_input": [ + "nmdc:c87a7a87a5218698fbdd8ad39085b892" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", + "nmdc:9c600ec3be94d876f00d22808f3e8a59" + ], + "was_informed_by": "gold:Gp0127654", + "input_read_count": 30951192, + "output_read_bases": 4526478748, + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4673629992, + "name": "Read QC Activity for nmdc:mga0h0s362", + "output_read_count": 30289044, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127654_Filtered Reads", + "description": "Filtered Reads for Gp0127654", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", + "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", + "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", + "file_size_bytes": 2479437709 + }, + { + "name": "Gp0127654_Filtered Stats", + "description": "Filtered Stats for Gp0127654", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", + "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", + "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", + "file_size_bytes": 284 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf45" + }, + "has_input": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:130ee7559789726a2cadccd3126dacad", + "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", + "nmdc:7ccb4ee5a0728322154b29a79d13c842", + 
"nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "nmdc:dfc90170aa038c2425702be223cb2f23", + "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "nmdc:1c8339d96884c4a408de7804e00490d1" + ], + "was_informed_by": "gold:Gp0127654", + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127654_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", + "md5_checksum": "130ee7559789726a2cadccd3126dacad", + "id": "nmdc:130ee7559789726a2cadccd3126dacad", + "file_size_bytes": 3508 + }, + { + "name": "Gp0127654_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", + "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", + "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", + "file_size_bytes": 798264 + }, + { + "name": "Gp0127654_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127654", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", + "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", + "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", + "file_size_bytes": 234834 + }, + { + "name": "Gp0127654_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127654", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", + "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", + "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "file_size_bytes": 2231971137 + }, + { + "name": "Gp0127654_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127654", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", + "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", + "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "file_size_bytes": 257151 + }, + { + "name": "Gp0127654_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127654", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", + "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", + "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "file_size_bytes": 2341088 + }, + { + "name": "Gp0127654_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127654", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", + "md5_checksum": "dfc90170aa038c2425702be223cb2f23", + "id": "nmdc:dfc90170aa038c2425702be223cb2f23", + "file_size_bytes": 1782429285 + }, + { + "name": "Gp0127654_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127654", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", + "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", + "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "file_size_bytes": 661482 + }, + { + "name": "Gp0127654_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127654", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", + "md5_checksum": "1c8339d96884c4a408de7804e00490d1", + "id": "nmdc:1c8339d96884c4a408de7804e00490d1", + "file_size_bytes": 4020719 + } + ] + }, + { + "_id": { + "$oid": "61e719f6833bcf838a7017f0" + }, + "has_input": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:130ee7559789726a2cadccd3126dacad", + "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", + "nmdc:7ccb4ee5a0728322154b29a79d13c842", + "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "nmdc:dfc90170aa038c2425702be223cb2f23", + "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "nmdc:1c8339d96884c4a408de7804e00490d1" + ], + "was_informed_by": "gold:Gp0127654", + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127654_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", + "md5_checksum": "130ee7559789726a2cadccd3126dacad", + "id": "nmdc:130ee7559789726a2cadccd3126dacad", + "file_size_bytes": 3508 + }, + { + "name": "Gp0127654_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", + "md5_checksum": 
"c955eae73afbfe1ad4c4eb2eac51f3f3", + "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", + "file_size_bytes": 798264 + }, + { + "name": "Gp0127654_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127654", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", + "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", + "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", + "file_size_bytes": 234834 + }, + { + "name": "Gp0127654_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127654", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", + "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", + "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "file_size_bytes": 2231971137 + }, + { + "name": "Gp0127654_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127654", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", + "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", + "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "file_size_bytes": 257151 + }, + { + "name": "Gp0127654_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127654", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", + "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", + "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "file_size_bytes": 2341088 + }, + { + "name": "Gp0127654_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127654", + "data_object_type": "Kraken2 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", + "md5_checksum": "dfc90170aa038c2425702be223cb2f23", + "id": "nmdc:dfc90170aa038c2425702be223cb2f23", + "file_size_bytes": 1782429285 + }, + { + "name": "Gp0127654_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127654", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", + "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", + "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "file_size_bytes": 661482 + }, + { + "name": "Gp0127654_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127654", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", + "md5_checksum": "1c8339d96884c4a408de7804e00490d1", + "id": "nmdc:1c8339d96884c4a408de7804e00490d1", + "file_size_bytes": 4020719 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb4" + }, + "has_input": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "ctg_logsum": 130142, + "scaf_logsum": 130537, + "gap_pct": 0.0008, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", + "nmdc:e2281ea2c0342c7243ac6a3179948547", + "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", + "nmdc:d8e09db1617046117fbb15631cf4977f" + ], + "asm_score": 4.409, + "was_informed_by": "gold:Gp0127654", + "ctg_powsum": 13918, + "scaf_max": 69027, + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "scaf_powsum": 13961, + "execution_resource": "NERSC-Cori", + "contigs": 197669, + "name": "Assembly Activity for nmdc:mga0h0s362", + "ctg_max": 69027, + "gc_std": 0.09749, + 
"contig_bp": 85731750, + "gc_avg": 0.62891, + "started_at_time": "2021-10-11T02:23:29Z", + "scaf_bp": 85732440, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 197600, + "ended_at_time": "2021-10-11T03:58:56+00:00", + "ctg_l50": 404, + "ctg_l90": 286, + "ctg_n50": 62467, + "ctg_n90": 168661, + "scaf_l50": 404, + "scaf_l90": 286, + "scaf_n50": 62435, + "scaf_n90": 168596, + "scaf_l_gt50k": 69027, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.080514446, + "output_data_objects": [ + { + "name": "Gp0127654_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127654", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_contigs.fna", + "md5_checksum": "909ae2a351ab1b99dfa877969ba33fc0", + "id": "nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "file_size_bytes": 93264957 + }, + { + "name": "Gp0127654_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127654", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_scaffolds.fna", + "md5_checksum": "1bd3a82d1ced0a3a4e4b207ecdeedc50", + "id": "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", + "file_size_bytes": 92670816 + }, + { + "name": "Gp0127654_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_covstats.txt", + "md5_checksum": "e2281ea2c0342c7243ac6a3179948547", + "id": "nmdc:e2281ea2c0342c7243ac6a3179948547", + "file_size_bytes": 15633835 + }, + { + "name": "Gp0127654_Assembled AGP file", + "description": "Assembled AGP file for Gp0127654", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_assembly.agp", + "md5_checksum": "ad045e491d27a8a2a4bb13c62ed74fd8", + "id": "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", + 
"file_size_bytes": 14624353 + }, + { + "name": "Gp0127654_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127654", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_pairedMapped_sorted.bam", + "md5_checksum": "d8e09db1617046117fbb15631cf4977f", + "id": "nmdc:d8e09db1617046117fbb15631cf4977f", + "file_size_bytes": 2687176632 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c7" + }, + "has_input": [ + "nmdc:909ae2a351ab1b99dfa877969ba33fc0" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", + "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", + "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", + "nmdc:2275c42fa5206d646c7b477b184b9519", + "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", + "nmdc:fabdc762526357e8a6f288a07f947f06", + "nmdc:1e8dcb98dfc7598e3965af187c296f12", + "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", + "nmdc:8add80a0fe95822917e4e7eaf275ed4f", + "nmdc:6268ff527b56548792e7dca811500436", + "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", + "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c" + ], + "was_informed_by": "gold:Gp0127654", + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0h0s362", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127654_Protein FAA", + "description": "Protein FAA for Gp0127654", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_proteins.faa", + "md5_checksum": "7e7c871dbe9ed0b2692444b77d0afe8d", + "id": "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", + "file_size_bytes": 55142968 + }, + { + "name": "Gp0127654_Structural 
annotation GFF file", + "description": "Structural annotation GFF file for Gp0127654", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_structural_annotation.gff", + "md5_checksum": "7b466cbbadfde9b125f2a31e48d8c60d", + "id": "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127654_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127654", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_functional_annotation.gff", + "md5_checksum": "6a03c0a78fa59ac0a55777a9ea73e5d0", + "id": "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", + "file_size_bytes": 64337475 + }, + { + "name": "Gp0127654_KO TSV file", + "description": "KO TSV file for Gp0127654", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko.tsv", + "md5_checksum": "2275c42fa5206d646c7b477b184b9519", + "id": "nmdc:2275c42fa5206d646c7b477b184b9519", + "file_size_bytes": 7628926 + }, + { + "name": "Gp0127654_EC TSV file", + "description": "EC TSV file for Gp0127654", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ec.tsv", + "md5_checksum": "9c7fc55c2cbc986d520695dfb69b3e26", + "id": "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", + "file_size_bytes": 5084393 + }, + { + "name": "Gp0127654_COG GFF file", + "description": "COG GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cog.gff", + "md5_checksum": "fabdc762526357e8a6f288a07f947f06", + "id": "nmdc:fabdc762526357e8a6f288a07f947f06", + "file_size_bytes": 37680499 + }, + { + "name": "Gp0127654_PFAM GFF file", + "description": "PFAM GFF file for Gp0127654", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_pfam.gff", + "md5_checksum": "1e8dcb98dfc7598e3965af187c296f12", + "id": "nmdc:1e8dcb98dfc7598e3965af187c296f12", + "file_size_bytes": 27765282 + }, + { + "name": "Gp0127654_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_tigrfam.gff", + "md5_checksum": "86f1a8ccf1532e11fc09d94dc39af57c", + "id": "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", + "file_size_bytes": 2970208 + }, + { + "name": "Gp0127654_SMART GFF file", + "description": "SMART GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_smart.gff", + "md5_checksum": "8add80a0fe95822917e4e7eaf275ed4f", + "id": "nmdc:8add80a0fe95822917e4e7eaf275ed4f", + "file_size_bytes": 8172309 + }, + { + "name": "Gp0127654_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_supfam.gff", + "md5_checksum": "6268ff527b56548792e7dca811500436", + "id": "nmdc:6268ff527b56548792e7dca811500436", + "file_size_bytes": 46611499 + }, + { + "name": "Gp0127654_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cath_funfam.gff", + "md5_checksum": "ff7ac6fb709d1f0f7b476c9a5b29524e", + "id": "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", + "file_size_bytes": 35108681 + }, + { + "name": "Gp0127654_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko_ec.gff", + "md5_checksum": "6c50fdd87bdba9116c1ff81e21b8a95c", + "id": "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c", + "file_size_bytes": 24261565 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34728" + }, + "has_input": [ + 
"nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "nmdc:d8e09db1617046117fbb15631cf4977f", + "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0" + ], + "too_short_contig_num": 189586, + "part_of": [ + "nmdc:mga0h0s362" + ], + "binned_contig_num": 56, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", + "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", + "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", + "nmdc:470edf3d79702d3b806b545db595ca02", + "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e" + ], + "was_informed_by": "gold:Gp0127654", + "input_contig_num": 197669, + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0h0s362", + "mags_list": [ + { + "number_of_contig": 56, + "completeness": 18.09, + "bin_name": "bins.1", + "gene_count": 272, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 8027, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127654_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127654", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.tooShort.fa", + "md5_checksum": "920bcae91eae59ed8b9b19bcb7392ac5", + "id": "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", + "file_size_bytes": 80638518 + }, + { + "name": "Gp0127654_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.unbinned.fa", + "md5_checksum": "d13bc24bdf72e7ba00d60f0e2e0805e8", + "id": "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", + "file_size_bytes": 12400628 + }, + { + "name": "Gp0127654_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127654", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_checkm_qa.out", + "md5_checksum": "3fd777151ef41b39b272cb42c1d5e8ba", + "id": "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", + "file_size_bytes": 785 + }, + { + "name": "Gp0127654_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127654", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_hqmq_bin.zip", + "md5_checksum": "470edf3d79702d3b806b545db595ca02", + "id": "nmdc:470edf3d79702d3b806b545db595ca02", + "file_size_bytes": 182 + }, + { + "name": "Gp0127654_metabat2 bins", + "description": "metabat2 bins for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_metabat_bin.zip", + "md5_checksum": "8fc6f1a0269aa5179d72c52cf1a9726e", + "id": "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e", + "file_size_bytes": 69938 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b61" + }, + "id": "nmdc:omprc-11-qrsway30", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA 
- GW-RW S2_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-mmr87q87" + ], + "has_output": [ + "jgi:574fe0b47ded5e3df1ee1496" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-18", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127656" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87c" + }, + "has_input": [ + "nmdc:a604c87c632165bb5223eebda60801d0" + ], + "part_of": [ + "nmdc:mga00hh562" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:cec95659bb04ae095f51821ddaa9fa59", + "nmdc:7b4f365bbe942a523890abf13d1b6436" + ], + "was_informed_by": "gold:Gp0127656", + "input_read_count": 27317020, + "output_read_bases": 3960490395, + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4124870020, + "name": "Read QC Activity for nmdc:mga00hh562", + "output_read_count": 26481746, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127656_Filtered Reads", + "description": "Filtered Reads for Gp0127656", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filtered.fastq.gz", + "md5_checksum": "cec95659bb04ae095f51821ddaa9fa59", + "id": 
"nmdc:cec95659bb04ae095f51821ddaa9fa59", + "file_size_bytes": 2195848744 + }, + { + "name": "Gp0127656_Filtered Stats", + "description": "Filtered Stats for Gp0127656", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filterStats.txt", + "md5_checksum": "7b4f365bbe942a523890abf13d1b6436", + "id": "nmdc:7b4f365bbe942a523890abf13d1b6436", + "file_size_bytes": 284 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf3e" + }, + "has_input": [ + "nmdc:cec95659bb04ae095f51821ddaa9fa59" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ccbe419157d8286626330fd0eb0dd0e0", + "nmdc:92ab65cdaca3367552e03d895123e04f", + "nmdc:0b3ff6503723d6ea9b84552f68ed4270", + "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", + "nmdc:a3255df52cd6150f03bbf7cbd655ec76", + "nmdc:a25a5d7e399624e5e5735b65a9dd322a", + "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", + "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", + "nmdc:ae369194e4b24e137fc23da0412277a6" + ], + "was_informed_by": "gold:Gp0127656", + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga00hh562", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127656_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", + "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", + "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", + "file_size_bytes": 2418 + }, + { + "name": "Gp0127656_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", + 
"md5_checksum": "92ab65cdaca3367552e03d895123e04f", + "id": "nmdc:92ab65cdaca3367552e03d895123e04f", + "file_size_bytes": 759212 + }, + { + "name": "Gp0127656_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127656", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", + "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", + "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", + "file_size_bytes": 231563 + }, + { + "name": "Gp0127656_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127656", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", + "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", + "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", + "file_size_bytes": 1950007455 + }, + { + "name": "Gp0127656_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127656", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", + "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", + "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", + "file_size_bytes": 255724 + }, + { + "name": "Gp0127656_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127656", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", + "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", + "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", + "file_size_bytes": 2337553 + }, + { + "name": "Gp0127656_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127656", + 
"data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", + "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", + "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", + "file_size_bytes": 1555636513 + }, + { + "name": "Gp0127656_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127656", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", + "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", + "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", + "file_size_bytes": 647090 + }, + { + "name": "Gp0127656_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127656", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", + "md5_checksum": "ae369194e4b24e137fc23da0412277a6", + "id": "nmdc:ae369194e4b24e137fc23da0412277a6", + "file_size_bytes": 3939982 + } + ] + }, + { + "_id": { + "$oid": "61e719f0833bcf838a701752" + }, + "has_input": [ + "nmdc:cec95659bb04ae095f51821ddaa9fa59" + ], + "part_of": [ + "nmdc:mga00hh562" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ccbe419157d8286626330fd0eb0dd0e0", + "nmdc:92ab65cdaca3367552e03d895123e04f", + "nmdc:0b3ff6503723d6ea9b84552f68ed4270", + "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", + "nmdc:a3255df52cd6150f03bbf7cbd655ec76", + "nmdc:a25a5d7e399624e5e5735b65a9dd322a", + "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", + "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", + "nmdc:ae369194e4b24e137fc23da0412277a6" + ], + "was_informed_by": "gold:Gp0127656", + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga00hh562", + 
"started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127656_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", + "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", + "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", + "file_size_bytes": 2418 + }, + { + "name": "Gp0127656_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", + "md5_checksum": "92ab65cdaca3367552e03d895123e04f", + "id": "nmdc:92ab65cdaca3367552e03d895123e04f", + "file_size_bytes": 759212 + }, + { + "name": "Gp0127656_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127656", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", + "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", + "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", + "file_size_bytes": 231563 + }, + { + "name": "Gp0127656_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127656", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", + "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", + "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", + "file_size_bytes": 1950007455 + }, + { + "name": "Gp0127656_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127656", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", + "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", + "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", + "file_size_bytes": 255724 + }, + { + "name": "Gp0127656_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127656", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", + "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", + "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", + "file_size_bytes": 2337553 + }, + { + "name": "Gp0127656_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127656", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", + "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", + "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", + "file_size_bytes": 1555636513 + }, + { + "name": "Gp0127656_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127656", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", + "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", + "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", + "file_size_bytes": 647090 + }, + { + "name": "Gp0127656_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127656", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", + "md5_checksum": "ae369194e4b24e137fc23da0412277a6", + "id": "nmdc:ae369194e4b24e137fc23da0412277a6", + "file_size_bytes": 3939982 + } + ] + }, + { + "_id": { + "$oid": 
"649b005f2ca5ee4adb139fb2" + }, + "has_input": [ + "nmdc:cec95659bb04ae095f51821ddaa9fa59" + ], + "part_of": [ + "nmdc:mga00hh562" + ], + "ctg_logsum": 98556, + "scaf_logsum": 99077, + "gap_pct": 0.00105, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "nmdc:55385159fa8361d7ff747cdc1155512b", + "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", + "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", + "nmdc:941f749a92155321c5ce7e5aa32d3b55" + ], + "asm_score": 2.914, + "was_informed_by": "gold:Gp0127656", + "ctg_powsum": 10453, + "scaf_max": 9079, + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "scaf_powsum": 10508, + "execution_resource": "NERSC-Cori", + "contigs": 169495, + "name": "Assembly Activity for nmdc:mga00hh562", + "ctg_max": 9079, + "gc_std": 0.09653, + "contig_bp": 72511508, + "gc_avg": 0.62989, + "started_at_time": "2021-10-11T02:23:35Z", + "scaf_bp": 72512268, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169419, + "ended_at_time": "2021-10-11T03:58:56+00:00", + "ctg_l50": 399, + "ctg_l90": 286, + "ctg_n50": 54638, + "ctg_n90": 144448, + "scaf_l50": 399, + "scaf_l90": 286, + "scaf_n50": 54616, + "scaf_n90": 144376, + "output_data_objects": [ + { + "name": "Gp0127656_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127656", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_contigs.fna", + "md5_checksum": "8106808f8e245ef9a46a4e31561eba7f", + "id": "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "file_size_bytes": 78938478 + }, + { + "name": "Gp0127656_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127656", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_scaffolds.fna", + "md5_checksum": "55385159fa8361d7ff747cdc1155512b", + "id": 
"nmdc:55385159fa8361d7ff747cdc1155512b", + "file_size_bytes": 78428743 + }, + { + "name": "Gp0127656_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_covstats.txt", + "md5_checksum": "4741908a5b07eaa2312ff3e6d2d991aa", + "id": "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", + "file_size_bytes": 13384382 + }, + { + "name": "Gp0127656_Assembled AGP file", + "description": "Assembled AGP file for Gp0127656", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_assembly.agp", + "md5_checksum": "172e5cf3b5c5bf8e4896058dad3e814a", + "id": "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", + "file_size_bytes": 12508060 + }, + { + "name": "Gp0127656_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127656", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_pairedMapped_sorted.bam", + "md5_checksum": "941f749a92155321c5ce7e5aa32d3b55", + "id": "nmdc:941f749a92155321c5ce7e5aa32d3b55", + "file_size_bytes": 2375706529 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c6" + }, + "has_input": [ + "nmdc:8106808f8e245ef9a46a4e31561eba7f" + ], + "part_of": [ + "nmdc:mga00hh562" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", + "nmdc:87d5f3a505d23c1aa2deea960702d55b", + "nmdc:8e8be343bbb1ba11f3e15867b419d05d", + "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", + "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", + "nmdc:19da9b3f211164643f276bc74604c9b0", + "nmdc:19905547dfa37274a9f91c9caaf6bacc", + "nmdc:30c2b0722d225938975243ab1041ed12", + "nmdc:623e913fa98f88f6037754daf5d9ffc5", + "nmdc:ec56df16785bc67e073128f09366ec43", + "nmdc:2831d1ead0af4681b2ae1a9f21733637", + 
"nmdc:53f225f74011f7d30fcfd5c60b3693ae" + ], + "was_informed_by": "gold:Gp0127656", + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga00hh562", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "Gp0127656_Protein FAA", + "description": "Protein FAA for Gp0127656", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_proteins.faa", + "md5_checksum": "18f68cc8acda8d33d5fd6f21a9166aa8", + "id": "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", + "file_size_bytes": 46951183 + }, + { + "name": "Gp0127656_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127656", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_structural_annotation.gff", + "md5_checksum": "87d5f3a505d23c1aa2deea960702d55b", + "id": "nmdc:87d5f3a505d23c1aa2deea960702d55b", + "file_size_bytes": 2511 + }, + { + "name": "Gp0127656_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127656", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_functional_annotation.gff", + "md5_checksum": "8e8be343bbb1ba11f3e15867b419d05d", + "id": "nmdc:8e8be343bbb1ba11f3e15867b419d05d", + "file_size_bytes": 54902900 + }, + { + "name": "Gp0127656_KO TSV file", + "description": "KO TSV file for Gp0127656", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko.tsv", + "md5_checksum": "91c2485c0ebf683aed3e7935ec60b7d1", + "id": "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", + "file_size_bytes": 6468844 + }, 
+ { + "name": "Gp0127656_EC TSV file", + "description": "EC TSV file for Gp0127656", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ec.tsv", + "md5_checksum": "fb6740e86534daeea41ab6d5cf9d91d2", + "id": "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", + "file_size_bytes": 4308547 + }, + { + "name": "Gp0127656_COG GFF file", + "description": "COG GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cog.gff", + "md5_checksum": "19da9b3f211164643f276bc74604c9b0", + "id": "nmdc:19da9b3f211164643f276bc74604c9b0", + "file_size_bytes": 32139189 + }, + { + "name": "Gp0127656_PFAM GFF file", + "description": "PFAM GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_pfam.gff", + "md5_checksum": "19905547dfa37274a9f91c9caaf6bacc", + "id": "nmdc:19905547dfa37274a9f91c9caaf6bacc", + "file_size_bytes": 23590201 + }, + { + "name": "Gp0127656_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_tigrfam.gff", + "md5_checksum": "30c2b0722d225938975243ab1041ed12", + "id": "nmdc:30c2b0722d225938975243ab1041ed12", + "file_size_bytes": 2485400 + }, + { + "name": "Gp0127656_SMART GFF file", + "description": "SMART GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_smart.gff", + "md5_checksum": "623e913fa98f88f6037754daf5d9ffc5", + "id": "nmdc:623e913fa98f88f6037754daf5d9ffc5", + "file_size_bytes": 6932331 + }, + { + "name": "Gp0127656_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_supfam.gff", + "md5_checksum": "ec56df16785bc67e073128f09366ec43", + "id": "nmdc:ec56df16785bc67e073128f09366ec43", + 
"file_size_bytes": 39880284 + }, + { + "name": "Gp0127656_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cath_funfam.gff", + "md5_checksum": "2831d1ead0af4681b2ae1a9f21733637", + "id": "nmdc:2831d1ead0af4681b2ae1a9f21733637", + "file_size_bytes": 29872897 + }, + { + "name": "Gp0127656_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko_ec.gff", + "md5_checksum": "53f225f74011f7d30fcfd5c60b3693ae", + "id": "nmdc:53f225f74011f7d30fcfd5c60b3693ae", + "file_size_bytes": 20564625 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34726" + }, + "has_input": [ + "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "nmdc:941f749a92155321c5ce7e5aa32d3b55", + "nmdc:8e8be343bbb1ba11f3e15867b419d05d" + ], + "too_short_contig_num": 163283, + "part_of": [ + "nmdc:mga00hh562" + ], + "binned_contig_num": 83, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:313c88df1890a33d388bdb23c7ad37c3", + "nmdc:ae567f55fe899da83831fda23dcd7a20", + "nmdc:5a8dbda6aec0825b4159d5b53481db90", + "nmdc:060a7f90c5c5123cac41ed946a5482af", + "nmdc:e9f5d03e8264308ed77da0b63eb738fe" + ], + "was_informed_by": "gold:Gp0127656", + "input_contig_num": 169495, + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga00hh562", + "mags_list": [ + { + "number_of_contig": 83, + "completeness": 14.35, + "bin_name": "bins.1", + "gene_count": 388, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + 
"unbinned_contig_num": 6129, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127656_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.tooShort.fa", + "md5_checksum": "313c88df1890a33d388bdb23c7ad37c3", + "id": "nmdc:313c88df1890a33d388bdb23c7ad37c3", + "file_size_bytes": 69332992 + }, + { + "name": "Gp0127656_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.unbinned.fa", + "md5_checksum": "ae567f55fe899da83831fda23dcd7a20", + "id": "nmdc:ae567f55fe899da83831fda23dcd7a20", + "file_size_bytes": 9275333 + }, + { + "name": "Gp0127656_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127656", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_checkm_qa.out", + "md5_checksum": "5a8dbda6aec0825b4159d5b53481db90", + "id": "nmdc:5a8dbda6aec0825b4159d5b53481db90", + "file_size_bytes": 775 + }, + { + "name": "Gp0127656_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127656", + "data_object_type": 
"Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_hqmq_bin.zip", + "md5_checksum": "060a7f90c5c5123cac41ed946a5482af", + "id": "nmdc:060a7f90c5c5123cac41ed946a5482af", + "file_size_bytes": 182 + }, + { + "name": "Gp0127656_metabat2 bins", + "description": "metabat2 bins for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_metabat_bin.zip", + "md5_checksum": "e9f5d03e8264308ed77da0b63eb738fe", + "id": "nmdc:e9f5d03e8264308ed77da0b63eb738fe", + "file_size_bytes": 101752 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b62" + }, + "id": "nmdc:omprc-11-nry91b19", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jqzk1523" + ], + "has_output": [ + "jgi:574fde907ded5e3df1ee1426" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127651" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c877" + }, + "has_input": [ + "nmdc:8254ce50b88be8c384fd37fe21e0d0c4" + ], + "part_of": [ + "nmdc:mga08hnt47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2791a196017767af3b5b21a3029799c0", + "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f" + ], + 
"was_informed_by": "gold:Gp0127651", + "input_read_count": 23728904, + "output_read_bases": 3352071049, + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3583064504, + "name": "Read QC Activity for nmdc:mga08hnt47", + "output_read_count": 22416634, + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:57:48+00:00", + "output_data_objects": [ + { + "name": "Gp0127651_Filtered Reads", + "description": "Filtered Reads for Gp0127651", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filtered.fastq.gz", + "md5_checksum": "2791a196017767af3b5b21a3029799c0", + "id": "nmdc:2791a196017767af3b5b21a3029799c0", + "file_size_bytes": 1856919615 + }, + { + "name": "Gp0127651_Filtered Stats", + "description": "Filtered Stats for Gp0127651", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", + "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", + "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", + "file_size_bytes": 283 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf42" + }, + "has_input": [ + "nmdc:2791a196017767af3b5b21a3029799c0" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:53ee263960c39126e039656a121deb96", + "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "nmdc:b4cbc81c986c67c1037c8b7280924683", + "nmdc:e0c61a191258597984a05d86eaf4d71f", + "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" + ], + "was_informed_by": "gold:Gp0127651", + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for 
nmdc:mga08hnt47", + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:57:48+00:00", + "output_data_objects": [ + { + "name": "Gp0127651_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", + "md5_checksum": "53ee263960c39126e039656a121deb96", + "id": "nmdc:53ee263960c39126e039656a121deb96", + "file_size_bytes": 1199 + }, + { + "name": "Gp0127651_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", + "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", + "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "file_size_bytes": 703299 + }, + { + "name": "Gp0127651_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127651", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", + "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", + "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "file_size_bytes": 229311 + }, + { + "name": "Gp0127651_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127651", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", + "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", + "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "file_size_bytes": 1642196063 + }, + { + "name": "Gp0127651_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127651", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", + "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", + "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", + "file_size_bytes": 254418 + }, + { + "name": "Gp0127651_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127651", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", + "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", + "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", + "file_size_bytes": 2333132 + }, + { + "name": "Gp0127651_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127651", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", + "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", + "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "file_size_bytes": 1309125719 + }, + { + "name": "Gp0127651_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127651", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", + "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", + "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "file_size_bytes": 639737 + }, + { + "name": "Gp0127651_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127651", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", + "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", + "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", + "file_size_bytes": 3988966 + } + ] + }, + { + "_id": { + "$oid": 
"61e719ba833bcf838a7012b5" + }, + "has_input": [ + "nmdc:2791a196017767af3b5b21a3029799c0" + ], + "part_of": [ + "nmdc:mga08hnt47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:53ee263960c39126e039656a121deb96", + "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "nmdc:b4cbc81c986c67c1037c8b7280924683", + "nmdc:e0c61a191258597984a05d86eaf4d71f", + "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" + ], + "was_informed_by": "gold:Gp0127651", + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga08hnt47", + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:57:48+00:00", + "output_data_objects": [ + { + "name": "Gp0127651_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", + "md5_checksum": "53ee263960c39126e039656a121deb96", + "id": "nmdc:53ee263960c39126e039656a121deb96", + "file_size_bytes": 1199 + }, + { + "name": "Gp0127651_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", + "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", + "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "file_size_bytes": 703299 + }, + { + "name": "Gp0127651_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127651", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", + "md5_checksum": 
"0ed808b8ce29d39c3b555e7d5bf4c274", + "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "file_size_bytes": 229311 + }, + { + "name": "Gp0127651_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127651", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", + "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", + "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "file_size_bytes": 1642196063 + }, + { + "name": "Gp0127651_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127651", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", + "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", + "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", + "file_size_bytes": 254418 + }, + { + "name": "Gp0127651_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127651", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", + "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", + "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", + "file_size_bytes": 2333132 + }, + { + "name": "Gp0127651_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127651", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", + "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", + "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "file_size_bytes": 1309125719 + }, + { + "name": "Gp0127651_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127651", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", + "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", + "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "file_size_bytes": 639737 + }, + { + "name": "Gp0127651_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127651", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", + "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", + "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", + "file_size_bytes": 3988966 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fab" + }, + "has_input": [ + "nmdc:2791a196017767af3b5b21a3029799c0" + ], + "part_of": [ + "nmdc:mga08hnt47" + ], + "ctg_logsum": 192880, + "scaf_logsum": 193641, + "gap_pct": 0.00165, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8483663a943ff4c0fc0249353676bfc1", + "nmdc:ccca920c56ad3d050e2d8801bcbe4855", + "nmdc:f21e374c1c31c02bd0e41228cc7895c3", + "nmdc:f43ae7935184d10ba65961171efcac34", + "nmdc:838162ead3f121f5bc02bc1234a32a55" + ], + "asm_score": 4.164, + "was_informed_by": "gold:Gp0127651", + "ctg_powsum": 20759, + "scaf_max": 29106, + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "scaf_powsum": 20844, + "execution_resource": "NERSC-Cori", + "contigs": 180439, + "name": "Assembly Activity for nmdc:mga08hnt47", + "ctg_max": 29106, + "gc_std": 0.109, + "contig_bp": 88911005, + "gc_avg": 0.62144, + "started_at_time": "2021-10-11T02:27:15Z", + "scaf_bp": 88912475, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 180310, + "ended_at_time": "2021-10-11T03:57:48+00:00", + "ctg_l50": 492, + "ctg_l90": 292, + "ctg_n50": 51430, + "ctg_n90": 149085, + "scaf_l50": 493, + "scaf_l90": 292, + "scaf_n50": 51225, + "scaf_n90": 148971, + "output_data_objects": [ + { + "name": 
"Gp0127651_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127651", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_contigs.fna", + "md5_checksum": "8483663a943ff4c0fc0249353676bfc1", + "id": "nmdc:8483663a943ff4c0fc0249353676bfc1", + "file_size_bytes": 95957530 + }, + { + "name": "Gp0127651_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127651", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_scaffolds.fna", + "md5_checksum": "ccca920c56ad3d050e2d8801bcbe4855", + "id": "nmdc:ccca920c56ad3d050e2d8801bcbe4855", + "file_size_bytes": 95414704 + }, + { + "name": "Gp0127651_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_covstats.txt", + "md5_checksum": "f21e374c1c31c02bd0e41228cc7895c3", + "id": "nmdc:f21e374c1c31c02bd0e41228cc7895c3", + "file_size_bytes": 14289388 + }, + { + "name": "Gp0127651_Assembled AGP file", + "description": "Assembled AGP file for Gp0127651", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_assembly.agp", + "md5_checksum": "f43ae7935184d10ba65961171efcac34", + "id": "nmdc:f43ae7935184d10ba65961171efcac34", + "file_size_bytes": 13343603 + }, + { + "name": "Gp0127651_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127651", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_pairedMapped_sorted.bam", + "md5_checksum": "838162ead3f121f5bc02bc1234a32a55", + "id": "nmdc:838162ead3f121f5bc02bc1234a32a55", + "file_size_bytes": 2037589818 + } + ] + }, + { + "_id": { + "$oid": 
"649b005bbf2caae0415ef9bf" + }, + "has_input": [ + "nmdc:8483663a943ff4c0fc0249353676bfc1" + ], + "part_of": [ + "nmdc:mga08hnt47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d8dc4f31293c549b12bbcab915d708cc", + "nmdc:415256907dcafaa68778a2ba358d9ac5", + "nmdc:f0c60a537e6867bf62fde15577669453", + "nmdc:e0f16b60c50581799b7ecb254e61e537", + "nmdc:6eb21304f0762bd8c11b98826d310321", + "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", + "nmdc:f389dc8a93de9f21322db385b2788f5f", + "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", + "nmdc:89bc9cf9183fed6700cde44fad41b830", + "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", + "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", + "nmdc:68c06be8d27d1697b4a6955537b318c8" + ], + "was_informed_by": "gold:Gp0127651", + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga08hnt47", + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:57:48+00:00", + "output_data_objects": [ + { + "name": "Gp0127651_Protein FAA", + "description": "Protein FAA for Gp0127651", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_proteins.faa", + "md5_checksum": "d8dc4f31293c549b12bbcab915d708cc", + "id": "nmdc:d8dc4f31293c549b12bbcab915d708cc", + "file_size_bytes": 54370216 + }, + { + "name": "Gp0127651_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127651", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_structural_annotation.gff", + "md5_checksum": "415256907dcafaa68778a2ba358d9ac5", + "id": "nmdc:415256907dcafaa68778a2ba358d9ac5", + "file_size_bytes": 2517 + }, + { + "name": "Gp0127651_Functional annotation GFF file", + "description": "Functional annotation 
GFF file for Gp0127651", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_functional_annotation.gff", + "md5_checksum": "f0c60a537e6867bf62fde15577669453", + "id": "nmdc:f0c60a537e6867bf62fde15577669453", + "file_size_bytes": 61364019 + }, + { + "name": "Gp0127651_KO TSV file", + "description": "KO TSV file for Gp0127651", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko.tsv", + "md5_checksum": "e0f16b60c50581799b7ecb254e61e537", + "id": "nmdc:e0f16b60c50581799b7ecb254e61e537", + "file_size_bytes": 6908291 + }, + { + "name": "Gp0127651_EC TSV file", + "description": "EC TSV file for Gp0127651", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ec.tsv", + "md5_checksum": "6eb21304f0762bd8c11b98826d310321", + "id": "nmdc:6eb21304f0762bd8c11b98826d310321", + "file_size_bytes": 4650091 + }, + { + "name": "Gp0127651_COG GFF file", + "description": "COG GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cog.gff", + "md5_checksum": "4ea7982c99cbb6d8ccc9fd949bee09ec", + "id": "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", + "file_size_bytes": 36137856 + }, + { + "name": "Gp0127651_PFAM GFF file", + "description": "PFAM GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_pfam.gff", + "md5_checksum": "f389dc8a93de9f21322db385b2788f5f", + "id": "nmdc:f389dc8a93de9f21322db385b2788f5f", + "file_size_bytes": 27173740 + }, + { + "name": "Gp0127651_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_tigrfam.gff", + "md5_checksum": "8e6659ce96dfa72ceefda39c74fb1dce", + "id": 
"nmdc:8e6659ce96dfa72ceefda39c74fb1dce", + "file_size_bytes": 2943355 + }, + { + "name": "Gp0127651_SMART GFF file", + "description": "SMART GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_smart.gff", + "md5_checksum": "89bc9cf9183fed6700cde44fad41b830", + "id": "nmdc:89bc9cf9183fed6700cde44fad41b830", + "file_size_bytes": 7927726 + }, + { + "name": "Gp0127651_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_supfam.gff", + "md5_checksum": "84aae368e77c1d07c6b6e8deecbc3f3b", + "id": "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", + "file_size_bytes": 45499652 + }, + { + "name": "Gp0127651_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cath_funfam.gff", + "md5_checksum": "ee5612e5ee82ec2d57029d1bc4e1756f", + "id": "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", + "file_size_bytes": 34280847 + }, + { + "name": "Gp0127651_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko_ec.gff", + "md5_checksum": "68c06be8d27d1697b4a6955537b318c8", + "id": "nmdc:68c06be8d27d1697b4a6955537b318c8", + "file_size_bytes": 21943549 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34721" + }, + "has_input": [ + "nmdc:8483663a943ff4c0fc0249353676bfc1", + "nmdc:838162ead3f121f5bc02bc1234a32a55", + "nmdc:f0c60a537e6867bf62fde15577669453" + ], + "too_short_contig_num": 168908, + "part_of": [ + "nmdc:mga08hnt47" + ], + "binned_contig_num": 216, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:6f012bfca6cb653f92eaf927003de0fa", + "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", + 
"nmdc:66fd77d80cc9257da98c5bce4cb30626", + "nmdc:06caec963e007225d1d9411078829100", + "nmdc:eb5216cc4e09d88c4c59a76c4808a693" + ], + "was_informed_by": "gold:Gp0127651", + "input_contig_num": 180437, + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga08hnt47", + "mags_list": [ + { + "number_of_contig": 216, + "completeness": 36.79, + "bin_name": "bins.1", + "gene_count": 1612, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 28 + } + ], + "unbinned_contig_num": 11313, + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:57:48+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127651_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.tooShort.fa", + "md5_checksum": "6f012bfca6cb653f92eaf927003de0fa", + "id": "nmdc:6f012bfca6cb653f92eaf927003de0fa", + "file_size_bytes": 77381118 + }, + { + "name": "Gp0127651_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127651", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.unbinned.fa", + "md5_checksum": "298e0a0c98ebe4fb673da7de9fcb03a2", + "id": "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", + "file_size_bytes": 17278743 + }, + { + "name": "Gp0127651_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127651", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_checkm_qa.out", + "md5_checksum": "66fd77d80cc9257da98c5bce4cb30626", + "id": "nmdc:66fd77d80cc9257da98c5bce4cb30626", + "file_size_bytes": 760 + }, + { + "name": "Gp0127651_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127651", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_hqmq_bin.zip", + "md5_checksum": "06caec963e007225d1d9411078829100", + "id": "nmdc:06caec963e007225d1d9411078829100", + "file_size_bytes": 182 + }, + { + "name": "Gp0127651_metabat2 bins", + "description": "metabat2 bins for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_metabat_bin.zip", + "md5_checksum": "eb5216cc4e09d88c4c59a76c4808a693", + "id": "nmdc:eb5216cc4e09d88c4c59a76c4808a693", + "file_size_bytes": 397044 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b63" + }, + "id": "nmdc:omprc-11-0n8y1d07", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-a7fxtx60" + ], + "has_output": [ + "jgi:574fde947ded5e3df1ee1429" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": 
"2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127655" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c880" + }, + "has_input": [ + "nmdc:898017d076d5d2daaf902e9141f0600a" + ], + "part_of": [ + "nmdc:mga0317978" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:04b9014981f7035c39bd7f870613ed93", + "nmdc:b66266969ab3df4c1cb2b16c1fa7d098" + ], + "was_informed_by": "gold:Gp0127655", + "input_read_count": 23985924, + "output_read_bases": 3400452550, + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3621874524, + "name": "Read QC Activity for nmdc:mga0317978", + "output_read_count": 22751496, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00", + "output_data_objects": [ + { + "name": "Gp0127655_Filtered Reads", + "description": "Filtered Reads for Gp0127655", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filtered.fastq.gz", + "md5_checksum": "04b9014981f7035c39bd7f870613ed93", + "id": "nmdc:04b9014981f7035c39bd7f870613ed93", + "file_size_bytes": 1880069213 + }, + { + "name": "Gp0127655_Filtered Stats", + "description": "Filtered Stats for Gp0127655", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filterStats.txt", + "md5_checksum": "b66266969ab3df4c1cb2b16c1fa7d098", + "id": 
"nmdc:b66266969ab3df4c1cb2b16c1fa7d098", + "file_size_bytes": 286 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf47" + }, + "has_input": [ + "nmdc:04b9014981f7035c39bd7f870613ed93" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:46371c7bc8259e459f975f915aaac26f", + "nmdc:5dd9bc51105920f3f629e8106235af3b", + "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "nmdc:e3f410adc2347396abfdec2a848000d9", + "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "nmdc:1d4f5a605d4549801fda16da567efe56", + "nmdc:8bb5c66575c7c953719ae9947600ad49", + "nmdc:157f7672690ba8207808cc4386ff10a4" + ], + "was_informed_by": "gold:Gp0127655", + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0317978", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:21:25+00:00", + "output_data_objects": [ + { + "name": "Gp0127655_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", + "md5_checksum": "46371c7bc8259e459f975f915aaac26f", + "id": "nmdc:46371c7bc8259e459f975f915aaac26f", + "file_size_bytes": 2178 + }, + { + "name": "Gp0127655_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", + "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", + "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", + "file_size_bytes": 697690 + }, + { + "name": "Gp0127655_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127655", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", + "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", + "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "file_size_bytes": 231103 + }, + { + "name": "Gp0127655_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127655", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", + "md5_checksum": "e3f410adc2347396abfdec2a848000d9", + "id": "nmdc:e3f410adc2347396abfdec2a848000d9", + "file_size_bytes": 1676897166 + }, + { + "name": "Gp0127655_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127655", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", + "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", + "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "file_size_bytes": 253692 + }, + { + "name": "Gp0127655_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127655", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", + "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", + "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "file_size_bytes": 2329422 + }, + { + "name": "Gp0127655_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127655", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", + "md5_checksum": "1d4f5a605d4549801fda16da567efe56", + "id": "nmdc:1d4f5a605d4549801fda16da567efe56", + "file_size_bytes": 
1336793184 + }, + { + "name": "Gp0127655_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127655", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", + "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", + "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", + "file_size_bytes": 632192 + }, + { + "name": "Gp0127655_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127655", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", + "md5_checksum": "157f7672690ba8207808cc4386ff10a4", + "id": "nmdc:157f7672690ba8207808cc4386ff10a4", + "file_size_bytes": 3946317 + } + ] + }, + { + "_id": { + "$oid": "61e719df833bcf838a701627" + }, + "has_input": [ + "nmdc:04b9014981f7035c39bd7f870613ed93" + ], + "part_of": [ + "nmdc:mga0317978" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:46371c7bc8259e459f975f915aaac26f", + "nmdc:5dd9bc51105920f3f629e8106235af3b", + "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "nmdc:e3f410adc2347396abfdec2a848000d9", + "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "nmdc:1d4f5a605d4549801fda16da567efe56", + "nmdc:8bb5c66575c7c953719ae9947600ad49", + "nmdc:157f7672690ba8207808cc4386ff10a4" + ], + "was_informed_by": "gold:Gp0127655", + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0317978", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:21:25+00:00", + "output_data_objects": [ + { + "name": "Gp0127655_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127655", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", + "md5_checksum": "46371c7bc8259e459f975f915aaac26f", + "id": "nmdc:46371c7bc8259e459f975f915aaac26f", + "file_size_bytes": 2178 + }, + { + "name": "Gp0127655_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", + "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", + "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", + "file_size_bytes": 697690 + }, + { + "name": "Gp0127655_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127655", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", + "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", + "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "file_size_bytes": 231103 + }, + { + "name": "Gp0127655_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127655", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", + "md5_checksum": "e3f410adc2347396abfdec2a848000d9", + "id": "nmdc:e3f410adc2347396abfdec2a848000d9", + "file_size_bytes": 1676897166 + }, + { + "name": "Gp0127655_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127655", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", + "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", + "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "file_size_bytes": 253692 + }, + { + "name": "Gp0127655_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127655", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", + "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", + "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "file_size_bytes": 2329422 + }, + { + "name": "Gp0127655_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127655", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", + "md5_checksum": "1d4f5a605d4549801fda16da567efe56", + "id": "nmdc:1d4f5a605d4549801fda16da567efe56", + "file_size_bytes": 1336793184 + }, + { + "name": "Gp0127655_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127655", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", + "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", + "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", + "file_size_bytes": 632192 + }, + { + "name": "Gp0127655_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127655", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", + "md5_checksum": "157f7672690ba8207808cc4386ff10a4", + "id": "nmdc:157f7672690ba8207808cc4386ff10a4", + "file_size_bytes": 3946317 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb3" + }, + "has_input": [ + "nmdc:04b9014981f7035c39bd7f870613ed93" + ], + "part_of": [ + "nmdc:mga0317978" + ], + "ctg_logsum": 170806, + "scaf_logsum": 171254, + "gap_pct": 0.00086, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:98bc1e8aa3703e255a930f6c6f923453", + 
"nmdc:769bd168524b84f2d10dfdb2a42a909d", + "nmdc:5bd5f8108ae1d767ea5a79ebde3d83de", + "nmdc:933de420870147e58137b328e0d54d87", + "nmdc:2b699163734ee73cbccc94e4767d36c0" + ], + "asm_score": 3.393, + "was_informed_by": "gold:Gp0127655", + "ctg_powsum": 18408, + "scaf_max": 16317, + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "scaf_powsum": 18458, + "execution_resource": "NERSC-Cori", + "contigs": 182939, + "name": "Assembly Activity for nmdc:mga0317978", + "ctg_max": 16317, + "gc_std": 0.09607, + "contig_bp": 86362605, + "gc_avg": 0.63666, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 86363345, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 182865, + "ended_at_time": "2021-10-11T03:21:25+00:00", + "ctg_l50": 456, + "ctg_l90": 289, + "ctg_n50": 53760, + "ctg_n90": 154881, + "scaf_l50": 457, + "scaf_l90": 289, + "scaf_n50": 53484, + "scaf_n90": 154812, + "output_data_objects": [ + { + "name": "Gp0127655_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127655", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_contigs.fna", + "md5_checksum": "98bc1e8aa3703e255a930f6c6f923453", + "id": "nmdc:98bc1e8aa3703e255a930f6c6f923453", + "file_size_bytes": 93445462 + }, + { + "name": "Gp0127655_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127655", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_scaffolds.fna", + "md5_checksum": "769bd168524b84f2d10dfdb2a42a909d", + "id": "nmdc:769bd168524b84f2d10dfdb2a42a909d", + "file_size_bytes": 92895420 + }, + { + "name": "Gp0127655_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_covstats.txt", + "md5_checksum": "5bd5f8108ae1d767ea5a79ebde3d83de", + "id": 
"nmdc:5bd5f8108ae1d767ea5a79ebde3d83de", + "file_size_bytes": 14474338 + }, + { + "name": "Gp0127655_Assembled AGP file", + "description": "Assembled AGP file for Gp0127655", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_assembly.agp", + "md5_checksum": "933de420870147e58137b328e0d54d87", + "id": "nmdc:933de420870147e58137b328e0d54d87", + "file_size_bytes": 13523380 + }, + { + "name": "Gp0127655_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127655", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_pairedMapped_sorted.bam", + "md5_checksum": "2b699163734ee73cbccc94e4767d36c0", + "id": "nmdc:2b699163734ee73cbccc94e4767d36c0", + "file_size_bytes": 2057808015 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c4" + }, + "has_input": [ + "nmdc:98bc1e8aa3703e255a930f6c6f923453" + ], + "part_of": [ + "nmdc:mga0317978" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", + "nmdc:6b11bf4eaf9723559b6015296b802252", + "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", + "nmdc:a1cd7e1382fd1818c42860a0555f1f57", + "nmdc:3a27c2da0a3d05e4c44547afb2875195", + "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", + "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", + "nmdc:acb8325b4800ff62e3fda52b21b92ecc", + "nmdc:a044873e470ce9f2be06ae99cd1cc242", + "nmdc:40f0627934454a354886609d7068a12c", + "nmdc:60255b31e223a7b5bad8f186b6f65d7c", + "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf" + ], + "was_informed_by": "gold:Gp0127655", + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0317978", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00", + "output_data_objects": 
[ + { + "name": "Gp0127655_Protein FAA", + "description": "Protein FAA for Gp0127655", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_proteins.faa", + "md5_checksum": "9b57eb78fd2e8f0af8b55cf5fb3a2bab", + "id": "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", + "file_size_bytes": 53898203 + }, + { + "name": "Gp0127655_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127655", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_structural_annotation.gff", + "md5_checksum": "6b11bf4eaf9723559b6015296b802252", + "id": "nmdc:6b11bf4eaf9723559b6015296b802252", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127655_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127655", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_functional_annotation.gff", + "md5_checksum": "0940fbdf18becd76e7dd3abcfaba12b5", + "id": "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", + "file_size_bytes": 61535970 + }, + { + "name": "Gp0127655_KO TSV file", + "description": "KO TSV file for Gp0127655", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko.tsv", + "md5_checksum": "a1cd7e1382fd1818c42860a0555f1f57", + "id": "nmdc:a1cd7e1382fd1818c42860a0555f1f57", + "file_size_bytes": 6994761 + }, + { + "name": "Gp0127655_EC TSV file", + "description": "EC TSV file for Gp0127655", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ec.tsv", + "md5_checksum": "3a27c2da0a3d05e4c44547afb2875195", + "id": "nmdc:3a27c2da0a3d05e4c44547afb2875195", + "file_size_bytes": 4598688 + }, + { + "name": 
"Gp0127655_COG GFF file", + "description": "COG GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cog.gff", + "md5_checksum": "1c8529ca35ee0b275b8ca3d2b5c565ec", + "id": "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", + "file_size_bytes": 36290392 + }, + { + "name": "Gp0127655_PFAM GFF file", + "description": "PFAM GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_pfam.gff", + "md5_checksum": "8bf1c44c4a9fc7f55dcf58be1273b46f", + "id": "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", + "file_size_bytes": 27016921 + }, + { + "name": "Gp0127655_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_tigrfam.gff", + "md5_checksum": "acb8325b4800ff62e3fda52b21b92ecc", + "id": "nmdc:acb8325b4800ff62e3fda52b21b92ecc", + "file_size_bytes": 2768301 + }, + { + "name": "Gp0127655_SMART GFF file", + "description": "SMART GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_smart.gff", + "md5_checksum": "a044873e470ce9f2be06ae99cd1cc242", + "id": "nmdc:a044873e470ce9f2be06ae99cd1cc242", + "file_size_bytes": 7806208 + }, + { + "name": "Gp0127655_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_supfam.gff", + "md5_checksum": "40f0627934454a354886609d7068a12c", + "id": "nmdc:40f0627934454a354886609d7068a12c", + "file_size_bytes": 45276498 + }, + { + "name": "Gp0127655_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cath_funfam.gff", + "md5_checksum": "60255b31e223a7b5bad8f186b6f65d7c", + "id": "nmdc:60255b31e223a7b5bad8f186b6f65d7c", + "file_size_bytes": 33794110 + }, + { + 
"name": "Gp0127655_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko_ec.gff", + "md5_checksum": "b8d559d4ea779c4076e3c9e1e92bddcf", + "id": "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf", + "file_size_bytes": 22249696 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34725" + }, + "has_input": [ + "nmdc:98bc1e8aa3703e255a930f6c6f923453", + "nmdc:2b699163734ee73cbccc94e4767d36c0", + "nmdc:0940fbdf18becd76e7dd3abcfaba12b5" + ], + "too_short_contig_num": 173159, + "part_of": [ + "nmdc:mga0317978" + ], + "binned_contig_num": 412, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:58f2cc63798346be853bccacdd7ca30d", + "nmdc:8b2dbaba9c1219096831ad99d8b7c056", + "nmdc:c562d8d5ccc986d672b4e48e006fafab", + "nmdc:2eaf0a7d519ac7c034d63797d735080c", + "nmdc:668a0a6dbd840dd2178a00c2af4c2237" + ], + "was_informed_by": "gold:Gp0127655", + "input_contig_num": 182939, + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0317978", + "mags_list": [ + { + "number_of_contig": 412, + "completeness": 27.84, + "bin_name": "bins.1", + "gene_count": 2086, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 + } + ], + "unbinned_contig_num": 9368, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127655_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.tooShort.fa", + "md5_checksum": "58f2cc63798346be853bccacdd7ca30d", + "id": "nmdc:58f2cc63798346be853bccacdd7ca30d", + "file_size_bytes": 77075570 + }, + { + "name": "Gp0127655_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.unbinned.fa", + "md5_checksum": "8b2dbaba9c1219096831ad99d8b7c056", + "id": "nmdc:8b2dbaba9c1219096831ad99d8b7c056", + "file_size_bytes": 14551969 + }, + { + "name": "Gp0127655_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127655", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_checkm_qa.out", + "md5_checksum": "c562d8d5ccc986d672b4e48e006fafab", + "id": "nmdc:c562d8d5ccc986d672b4e48e006fafab", + "file_size_bytes": 775 + }, + { + "name": "Gp0127655_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127655", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_hqmq_bin.zip", + "md5_checksum": "2eaf0a7d519ac7c034d63797d735080c", + "id": "nmdc:2eaf0a7d519ac7c034d63797d735080c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127655_metabat2 bins", + "description": "metabat2 bins for Gp0127655", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_metabat_bin.zip", + "md5_checksum": "668a0a6dbd840dd2178a00c2af4c2237", + "id": "nmdc:668a0a6dbd840dd2178a00c2af4c2237", + "file_size_bytes": 527634 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349b64" + }, + "id": "nmdc:omprc-11-p1735e67", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-k4wa0808" + ], + "has_output": [ + "jgi:574fde937ded5e3df1ee1428" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127653" + ], + "downstream_workflow_activity_records": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c878" + }, + "has_input": [ + "nmdc:84ffabc3fbd7e759cd2352ec513b89a0" + ], + "part_of": [ + "nmdc:mga079y988" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8eec0e9c14abb418b906504d1675ecc5", + "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0" + ], + "was_informed_by": "gold:Gp0127653", + "input_read_count": 20780788, + "output_read_bases": 2918466866, + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3137898988, + "name": "Read QC Activity for nmdc:mga079y988", + "output_read_count": 19516330, + "started_at_time": 
"2021-10-11T02:23:35Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00", + "output_data_objects": [ + { + "name": "Gp0127653_Filtered Reads", + "description": "Filtered Reads for Gp0127653", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filtered.fastq.gz", + "md5_checksum": "8eec0e9c14abb418b906504d1675ecc5", + "id": "nmdc:8eec0e9c14abb418b906504d1675ecc5", + "file_size_bytes": 1661017378 + }, + { + "name": "Gp0127653_Filtered Stats", + "description": "Filtered Stats for Gp0127653", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filterStats.txt", + "md5_checksum": "5d07358bbc48f25e157ffc91ea7ae3e0", + "id": "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0", + "file_size_bytes": 286 + } + ] + }, + { + "_id": { + "$oid": "649b009bff710ae353f8cf3d" + }, + "has_input": [ + "nmdc:8eec0e9c14abb418b906504d1675ecc5" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "nmdc:dbf03e26f7e1529762830161fe1f1906", + "nmdc:284ce1b28b8964cb525025d678277dba", + "nmdc:a379527f61806391e42b3512146013a8", + "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "nmdc:3219058371bf2f8081b2dd2b434ec145", + "nmdc:be29ebcd7358653afec7381f9ca43431", + "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" + ], + "was_informed_by": "gold:Gp0127653", + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga079y988", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:52:13+00:00", + "output_data_objects": [ + { + "name": "Gp0127653_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127653", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", + "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", + "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "file_size_bytes": 3812 + }, + { + "name": "Gp0127653_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", + "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", + "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", + "file_size_bytes": 857087 + }, + { + "name": "Gp0127653_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127653", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", + "md5_checksum": "284ce1b28b8964cb525025d678277dba", + "id": "nmdc:284ce1b28b8964cb525025d678277dba", + "file_size_bytes": 235621 + }, + { + "name": "Gp0127653_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127653", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", + "md5_checksum": "a379527f61806391e42b3512146013a8", + "id": "nmdc:a379527f61806391e42b3512146013a8", + "file_size_bytes": 1437707313 + }, + { + "name": "Gp0127653_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127653", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", + "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", + "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "file_size_bytes": 255105 + }, + { + "name": "Gp0127653_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127653", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", + "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", + "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", + "file_size_bytes": 2327985 + }, + { + "name": "Gp0127653_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127653", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", + "md5_checksum": "be29ebcd7358653afec7381f9ca43431", + "id": "nmdc:be29ebcd7358653afec7381f9ca43431", + "file_size_bytes": 1164013677 + }, + { + "name": "Gp0127653_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127653", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", + "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", + "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "file_size_bytes": 638368 + }, + { + "name": "Gp0127653_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127653", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", + "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", + "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", + "file_size_bytes": 3982485 + } + ] + }, + { + "_id": { + "$oid": "61e719b9833bcf838a70124b" + }, + "has_input": [ + "nmdc:8eec0e9c14abb418b906504d1675ecc5" + ], + "part_of": [ + "nmdc:mga079y988" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "nmdc:dbf03e26f7e1529762830161fe1f1906", + "nmdc:284ce1b28b8964cb525025d678277dba", 
+ "nmdc:a379527f61806391e42b3512146013a8", + "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "nmdc:3219058371bf2f8081b2dd2b434ec145", + "nmdc:be29ebcd7358653afec7381f9ca43431", + "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" + ], + "was_informed_by": "gold:Gp0127653", + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga079y988", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:52:13+00:00", + "output_data_objects": [ + { + "name": "Gp0127653_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", + "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", + "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "file_size_bytes": 3812 + }, + { + "name": "Gp0127653_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", + "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", + "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", + "file_size_bytes": 857087 + }, + { + "name": "Gp0127653_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127653", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", + "md5_checksum": "284ce1b28b8964cb525025d678277dba", + "id": "nmdc:284ce1b28b8964cb525025d678277dba", + "file_size_bytes": 235621 + }, + { + "name": "Gp0127653_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127653", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", + "md5_checksum": "a379527f61806391e42b3512146013a8", + "id": "nmdc:a379527f61806391e42b3512146013a8", + "file_size_bytes": 1437707313 + }, + { + "name": "Gp0127653_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127653", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", + "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", + "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "file_size_bytes": 255105 + }, + { + "name": "Gp0127653_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127653", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", + "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", + "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", + "file_size_bytes": 2327985 + }, + { + "name": "Gp0127653_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127653", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", + "md5_checksum": "be29ebcd7358653afec7381f9ca43431", + "id": "nmdc:be29ebcd7358653afec7381f9ca43431", + "file_size_bytes": 1164013677 + }, + { + "name": "Gp0127653_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127653", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", + "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", + "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "file_size_bytes": 638368 + }, + { + "name": "Gp0127653_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127653", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", + "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", + "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", + "file_size_bytes": 3982485 + } + ] + }, + { + "_id": { + "$oid": "649b005f2ca5ee4adb139faa" + }, + "has_input": [ + "nmdc:8eec0e9c14abb418b906504d1675ecc5" + ], + "part_of": [ + "nmdc:mga079y988" + ], + "ctg_logsum": 9125.582, + "scaf_logsum": 9156.336, + "gap_pct": 0.00094, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0f2b82878f54787c127bf03338d5c605", + "nmdc:02f8e7222e9e6f45c388a189ca66e1f9", + "nmdc:eea8a4b58ca07019d0050b030be3a3d1", + "nmdc:44b1ad59bd14c3367ac0fa2ca37aa057", + "nmdc:ccd5ba8558a92751c59989aa81054e1a" + ], + "asm_score": 13.921, + "was_informed_by": "gold:Gp0127653", + "ctg_powsum": 1096.518, + "scaf_max": 58655, + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "scaf_powsum": 1101.795, + "execution_resource": "NERSC-Cori", + "contigs": 48932, + "name": "Assembly Activity for nmdc:mga079y988", + "ctg_max": 58655, + "gc_std": 0.10928, + "gc_avg": 0.57867, + "contig_bp": 16963869, + "started_at_time": "2021-10-11T02:23:35Z", + "scaf_bp": 16964029, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 48925, + "ended_at_time": "2021-11-13T18:52:13+00:00", + "ctg_l50": 309, + "ctg_l90": 281, + "ctg_n50": 19544, + "ctg_n90": 43034, + "scaf_l50": 309, + "scaf_l90": 281, + "scaf_n50": 19539, + "scaf_n90": 43028, + "scaf_l_gt50k": 58655, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.34576103, + "output_data_objects": [ + { + "name": "Gp0127653_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127653", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_contigs.fna", + "md5_checksum": "0f2b82878f54787c127bf03338d5c605", + "id": "nmdc:0f2b82878f54787c127bf03338d5c605", + "file_size_bytes": 18722308 + }, + { + "name": "Gp0127653_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127653", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_scaffolds.fna", + "md5_checksum": "02f8e7222e9e6f45c388a189ca66e1f9", + "id": "nmdc:02f8e7222e9e6f45c388a189ca66e1f9", + "file_size_bytes": 18575622 + }, + { + "name": "Gp0127653_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_covstats.txt", + "md5_checksum": "eea8a4b58ca07019d0050b030be3a3d1", + "id": "nmdc:eea8a4b58ca07019d0050b030be3a3d1", + "file_size_bytes": 3824141 + }, + { + "name": "Gp0127653_Assembled AGP file", + "description": "Assembled AGP file for Gp0127653", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_assembly.agp", + "md5_checksum": "44b1ad59bd14c3367ac0fa2ca37aa057", + "id": "nmdc:44b1ad59bd14c3367ac0fa2ca37aa057", + "file_size_bytes": 3551123 + }, + { + "name": "Gp0127653_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127653", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_pairedMapped_sorted.bam", + "md5_checksum": "ccd5ba8558a92751c59989aa81054e1a", + "id": "nmdc:ccd5ba8558a92751c59989aa81054e1a", + "file_size_bytes": 1757373378 + } + ] + }, + { + "_id": { + "$oid": "649b005bbf2caae0415ef9be" + }, + "has_input": [ + "nmdc:0f2b82878f54787c127bf03338d5c605" + ], + "part_of": [ + "nmdc:mga079y988" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:81f16ca99f73a3314a66e6b24d23376f", + "nmdc:66bb16ef28196379647d319da50426dd", + "nmdc:1e7dac5f12cc086509ff905f7133b15a", + "nmdc:2a7343eb6364d769a1c43aa5c94daee8", + "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", + "nmdc:1d45960b1ba5e27af42c736ec583ecd4", + "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", + "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", + "nmdc:6bed0fc7a7be284936c69fc1faac4be6", + "nmdc:052d3fb0080390255df5772f79e5ef2c", + "nmdc:e66a3b85c713e8766e5181da2e393984", + "nmdc:949e3b137b3a0591ed9de493ee5c530b" + ], + "was_informed_by": "gold:Gp0127653", + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga079y988", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00", + "output_data_objects": [ + { + "name": "Gp0127653_Protein FAA", + "description": "Protein FAA for Gp0127653", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_proteins.faa", + "md5_checksum": "81f16ca99f73a3314a66e6b24d23376f", + "id": "nmdc:81f16ca99f73a3314a66e6b24d23376f", + "file_size_bytes": 11129064 + }, + { + "name": "Gp0127653_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127653", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_structural_annotation.gff", + "md5_checksum": "66bb16ef28196379647d319da50426dd", + "id": "nmdc:66bb16ef28196379647d319da50426dd", + "file_size_bytes": 8094827 + }, + { + "name": "Gp0127653_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127653", + "data_object_type": "Functional Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_functional_annotation.gff", + "md5_checksum": "1e7dac5f12cc086509ff905f7133b15a", + "id": "nmdc:1e7dac5f12cc086509ff905f7133b15a", + "file_size_bytes": 13821021 + }, + { + "name": "Gp0127653_KO TSV file", + "description": "KO TSV file for Gp0127653", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko.tsv", + "md5_checksum": "2a7343eb6364d769a1c43aa5c94daee8", + "id": "nmdc:2a7343eb6364d769a1c43aa5c94daee8", + "file_size_bytes": 1578987 + }, + { + "name": "Gp0127653_EC TSV file", + "description": "EC TSV file for Gp0127653", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ec.tsv", + "md5_checksum": "b2cee4d35f68d1f5731bff3af5904fa4", + "id": "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", + "file_size_bytes": 1029657 + }, + { + "name": "Gp0127653_COG GFF file", + "description": "COG GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cog.gff", + "md5_checksum": "1d45960b1ba5e27af42c736ec583ecd4", + "id": "nmdc:1d45960b1ba5e27af42c736ec583ecd4", + "file_size_bytes": 7241411 + }, + { + "name": "Gp0127653_PFAM GFF file", + "description": "PFAM GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_pfam.gff", + "md5_checksum": "3dec47a0a04865ecdcd9ed7cbc78eca4", + "id": "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", + "file_size_bytes": 5221877 + }, + { + "name": "Gp0127653_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_tigrfam.gff", + "md5_checksum": "043322f3cd31d50faf4d4e0ffd1c8427", + "id": "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", + "file_size_bytes": 472233 + }, + { + "name": 
"Gp0127653_SMART GFF file", + "description": "SMART GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_smart.gff", + "md5_checksum": "6bed0fc7a7be284936c69fc1faac4be6", + "id": "nmdc:6bed0fc7a7be284936c69fc1faac4be6", + "file_size_bytes": 1586537 + }, + { + "name": "Gp0127653_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_supfam.gff", + "md5_checksum": "052d3fb0080390255df5772f79e5ef2c", + "id": "nmdc:052d3fb0080390255df5772f79e5ef2c", + "file_size_bytes": 9232981 + }, + { + "name": "Gp0127653_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cath_funfam.gff", + "md5_checksum": "e66a3b85c713e8766e5181da2e393984", + "id": "nmdc:e66a3b85c713e8766e5181da2e393984", + "file_size_bytes": 6697496 + }, + { + "name": "Gp0127653_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko_ec.gff", + "md5_checksum": "949e3b137b3a0591ed9de493ee5c530b", + "id": "nmdc:949e3b137b3a0591ed9de493ee5c530b", + "file_size_bytes": 5035400 + } + ] + }, + { + "_id": { + "$oid": "649b0052ec087f6bbab34720" + }, + "has_input": [ + "nmdc:0f2b82878f54787c127bf03338d5c605", + "nmdc:ccd5ba8558a92751c59989aa81054e1a", + "nmdc:1e7dac5f12cc086509ff905f7133b15a" + ], + "too_short_contig_num": 48540, + "part_of": [ + "nmdc:mga079y988" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:1029b97dba32dab780f4267f8224619f" + ], + "was_informed_by": "gold:Gp0127653", + "input_contig_num": 48931, + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for 
nmdc:mga079y988", + "mags_list": [], + "unbinned_contig_num": 391, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00", + "output_data_objects": [ + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127653_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127653", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/MAGs/nmdc_mga079y988_hqmq_bin.zip", + "md5_checksum": "1029b97dba32dab780f4267f8224619f", + "id": "nmdc:1029b97dba32dab780f4267f8224619f", + "file_size_bytes": 182 + } + ] + } + ] + }, + { + "_id": { + "$oid": "649b009773e8249959349bd6" + }, + "id": "nmdc:omprc-11-31svgk40", + "name": "SBR_FC_N1_10-20_H2Oext_13Oct15_Leopard_1_01_4405", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-6zt5jc55" + ], + "has_output": [ + "emsl:output_456429" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456429" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bd7" + }, + "id": "nmdc:omprc-11-9z3dj481", + "name": "SBR_FC_N1_00-10_H2Oext_15Oct15_Leopard_1_01_4515", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-r39v4766" + ], + "has_output": [ + 
"emsl:output_456425" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456425" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bd8" + }, + "id": "nmdc:omprc-11-6dkkx108", + "name": "SBR_FC_N1_00-10_H2Oext_13Oct15_Leopard_1_01_4404", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-r39v4766" + ], + "has_output": [ + "emsl:output_456424" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456424" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bd9" + }, + "id": "nmdc:omprc-11-xv3nwb33", + "name": "SBR_FC_N1_20-30_H2Oext_15Oct15_Leopard_1_01_4517", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-fg8jh149" + ], + "has_output": [ + "emsl:output_456435" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456435" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bda" + }, + "id": "nmdc:omprc-11-47x32496", + "name": "SBR_FC_N1_20-30_H2Oext_13Oct15_Leopard_1_01_4406", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-fg8jh149" + ], + "has_output": [ + "emsl:output_456434" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + 
"omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456434" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bdd" + }, + "id": "nmdc:omprc-11-5kt8rj58", + "name": "SBR_FC_N1_30-40_H2Oext_13Oct15_Leopard_1_01_4407", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-y9qebw84" + ], + "has_output": [ + "emsl:output_456439" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456439" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bdf" + }, + "id": "nmdc:omprc-11-bcnnxa07", + "name": "SBR_FC_N1_40-50_H2Oext_15Oct15_Leopard_1_01_4519", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-01n4q492" + ], + "has_output": [ + "emsl:output_456445" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456445" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be0" + }, + "id": "nmdc:omprc-11-gj7th903", + "name": "SBR_FC_N1_10-20_H2Oext_15Oct15_Leopard_1_01_4516", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-6zt5jc55" + ], + "has_output": [ + "emsl:output_456430" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + 
"type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456430" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be1" + }, + "id": "nmdc:omprc-11-fj734v67", + "name": "SBR_FC_N1_50-60_H2Oext_13Oct15_Leopard_1_01_4409", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-0pragn10" + ], + "has_output": [ + "emsl:output_456449" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456449" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be2" + }, + "id": "nmdc:omprc-11-6ar8e259", + "name": "SBR_FC_N1_50-60_H2Oext_15Oct15_Leopard_1_01_4520", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-0pragn10" + ], + "has_output": [ + "emsl:output_456450" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456450" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be3" + }, + "id": "nmdc:omprc-11-vcc89f39", + "name": "SBR_FC_N2_30-40_H2Oext_13Oct15_Leopard_1_01_4411", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-1pewj990" + ], + "has_output": [ + "emsl:output_456459" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456459" + ], + 
"downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be4" + }, + "id": "nmdc:omprc-11-k76emy12", + "name": "SBR_FC_N1_30-40_H2Oext_15Oct15_Leopard_1_01_4518", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-y9qebw84" + ], + "has_output": [ + "emsl:output_456440" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456440" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be5" + }, + "id": "nmdc:omprc-11-15c0g775", + "name": "SBR_FC_N2_30-40_H2Oext_15Oct15_Leopard_1_01_4522", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-1pewj990" + ], + "has_output": [ + "emsl:output_456460" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456460" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be6" + }, + "id": "nmdc:omprc-11-pj9h5941", + "name": "SBR_FC_N2_50-60_H2Oext_13Oct15_Leopard_1_01_4413", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-567met04" + ], + "has_output": [ + "emsl:output_456469" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456469" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be7" + }, + "id": 
"nmdc:omprc-11-hpwqy918", + "name": "SBR_FC_N2_00-30_H2Oext_15Oct15_Leopard_1_01_4521", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-cjxybn89" + ], + "has_output": [ + "emsl:output_456455" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456455" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be8" + }, + "id": "nmdc:omprc-11-wgp5nv05", + "name": "SBR_FC_N2_50-60_H2Oext_15Oct15_Leopard_1_01_4524", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-567met04" + ], + "has_output": [ + "emsl:output_456470" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456470" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349be9" + }, + "id": "nmdc:omprc-11-9bn99f79", + "name": "SBR_FC_N2_40-50_H2Oext_13Oct15_Leopard_1_01_4412", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-93raqf43" + ], + "has_output": [ + "emsl:output_456464" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456464" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bea" + }, + "id": "nmdc:omprc-11-5m63ha78", + "name": "SBR_FC_N3_00-10_H2Oext_13Oct15_Leopard_1_01_4414", + "description": "High 
resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-g0c8gb14" + ], + "has_output": [ + "emsl:output_456474" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456474" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349beb" + }, + "id": "nmdc:omprc-11-db4my476", + "name": "SBR_FC_N2_00-30_H2Oext_13Oct15_Leopard_1_01_4410", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-cjxybn89" + ], + "has_output": [ + "emsl:output_456454" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456454" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bec" + }, + "id": "nmdc:omprc-11-gb2h5750", + "name": "SBR_FC_N3_00-10_H2Oext_15Oct15_Leopard_1_01_4525", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-g0c8gb14" + ], + "has_output": [ + "emsl:output_456475" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456475" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bed" + }, + "id": "nmdc:omprc-11-z2578j17", + "name": "SBR_FC_N2_40-50_H2Oext_15Oct15_Leopard_1_01_4523", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-93raqf43" + ], + "has_output": [ + "emsl:output_456465" + 
], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456465" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bee" + }, + "id": "nmdc:omprc-11-a53tkk54", + "name": "SBR_FC_N3_20-30_H2Oext_13Oct15_Leopard_1_01_4416", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-3z48kw62" + ], + "has_output": [ + "emsl:output_456484" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456484" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bef" + }, + "id": "nmdc:omprc-11-qkyx2m56", + "name": "SBR_FC_N3_10-20_H2Oext_15Oct15_Leopard_1_01_4526", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-6aq1va61" + ], + "has_output": [ + "emsl:output_456480" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456480" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf0" + }, + "id": "nmdc:omprc-11-dz40jm83", + "name": "SBR_FC_N3_10-20_H2Oext_13Oct15_Leopard_1_01_4415", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-6aq1va61" + ], + "has_output": [ + "emsl:output_456479" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + 
"has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456479" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf1" + }, + "id": "nmdc:omprc-11-dmk5wj06", + "name": "SBR_FC_N3_20-30_H2Oext_15Oct15_Leopard_1_01_4527", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-3z48kw62" + ], + "has_output": [ + "emsl:output_456485" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456485" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf2" + }, + "id": "nmdc:omprc-11-nvhp3684", + "name": "SBR_FC_N3_40-50_H2Oext_13Oct15_Leopard_1_01_4418", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-wvxw7m55" + ], + "has_output": [ + "emsl:output_456494" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456494" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf3" + }, + "id": "nmdc:omprc-11-x9b1vg36", + "name": "SBR_FC_N1_40-50_H2Oext_13Oct15_Leopard_1_01_4408", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-01n4q492" + ], + "has_output": [ + "emsl:output_456444" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": 
"nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456444" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf4" + }, + "id": "nmdc:omprc-11-mfsr0a96", + "name": "SBR_FC_N3_40-50_H2Oext_15Oct15_Leopard_1_01_4529", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-wvxw7m55" + ], + "has_output": [ + "emsl:output_456495" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456495" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf5" + }, + "id": "nmdc:omprc-11-vr9c4f75", + "name": "SBR_FC_N3_30-40_H2Oext_13Oct15_Leopard_1_01_4417", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-5cpjs440" + ], + "has_output": [ + "emsl:output_456489" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456489" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf6" + }, + "id": "nmdc:omprc-11-9r543r49", + "name": "SBR_FC_N3_30-40_H2Oext_15Oct15_Leopard_1_01_4528", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-5cpjs440" + ], + "has_output": [ + "emsl:output_456490" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456490" + ], + "downstream_workflow_activity_records": 
[] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf7" + }, + "id": "nmdc:omprc-11-2vfy3s61", + "name": "SBR_FC_N3_50-60_H2Oext_15Oct15_Leopard_1_01_4530", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-8saj5c05" + ], + "has_output": [ + "emsl:output_456500" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456500" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf8" + }, + "id": "nmdc:omprc-11-c1ngnw86", + "name": "SBR_FC_S1_00-10_H2Oext_15Oct15_Leopard_1_01_4531", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-m0hn5p98" + ], + "has_output": [ + "emsl:output_456505" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456505" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bf9" + }, + "id": "nmdc:omprc-11-excmek32", + "name": "SBR_FC_S1_10-20_H2Oext_15Oct15_Leopard_1_01_4532", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-br7th280" + ], + "has_output": [ + "emsl:output_456510" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456510" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bfa" + }, + "id": "nmdc:omprc-11-mefm4e20", + "name": 
"SBR_FC_S1_00-10_H2Oext_13Oct15_Leopard_1_01_4420", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-m0hn5p98" + ], + "has_output": [ + "emsl:output_456504" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456504" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bfb" + }, + "id": "nmdc:omprc-11-fzgdcp80", + "name": "SBR_FC_S1_20-30_H2Oext_13Oct15_Leopard_1_01_4422", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-qyz13338" + ], + "has_output": [ + "emsl:output_456514" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456514" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bfc" + }, + "id": "nmdc:omprc-11-3akts752", + "name": "SBR_FC_N3_50-60_H2Oext_13Oct15_Leopard_1_01_4419", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-8saj5c05" + ], + "has_output": [ + "emsl:output_456499" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456499" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bfd" + }, + "id": "nmdc:omprc-11-6sq6j166", + "name": "SBR_FC_S1_10-20_H2Oext_13Oct15_Leopard_1_01_4421", + "description": "High resolution MS spectra only", + "has_input": 
[ + "nmdc:bsm-11-br7th280" + ], + "has_output": [ + "emsl:output_456509" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456509" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bfe" + }, + "id": "nmdc:omprc-11-3awbyv03", + "name": "SBR_FC_S1_30-40_H2Oext_13Oct15_Leopard_1_01_4423", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-pzv0cf30" + ], + "has_output": [ + "emsl:output_456519" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456519" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349bff" + }, + "id": "nmdc:omprc-11-4kgm5w41", + "name": "SBR_FC_S1_20-30_H2Oext_15Oct15_Leopard_1_01_4533", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-qyz13338" + ], + "has_output": [ + "emsl:output_456515" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456515" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c00" + }, + "id": "nmdc:omprc-11-6xm1va42", + "name": "SBR_FC_S1_50-60_H2Oext_13Oct15_Leopard_1_01_4425", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-pnzwey18" + ], + "has_output": [ + "emsl:output_456529" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" 
+ ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456529" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c01" + }, + "id": "nmdc:omprc-11-t2yd9m69", + "name": "SBR_FC_S2_00-10_H2Oext_15Oct15_Leopard_1_01_4537", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-60ayn103" + ], + "has_output": [ + "emsl:output_456535" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456535" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c02" + }, + "id": "nmdc:omprc-11-9d9fxj83", + "name": "SBR_FC_S2_10-20_H2Oext_15Oct15_Leopard_1_01_4538", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-e3hp0a71" + ], + "has_output": [ + "emsl:output_456540" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456540" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c03" + }, + "id": "nmdc:omprc-11-1rkm8b52", + "name": "SBR_FC_S2_10-20_H2Oext_13Oct15_Leopard_1_01_4427", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-e3hp0a71" + ], + "has_output": [ + "emsl:output_456539" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + 
"processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456539" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c04" + }, + "id": "nmdc:omprc-11-bzhsj954", + "name": "SBR_FC_S2_30-40_H2Oext_15Oct15_Leopard_1_01_4540", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-73qw2q23" + ], + "has_output": [ + "emsl:output_456550" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456550" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c05" + }, + "id": "nmdc:omprc-11-ae716322", + "name": "SBR_FC_S2_20-30_H2Oext_13Oct15_Leopard_1_01_4428", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-pknfsy79" + ], + "has_output": [ + "emsl:output_456544" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456544" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c06" + }, + "id": "nmdc:omprc-11-ryy9gp08", + "name": "SBR_FC_S3_00-10_H2Oext_15Oct15_Leopard_1_01_4543", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-rs67sh23" + ], + "has_output": [ + "emsl:output_456565" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456565" + 
], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c07" + }, + "id": "nmdc:omprc-11-qx18h332", + "name": "SBR_FC_S3_10-20_H2Oext_13Oct15_Leopard_1_01_4433", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-83ghpk10" + ], + "has_output": [ + "emsl:output_456569" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456569" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c08" + }, + "id": "nmdc:omprc-11-k3e66918", + "name": "SBR_FC_S3_00-10_H2Oext_13Oct15_Leopard_1_01_4432", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-rs67sh23" + ], + "has_output": [ + "emsl:output_456564" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456564" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c09" + }, + "id": "nmdc:omprc-11-nq83nd29", + "name": "SBR_FC_S3_20-30_H2Oext_13Oct15_Leopard_1_01_4434", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-5p6f9e18" + ], + "has_output": [ + "emsl:output_456574" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456574" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c0a" + }, + "id": 
"nmdc:omprc-11-xsnaaj68", + "name": "SBR_FC_S2_20-30_H2Oext_15Oct15_Leopard_1_01_4539", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-pknfsy79" + ], + "has_output": [ + "emsl:output_456545" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456545" + ], + "downstream_workflow_activity_records": [] + }, + { + "_id": { + "$oid": "649b009773e8249959349c0b" + }, + "id": "nmdc:omprc-11-409hwb80", + "name": "SBR_FC_S3_30-40_H2Oext_13Oct15_Leopard_1_01_4435", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-ea7re871" + ], + "has_output": [ + "emsl:output_456579" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "instrument_name": "12T_FTICR_B", + "omics_type": { + "has_raw_value": "Organic Matter Characterization" + }, + "processing_institution": "EMSL", + "type": "nmdc:OmicsProcessing", + "alternative_identifiers": [ + "emsl:456579" + ], + "downstream_workflow_activity_records": [] + } +] \ No newline at end of file diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index ab054665..d7a4442b 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -83,6 +83,7 @@ def rebuild_workflow_records(study_id: str, site_config: bool): ) workflow_records_per_study = [] + # 2. 
For each OmicsProcessing record, find the legacy identifier: for omics_processing_record in omics_processing_records: logging.info(f"omics_processing_record: " @@ -90,6 +91,14 @@ def rebuild_workflow_records(study_id: str, site_config: bool): legacy_id = _get_legacy_id(omics_processing_record) logging.info(f"legacy_id: {legacy_id}") + omics_processing_record["downstream_workflow_activity_records"] = [] + + if (omics_processing_record["omics_type"]["has_raw_value"] != + "Metagenome"): + logging.info(f"omics_processing_record {omics_processing_record['id']} " + f"is not a Metagenome") + continue + # reads QC records # Downstream WorkflowExecutionActivity records depend on the `has_output` # data object of the ReadQcAnalysisActivity record. @@ -97,7 +106,19 @@ def rebuild_workflow_records(study_id: str, site_config: bool): read_qc_records = query_api_client.get_workflow_activity_informed_by( set_name, legacy_id ) + # Add the data objects referenced by the `has_output` property + for record in read_qc_records: + record["output_data_objects"] = [] + for data_object_id in record["has_output"]: + data_object_record = query_api_client.get_data_object_by_id( + data_object_id + ) + record["output_data_objects"].append(data_object_record) + + logging.info(f"Found {len(read_qc_records)} read_qc_records") + omics_processing_record[ + "downstream_workflow_activity_records"].extend(read_qc_records) # downstream workflow activity sets taxonomy_records, read_based_analysis_records, metagenome_assembly_records, \ @@ -115,11 +136,21 @@ def rebuild_workflow_records(study_id: str, site_config: bool): records = query_api_client.get_workflow_activity_informed_by( set_name, legacy_id ) + # Add the data objects referenced by the `has_output` property + for record in records: + record["output_data_objects"] = [] + for data_object_id in record["has_output"]: + data_object_record = query_api_client.get_data_object_by_id( + data_object_id + ) + 
record["output_data_objects"].append(data_object_record) + omics_processing_record[ + "downstream_workflow_activity_records"].extend(records) workflow_records_per_study.append(records) logging.info(f"Found {len(records)} {set_name} records") with open(f"{study_id}_assocated_record_dump.json", 'w') as json_file: - json.dump(workflow_records_per_study, json_file, indent=4) + json.dump(omics_processing_records, json_file, indent=4) if __name__ == "__main__": From 53b8c52345ceb28494a95405bfb57fa916f61955 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Tue, 31 Oct 2023 14:18:42 -0700 Subject: [PATCH 17/91] added json reader --- nmdc_automation/re_iding/re_id_file_operations.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/nmdc_automation/re_iding/re_id_file_operations.py b/nmdc_automation/re_iding/re_id_file_operations.py index cf1d982d..8ca469c1 100644 --- a/nmdc_automation/re_iding/re_id_file_operations.py +++ b/nmdc_automation/re_iding/re_id_file_operations.py @@ -1,6 +1,7 @@ import subprocess import gzip import os +import json import hashlib from subprocess import check_output @@ -20,6 +21,20 @@ def md5_sum(fn): file_hash.update(chunk) return file_hash.hexdigest() +def read_json_file(filename): + """ + Read a JSON file and return its content as a dictionary. + + Parameters: + - filename (str): The path to the JSON file. + + Returns: + - dict: The content of the JSON file. + """ + with open(filename, 'r') as json_file: + data = json.load(json_file) + return data + def rewrite_id(src, dst, old_id, new_id, prefix=None): """ Rewrite lines in a file, replacing occurrences of an old ID with a new ID. 
From e06b4964cbd6d6167a329ddf3b8a62766e3a6177 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Tue, 31 Oct 2023 14:19:13 -0700 Subject: [PATCH 18/91] started to break down workflows updating process --- nmdc_automation/re_iding/re_id_process.py | 71 ++++++++++++++++++----- 1 file changed, 56 insertions(+), 15 deletions(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index ec975584..633129bb 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -14,7 +14,7 @@ ###GLOBAL###### nmdc_db = nmdc.Database() -runtime_api = NmdcRuntimeApi("../../configs/site_configuration.toml") +runtime_api = NmdcRuntimeApi("../../configs/napa_config.toml") base = "https://data.microbiomedata.org/data" base_dir = "/global/cfs/cdirs/m3408/results" @@ -32,7 +32,7 @@ def read_workflows_config(config_file): with open(config_file, "r") as file: workflow_data = yaml.safe_load(file) - return workflow_data + return workflow_data["Workflows"] def log_mapping(idtype, old, new): """ @@ -265,6 +265,56 @@ def post_database_object_to_runtime(datase_object): res = runtime_api.post_objects(nmdc_database_object) return res +def get_omics_id(omics_record): + return omics_record["id"] + +def get_record_by_type(omics_children_records, record_type): + """ + Reads a JSON file and returns the record that matches the given type. + + Parameters: + - filename (str): The path to the JSON file. + - record_type (str): The desired type value to match. + + Returns: + - dict: The first record that matches the given type, or None if not found. 
+ """ + + for analysis_record in omics_children_records["downstream_workflow_activity_records"]: + if analysis_record.get("type") == record_type: + return analysis_record + + return None + +def reads_qc_update(omics_record, template_file): + + workflow_type = "nmdc:ReadQCAnalysisActivity" + + reads_qc_record = get_record_by_type(omics_record, workflow_type) + for template in read_workflows_config(template_file): + if template['Type'] == "nmdc:ReadQcAnalysisActivity": + reads_qc_template = template + + #TODO: + #Use nmdc_schema db and update workflow recors and data objects for reads qc (update files) + + print(reads_qc_record, reads_qc_template) + + +def process_analysis_sets(study_records, template_file,dry_run=False): + + count = 0 + for omic_record in study_records: + omics_id = get_omics_id(omic_record) + print(omics_id) + reads_qc_update(omic_record, template_file) + if dry_run == True: + count += 1 + if count == 1: + break + + + def main(): #TODO #1. Read in json dump of analysis records @@ -276,16 +326,7 @@ def main(): pass if __name__ == "__main__": - mongo_url = os.environ["MONGO_URL"] - client = MongoClient(mongo_url, directConnection=True) - db = client.nmdc - # Read mapping list - # This should have: - # was_informed_by_old\twas_informed_by_new - # e.g. 
- # nmdc:mga0xxxxx nmdc:omprc-11-xxxxx - omic_map = read_map() - for omic in omic_map: - process(db, omic, omic_map[omic]) - # for each omics process - # for act in [ + test_file = "scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json" + template_file = "/global/cfs/cdirs/m3408/aim2/dev/reiding_scripts/nmdc_automation/configs/re_iding_worklfows.yaml" + stegen_data = read_json_file(test_file) + process_analysis_sets(stegen_data, template_file, dry_run=True) \ No newline at end of file From 2196856d33e87cc1ca6708b29b41208630668cae Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 31 Oct 2023 16:16:30 -0700 Subject: [PATCH 19/91] Change script output to a list of serialized Database instances --- ...sty-11-aygzgv51_assocated_record_dump.json | 60111 ++++++++-------- .../rebuild_metagenome_workflow_records.py | 48 +- 2 files changed, 30055 insertions(+), 30104 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json index a7146766..37a912d9 100644 --- a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json +++ b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json @@ -1,301 +1,559 @@ [ { - "_id": { - "$oid": "649b009773e8249959349b33" - }, - "id": "nmdc:omprc-11-bn8jcq58", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", - "has_input": [ - "nmdc:bsm-11-qq8s6x03" - ], - "has_output": [ - "jgi:55d740280d8785342fcf7e39" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", - "omics_type": { - 
"has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115663" - ], - "downstream_workflow_activity_records": [ + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115663_Filtered Reads", + "description": "Filtered Reads for Gp0115663", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "id": "nmdc:7bf778baef033d36f118f8591256d6ef", + "file_size_bytes": 2571324879 + }, + { + "name": "Gp0115663_Filtered Stats", + "description": "Filtered Stats for Gp0115663", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", + "file_size_bytes": 290 + }, + { + "name": "Gp0115663_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "file_size_bytes": 13174 + }, + { + "name": "Gp0115663_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "file_size_bytes": 1035818 + }, + { + "name": "Gp0115663_Gottcha2 Krona HTML 
report", + "description": "Gottcha2 Krona HTML report for Gp0115663", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "file_size_bytes": 262669 + }, + { + "name": "Gp0115663_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115663", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "file_size_bytes": 2189843623 + }, + { + "name": "Gp0115663_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115663", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "file_size_bytes": 260134 + }, + { + "name": "Gp0115663_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115663", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "file_size_bytes": 2343980 + }, + { + "name": "Gp0115663_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115663", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", + 
"md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", + "file_size_bytes": 1785563917 + }, + { + "name": "Gp0115663_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115663", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "file_size_bytes": 699896 + }, + { + "name": "Gp0115663_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115663", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", + "file_size_bytes": 4221977 + }, + { + "name": "Gp0115663_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "file_size_bytes": 13174 + }, + { + "name": "Gp0115663_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "file_size_bytes": 1035818 + }, + { + "name": "Gp0115663_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115663", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", + "md5_checksum": 
"6b5bc6ce7f11c1336a5f85a98fc18541", + "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "file_size_bytes": 262669 + }, + { + "name": "Gp0115663_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115663", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "file_size_bytes": 2189843623 + }, + { + "name": "Gp0115663_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115663", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "file_size_bytes": 260134 + }, + { + "name": "Gp0115663_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115663", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "file_size_bytes": 2343980 + }, + { + "name": "Gp0115663_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115663", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", + "file_size_bytes": 1785563917 + }, + { + "name": "Gp0115663_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115663", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "file_size_bytes": 699896 + }, + { + "name": "Gp0115663_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115663", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", + "file_size_bytes": 4221977 + }, + { + "name": "Gp0115663_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115663", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", + "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", + "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "file_size_bytes": 90115831 + }, + { + "name": "Gp0115663_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115663", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", + "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "id": "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "file_size_bytes": 89604715 + }, + { + "name": "Gp0115663_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", + "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "file_size_bytes": 13412363 + }, + { + "name": "Gp0115663_Assembled AGP file", + "description": "Assembled AGP file for Gp0115663", + "data_object_type": "Assembly AGP", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", + "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "id": "nmdc:f450e3800e17691d5874c89fc46c186a", + "file_size_bytes": 12542171 + }, + { + "name": "Gp0115663_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115663", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", + "md5_checksum": "31dc958d116d02122509e90b0883954f", + "id": "nmdc:31dc958d116d02122509e90b0883954f", + "file_size_bytes": 2773429299 + }, + { + "name": "Gp0115663_Protein FAA", + "description": "Protein FAA for Gp0115663", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", + "md5_checksum": "879988d212ecec46928b8598e2f8391f", + "id": "nmdc:879988d212ecec46928b8598e2f8391f", + "file_size_bytes": 50165060 + }, + { + "name": "Gp0115663_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115663", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", + "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", + "id": "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "file_size_bytes": 2767 + }, + { + "name": "Gp0115663_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115663", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", + "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", + "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "file_size_bytes": 55139586 + }, + { + "name": "Gp0115663_KO TSV file", + "description": "KO TSV file for Gp0115663", + 
"data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", + "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", + "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "file_size_bytes": 6023696 + }, + { + "name": "Gp0115663_EC TSV file", + "description": "EC TSV file for Gp0115663", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", + "md5_checksum": "4f88c89459f36655eb7c1eceec19602a", + "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", + "file_size_bytes": 3982918 + }, + { + "name": "Gp0115663_COG GFF file", + "description": "COG GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", + "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", + "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "file_size_bytes": 27362917 + }, + { + "name": "Gp0115663_PFAM GFF file", + "description": "PFAM GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", + "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", + "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", + "file_size_bytes": 21572048 + }, + { + "name": "Gp0115663_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", + "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", + "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "file_size_bytes": 2900068 + }, + { + "name": "Gp0115663_SMART GFF file", + "description": "SMART GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", + "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", + "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "file_size_bytes": 6905519 + }, + { + "name": 
"Gp0115663_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", + "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", + "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "file_size_bytes": 38787856 + }, + { + "name": "Gp0115663_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", + "md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", + "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "file_size_bytes": 30134783 + }, + { + "name": "Gp0115663_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", + "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", + "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", + "file_size_bytes": 19194308 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", + "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", + "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "file_size_bytes": 70411007 + }, + { + "name": "Gp0115663_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115663", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", + "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", + "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "file_size_bytes": 15998690 + }, + { + "name": "Gp0115663_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115663", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", + "md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", + "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "file_size_bytes": 1530 + }, + { + "name": "Gp0115663_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115663", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", + "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", + "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "file_size_bytes": 182 + }, + { + "name": "Gp0115663_metabat2 bins", + "description": "metabat2 bins for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", + "md5_checksum": "27c07072f175571200b5931550adb8aa", + "id": "nmdc:27c07072f175571200b5931550adb8aa", + "file_size_bytes": 1114314 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c88b" + "$oid": "649b0052ec087f6bbab34734" }, "has_input": [ - "nmdc:30a06664f29cffbbbc49abad86eae6fc" + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:31dc958d116d02122509e90b0883954f", + "nmdc:002e4ebc728f8b91cb5f298d340ab013" ], + "too_short_contig_num": 159810, "part_of": [ "nmdc:mga0h9dt75" ], + "binned_contig_num": 684, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - 
"nmdc:7bf778baef033d36f118f8591256d6ef", - "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" - ], - "was_informed_by": "gold:Gp0115663", - "input_read_count": 32238374, - "output_read_bases": 4608772924, - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4867994474, - "name": "Read QC Activity for nmdc:mga0h9dt75", - "output_read_count": 30774080, - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00", - "output_data_objects": [ - { - "name": "Gp0115663_Filtered Reads", - "description": "Filtered Reads for Gp0115663", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", - "md5_checksum": "7bf778baef033d36f118f8591256d6ef", - "id": "nmdc:7bf778baef033d36f118f8591256d6ef", - "file_size_bytes": 2571324879 - }, - { - "name": "Gp0115663_Filtered Stats", - "description": "Filtered Stats for Gp0115663", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", - "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", - "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", - "file_size_bytes": 290 - } - ] - }, - { - "_id": { - "$oid": "649b009bff710ae353f8cf4f" - }, - "has_input": [ - "nmdc:7bf778baef033d36f118f8591256d6ef" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "nmdc:7ca01ea379f0baed96f87d1435925f95", - "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + 
"nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "nmdc:27c07072f175571200b5931550adb8aa" ], "was_informed_by": "gold:Gp0115663", + "input_contig_num": 169782, "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:56:04+00:00", - "output_data_objects": [ - { - "name": "Gp0115663_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", - "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", - "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "file_size_bytes": 13174 - }, - { - "name": "Gp0115663_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", - "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", - "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "file_size_bytes": 1035818 - }, - { - "name": "Gp0115663_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115663", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", - "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", - "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "file_size_bytes": 262669 - }, - { - "name": "Gp0115663_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115663", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", - "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", - "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "file_size_bytes": 2189843623 - }, + "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", + "mags_list": [ { - "name": "Gp0115663_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115663", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", - "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", - "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "file_size_bytes": 260134 + "number_of_contig": 61, + "completeness": 13.82, + "bin_name": "bins.1", + "gene_count": 294, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.62, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 }, { - "name": "Gp0115663_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115663", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", - "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", - "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "file_size_bytes": 2343980 + "number_of_contig": 485, + "completeness": 66.03, + "bin_name": "bins.2", + "gene_count": 2871, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.87, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 32 }, { - "name": "Gp0115663_Kraken classification TSV report", - "description": "Kraken 
classification TSV report for Gp0115663", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", - "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", - "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", - "file_size_bytes": 1785563917 + "number_of_contig": 56, + "completeness": 34.23, + "bin_name": "bins.3", + "gene_count": 337, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 }, { - "name": "Gp0115663_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115663", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", - "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", - "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "file_size_bytes": 699896 + "number_of_contig": 63, + "completeness": 6.9, + "bin_name": "bins.4", + "gene_count": 276, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 }, { - "name": "Gp0115663_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115663", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", - "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", - "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", - "file_size_bytes": 4221977 + "number_of_contig": 19, + "completeness": 4.45, + "bin_name": "bins.5", + "gene_count": 463, + 
"bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 } - ] - }, + ], + "unbinned_contig_num": 9288, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "61e71a31833bcf838a701ec1" + "$oid": "649b005bbf2caae0415ef9d6" }, "has_input": [ - "nmdc:7bf778baef033d36f118f8591256d6ef" + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" ], "part_of": [ "nmdc:mga0h9dt75" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "nmdc:7ca01ea379f0baed96f87d1435925f95", - "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + "nmdc:879988d212ecec46928b8598e2f8391f", + "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "nmdc:4f88c89459f36655eb7c1eceec19602a", + "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "nmdc:618b18fa8635c80cc0091371f451a6f0", + "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "nmdc:75f481e0d98793cfb4f9508cb3e31622" ], "was_informed_by": "gold:Gp0115663", "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for 
nmdc:mga0h9dt75", + "name": "Annotation Activity for nmdc:mga0h9dt75", "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:56:04+00:00", - "output_data_objects": [ - { - "name": "Gp0115663_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", - "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", - "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "file_size_bytes": 13174 - }, - { - "name": "Gp0115663_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", - "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", - "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "file_size_bytes": 1035818 - }, - { - "name": "Gp0115663_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115663", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", - "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", - "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "file_size_bytes": 262669 - }, - { - "name": "Gp0115663_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115663", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", - "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", - "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "file_size_bytes": 2189843623 - }, - { - "name": "Gp0115663_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115663", - "data_object_type": "Centrifuge Classification Report", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", - "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", - "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "file_size_bytes": 260134 - }, - { - "name": "Gp0115663_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115663", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", - "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", - "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "file_size_bytes": 2343980 - }, - { - "name": "Gp0115663_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115663", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", - "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", - "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", - "file_size_bytes": 1785563917 - }, - { - "name": "Gp0115663_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115663", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", - "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", - "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "file_size_bytes": 699896 - }, - { - "name": "Gp0115663_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115663", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", - "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", - "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", - "file_size_bytes": 4221977 - } - ] - }, + "type": 
"nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { "$oid": "649b005f2ca5ee4adb139fb9" @@ -345,564 +603,645 @@ "scaf_n90": 141870, "scaf_l_gt50k": 68135, "scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.08160224, - "output_data_objects": [ - { - "name": "Gp0115663_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115663", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", - "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", - "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", - "file_size_bytes": 90115831 - }, - { - "name": "Gp0115663_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115663", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", - "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", - "id": "nmdc:b3573e3cda5a06611de71ca04c5c14cc", - "file_size_bytes": 89604715 - }, - { - "name": "Gp0115663_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", - "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", - "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", - "file_size_bytes": 13412363 - }, - { - "name": "Gp0115663_Assembled AGP file", - "description": "Assembled AGP file for Gp0115663", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", - "md5_checksum": "f450e3800e17691d5874c89fc46c186a", - "id": "nmdc:f450e3800e17691d5874c89fc46c186a", - "file_size_bytes": 12542171 - }, - { - "name": "Gp0115663_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115663", - "data_object_type": 
"Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", - "md5_checksum": "31dc958d116d02122509e90b0883954f", - "id": "nmdc:31dc958d116d02122509e90b0883954f", - "file_size_bytes": 2773429299 - } + "scaf_pct_gt50k": 0.08160224 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b33" + }, + "id": "nmdc:omprc-11-bn8jcq58", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qq8s6x03" + ], + "has_output": [ + "jgi:55d740280d8785342fcf7e39" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115663" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9d6" + "$oid": "649b009d6bdd4fd20273c88b" }, "has_input": [ - "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" + "nmdc:30a06664f29cffbbbc49abad86eae6fc" ], "part_of": [ "nmdc:mga0h9dt75" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:879988d212ecec46928b8598e2f8391f", - "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", - "nmdc:002e4ebc728f8b91cb5f298d340ab013", - 
"nmdc:6851078f29716d89e3f41f0969ae7bf0", - "nmdc:4f88c89459f36655eb7c1eceec19602a", - "nmdc:a068b9ce6ebb7deb15ff932b513817a9", - "nmdc:618b18fa8635c80cc0091371f451a6f0", - "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", - "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", - "nmdc:fdd2e8741ffef40db383674a10bb4d11", - "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", - "nmdc:75f481e0d98793cfb4f9508cb3e31622" + "nmdc:7bf778baef033d36f118f8591256d6ef", + "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" ], "was_informed_by": "gold:Gp0115663", + "input_read_count": 32238374, + "output_read_bases": 4608772924, "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0h9dt75", + "input_read_bases": 4867994474, + "name": "Read QC Activity for nmdc:mga0h9dt75", + "output_read_count": 30774080, "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00", - "output_data_objects": [ - { - "name": "Gp0115663_Protein FAA", - "description": "Protein FAA for Gp0115663", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", - "md5_checksum": "879988d212ecec46928b8598e2f8391f", - "id": "nmdc:879988d212ecec46928b8598e2f8391f", - "file_size_bytes": 50165060 - }, - { - "name": "Gp0115663_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115663", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", - "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", - "id": "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", - "file_size_bytes": 2767 - }, - { - "name": "Gp0115663_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115663", - "data_object_type": "Functional Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", - "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", - "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", - "file_size_bytes": 55139586 - }, - { - "name": "Gp0115663_KO TSV file", - "description": "KO TSV file for Gp0115663", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", - "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", - "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", - "file_size_bytes": 6023696 - }, - { - "name": "Gp0115663_EC TSV file", - "description": "EC TSV file for Gp0115663", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", - "md5_checksum": "4f88c89459f36655eb7c1eceec19602a", - "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", - "file_size_bytes": 3982918 - }, - { - "name": "Gp0115663_COG GFF file", - "description": "COG GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", - "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", - "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", - "file_size_bytes": 27362917 - }, - { - "name": "Gp0115663_PFAM GFF file", - "description": "PFAM GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", - "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", - "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", - "file_size_bytes": 21572048 - }, - { - "name": "Gp0115663_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", - "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", - "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", - "file_size_bytes": 2900068 - }, - { - "name": 
"Gp0115663_SMART GFF file", - "description": "SMART GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", - "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", - "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", - "file_size_bytes": 6905519 - }, - { - "name": "Gp0115663_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", - "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", - "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", - "file_size_bytes": 38787856 - }, - { - "name": "Gp0115663_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", - "md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", - "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", - "file_size_bytes": 30134783 - }, - { - "name": "Gp0115663_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", - "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", - "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", - "file_size_bytes": 19194308 - } - ] - }, + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34734" + "$oid": "649b009bff710ae353f8cf4f" }, "has_input": [ - "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", - "nmdc:31dc958d116d02122509e90b0883954f", - "nmdc:002e4ebc728f8b91cb5f298d340ab013" + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + 
"nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "nmdc:7ca01ea379f0baed96f87d1435925f95", + "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e71a31833bcf838a701ec1" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" ], - "too_short_contig_num": 159810, "part_of": [ "nmdc:mga0h9dt75" ], - "binned_contig_num": 684, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:c092b018cb4652c4ca0620b37a4b3fad", - "nmdc:70d7c8a307f47adb05056bee1b01f9d4", - "nmdc:4545ab2039ae70f4439a93316f4fb7bc", - "nmdc:280b63ae1cc1fa8d6154a0681d47c399", - "nmdc:27c07072f175571200b5931550adb8aa" + "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "nmdc:7ca01ea379f0baed96f87d1435925f95", + "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" ], "was_informed_by": "gold:Gp0115663", - "input_contig_num": 169782, "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", - "mags_list": [ - { - "number_of_contig": 61, 
- "completeness": 13.82, - "bin_name": "bins.1", - "gene_count": 294, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.62, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 0 - }, - { - "number_of_contig": 485, - "completeness": 66.03, - "bin_name": "bins.2", - "gene_count": 2871, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 10.87, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 32 - }, - { - "number_of_contig": 56, - "completeness": 34.23, - "bin_name": "bins.3", - "gene_count": 337, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 9 - }, - { - "number_of_contig": 63, - "completeness": 6.9, - "bin_name": "bins.4", - "gene_count": 276, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 19, - "completeness": 4.45, - "bin_name": "bins.5", - "gene_count": 463, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - } - ], - "unbinned_contig_num": 9288, + "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", "started_at_time": "2021-10-11T02:28:26Z", - "type": 
"nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", - "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", - "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", - "file_size_bytes": 70411007 - }, - { - "name": "Gp0115663_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", - "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", - "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", - "file_size_bytes": 15998690 - }, - { - "name": "Gp0115663_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115663", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", - "md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", - "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", - "file_size_bytes": 1530 - }, - { - "name": "Gp0115663_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115663", - "data_object_type": "Metagenome Bins", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", - "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", - "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", - "file_size_bytes": 182 - }, - { - "name": "Gp0115663_metabat2 bins", - "description": "metabat2 bins for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", - "md5_checksum": "27c07072f175571200b5931550adb8aa", - "id": "nmdc:27c07072f175571200b5931550adb8aa", - "file_size_bytes": 1114314 - } - ] + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:56:04+00:00" } ] }, { - "_id": { - "$oid": "649b009773e8249959349b34" - }, - "id": "nmdc:omprc-11-zp2ar437", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", - "has_input": [ - "nmdc:bsm-11-4qsqg549" - ], - "has_output": [ - "jgi:55d817fc0d8785342fcf8274" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115666" - ], - "downstream_workflow_activity_records": [ + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115666_Filtered Reads", + "description": "Filtered Reads for Gp0115666", + "data_object_type": "Filtered Sequencing Reads", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filtered.fastq.gz", + "md5_checksum": "0b301d2dd917c2be31422dd0e986dd5e", + "id": "nmdc:0b301d2dd917c2be31422dd0e986dd5e", + "file_size_bytes": 1806510860 + }, + { + "name": "Gp0115666_Filtered Stats", + "description": "Filtered Stats for Gp0115666", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filterStats.txt", + "md5_checksum": "0634e8261ce976d167457993d7f7a4ec", + "id": "nmdc:0634e8261ce976d167457993d7f7a4ec", + "file_size_bytes": 289 + }, + { + "name": "Gp0115666_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", + "md5_checksum": "17454627f873cc37e80700c4751c81d6", + "id": "nmdc:17454627f873cc37e80700c4751c81d6", + "file_size_bytes": 10721 + }, + { + "name": "Gp0115666_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", + "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", + "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "file_size_bytes": 920924 + }, + { + "name": "Gp0115666_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115666", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", + "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", + "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "file_size_bytes": 257441 + }, + { + "name": "Gp0115666_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115666", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", + "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", + "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "file_size_bytes": 1468295025 + }, + { + "name": "Gp0115666_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115666", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", + "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", + "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + "file_size_bytes": 257081 + }, + { + "name": "Gp0115666_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115666", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", + "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", + "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", + "file_size_bytes": 2331968 + }, + { + "name": "Gp0115666_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115666", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", + "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", + "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "file_size_bytes": 1204548180 + }, + { + "name": "Gp0115666_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115666", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", + "md5_checksum": "368cf81424348cdf46d17c13908280e7", + "id": "nmdc:368cf81424348cdf46d17c13908280e7", + "file_size_bytes": 653697 + }, + { + "name": "Gp0115666_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115666", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", + "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", + "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", + "file_size_bytes": 3983935 + }, + { + "name": "Gp0115666_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", + "md5_checksum": "17454627f873cc37e80700c4751c81d6", + "id": "nmdc:17454627f873cc37e80700c4751c81d6", + "file_size_bytes": 10721 + }, + { + "name": "Gp0115666_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", + "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", + "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "file_size_bytes": 920924 + }, + { + "name": "Gp0115666_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115666", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", + "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", + "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "file_size_bytes": 257441 + }, + { + "name": "Gp0115666_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115666", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", + "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", + "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "file_size_bytes": 1468295025 + }, + { + "name": 
"Gp0115666_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115666", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", + "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", + "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + "file_size_bytes": 257081 + }, + { + "name": "Gp0115666_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115666", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", + "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", + "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", + "file_size_bytes": 2331968 + }, + { + "name": "Gp0115666_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115666", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", + "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", + "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "file_size_bytes": 1204548180 + }, + { + "name": "Gp0115666_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115666", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", + "md5_checksum": "368cf81424348cdf46d17c13908280e7", + "id": "nmdc:368cf81424348cdf46d17c13908280e7", + "file_size_bytes": 653697 + }, + { + "name": "Gp0115666_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115666", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", + "md5_checksum": 
"b5091cfeed4fbea8316e50fbceea89bc", + "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", + "file_size_bytes": 3983935 + }, + { + "name": "Gp0115666_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115666", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_contigs.fna", + "md5_checksum": "e557facdf4c3066ba4b5ba168995ba85", + "id": "nmdc:e557facdf4c3066ba4b5ba168995ba85", + "file_size_bytes": 63269472 + }, + { + "name": "Gp0115666_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115666", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_scaffolds.fna", + "md5_checksum": "92cc678ca9e54cb92118b9ae746fb996", + "id": "nmdc:92cc678ca9e54cb92118b9ae746fb996", + "file_size_bytes": 62917914 + }, + { + "name": "Gp0115666_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_covstats.txt", + "md5_checksum": "7082b41c627571a03466f94ba80c15b8", + "id": "nmdc:7082b41c627571a03466f94ba80c15b8", + "file_size_bytes": 9179769 + }, + { + "name": "Gp0115666_Assembled AGP file", + "description": "Assembled AGP file for Gp0115666", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_assembly.agp", + "md5_checksum": "c5ccd39d97d652d5ec8804202a324b0e", + "id": "nmdc:c5ccd39d97d652d5ec8804202a324b0e", + "file_size_bytes": 8550216 + }, + { + "name": "Gp0115666_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115666", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_pairedMapped_sorted.bam", + "md5_checksum": "3ece2c377622cebdddfb9322047cb115", + "id": 
"nmdc:3ece2c377622cebdddfb9322047cb115", + "file_size_bytes": 1940309089 + }, + { + "name": "Gp0115666_Protein FAA", + "description": "Protein FAA for Gp0115666", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_proteins.faa", + "md5_checksum": "4d509c29cad07f0b18d3f7e0e724c493", + "id": "nmdc:4d509c29cad07f0b18d3f7e0e724c493", + "file_size_bytes": 35706777 + }, + { + "name": "Gp0115666_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115666", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_structural_annotation.gff", + "md5_checksum": "60d04bb0a2d1a1d593bd849a2a13e405", + "id": "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", + "file_size_bytes": 2520 + }, + { + "name": "Gp0115666_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115666", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_functional_annotation.gff", + "md5_checksum": "91cd273ea95a29b2c4e326c56eafe08a", + "id": "nmdc:91cd273ea95a29b2c4e326c56eafe08a", + "file_size_bytes": 40030386 + }, + { + "name": "Gp0115666_KO TSV file", + "description": "KO TSV file for Gp0115666", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko.tsv", + "md5_checksum": "e08c6253ec5a15eb43d8cb4d69d09d4c", + "id": "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", + "file_size_bytes": 5584125 + }, + { + "name": "Gp0115666_EC TSV file", + "description": "EC TSV file for Gp0115666", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ec.tsv", + "md5_checksum": "9edfc4fee191b722148af1e2648f787f", + "id": 
"nmdc:9edfc4fee191b722148af1e2648f787f", + "file_size_bytes": 3575242 + }, + { + "name": "Gp0115666_COG GFF file", + "description": "COG GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cog.gff", + "md5_checksum": "886402044865256b80bfaf42ca148a61", + "id": "nmdc:886402044865256b80bfaf42ca148a61", + "file_size_bytes": 23390091 + }, + { + "name": "Gp0115666_PFAM GFF file", + "description": "PFAM GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_pfam.gff", + "md5_checksum": "1b2bc9b96a15ebdfe3ff1e30027544af", + "id": "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", + "file_size_bytes": 18444613 + }, + { + "name": "Gp0115666_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_tigrfam.gff", + "md5_checksum": "2d730834b8841b7a7ad30786bff382fa", + "id": "nmdc:2d730834b8841b7a7ad30786bff382fa", + "file_size_bytes": 2596225 + }, + { + "name": "Gp0115666_SMART GFF file", + "description": "SMART GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_smart.gff", + "md5_checksum": "46d62d69e48d7aeecb87106e02102753", + "id": "nmdc:46d62d69e48d7aeecb87106e02102753", + "file_size_bytes": 4932262 + }, + { + "name": "Gp0115666_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_supfam.gff", + "md5_checksum": "1896e41000aa9e4acc98cc7702e42304", + "id": "nmdc:1896e41000aa9e4acc98cc7702e42304", + "file_size_bytes": 28911479 + }, + { + "name": "Gp0115666_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cath_funfam.gff", + "md5_checksum": "f40bfd77fb3f24be2529fdafc01104c7", + 
"id": "nmdc:f40bfd77fb3f24be2529fdafc01104c7", + "file_size_bytes": 22881869 + }, + { + "name": "Gp0115666_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko_ec.gff", + "md5_checksum": "920be8f090654360619fbb16163b8513", + "id": "nmdc:920be8f090654360619fbb16163b8513", + "file_size_bytes": 17844749 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115666_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.tooShort.fa", + "md5_checksum": "9944a9020ce981a2423ca81424998e66", + "id": "nmdc:9944a9020ce981a2423ca81424998e66", + "file_size_bytes": 46766610 + }, + { + "name": "Gp0115666_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.unbinned.fa", + "md5_checksum": "d2a24728b9006fd4fb4bf4f326138dc2", + "id": "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", + "file_size_bytes": 11382048 + }, + { + "name": "Gp0115666_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115666", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_checkm_qa.out", + "md5_checksum": "415dfed655f9c4673f2cce4f9947c2e4", + "id": 
"nmdc:415dfed655f9c4673f2cce4f9947c2e4", + "file_size_bytes": 1020 + }, + { + "name": "Gp0115666_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115666", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_hqmq_bin.zip", + "md5_checksum": "be6482b534716166ce5daea5a07cba06", + "id": "nmdc:be6482b534716166ce5daea5a07cba06", + "file_size_bytes": 182 + }, + { + "name": "Gp0115666_metabat2 bins", + "description": "metabat2 bins for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_metabat_bin.zip", + "md5_checksum": "df08913532a84681996a29d1a1c127b3", + "id": "nmdc:df08913532a84681996a29d1a1c127b3", + "file_size_bytes": 1559491 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c887" + "$oid": "649b0052ec087f6bbab34730" }, "has_input": [ - "nmdc:76893480c05758ad2977df78a5b050e5" + "nmdc:e557facdf4c3066ba4b5ba168995ba85", + "nmdc:3ece2c377622cebdddfb9322047cb115", + "nmdc:91cd273ea95a29b2c4e326c56eafe08a" ], + "too_short_contig_num": 108937, "part_of": [ "nmdc:mga0eehe16" ], + "binned_contig_num": 899, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:0b301d2dd917c2be31422dd0e986dd5e", - "nmdc:0634e8261ce976d167457993d7f7a4ec" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:9944a9020ce981a2423ca81424998e66", + "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", + "nmdc:415dfed655f9c4673f2cce4f9947c2e4", + "nmdc:be6482b534716166ce5daea5a07cba06", + "nmdc:df08913532a84681996a29d1a1c127b3" ], "was_informed_by": "gold:Gp0115666", - "input_read_count": 22183982, - "output_read_bases": 3025260554, + "input_contig_num": 116661, "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", "execution_resource": "NERSC-Cori", - "input_read_bases": 3349781282, - "name": 
"Read QC Activity for nmdc:mga0eehe16", - "output_read_count": 20195754, - "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:06:19+00:00", - "output_data_objects": [ + "name": "MAGs Analysis Activity for nmdc:mga0eehe16", + "mags_list": [ { - "name": "Gp0115666_Filtered Reads", - "description": "Filtered Reads for Gp0115666", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filtered.fastq.gz", - "md5_checksum": "0b301d2dd917c2be31422dd0e986dd5e", - "id": "nmdc:0b301d2dd917c2be31422dd0e986dd5e", - "file_size_bytes": 1806510860 + "number_of_contig": 216, + "completeness": 41.57, + "bin_name": "bins.1", + "gene_count": 1176, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.93, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 }, { - "name": "Gp0115666_Filtered Stats", - "description": "Filtered Stats for Gp0115666", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filterStats.txt", - "md5_checksum": "0634e8261ce976d167457993d7f7a4ec", - "id": "nmdc:0634e8261ce976d167457993d7f7a4ec", - "file_size_bytes": 289 + "number_of_contig": 683, + "completeness": 87.59, + "bin_name": "bins.2", + "gene_count": 4526, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 33.23, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 56 } - ] - }, + ], + "unbinned_contig_num": 6825, + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00" + } + ], + "material_sample_set": [], 
+ "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf55" + "$oid": "649b005bbf2caae0415ef9ce" }, "has_input": [ - "nmdc:0b301d2dd917c2be31422dd0e986dd5e" + "nmdc:e557facdf4c3066ba4b5ba168995ba85" + ], + "part_of": [ + "nmdc:mga0eehe16" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:17454627f873cc37e80700c4751c81d6", - "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "nmdc:368cf81424348cdf46d17c13908280e7", - "nmdc:b5091cfeed4fbea8316e50fbceea89bc" + "nmdc:4d509c29cad07f0b18d3f7e0e724c493", + "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", + "nmdc:91cd273ea95a29b2c4e326c56eafe08a", + "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", + "nmdc:9edfc4fee191b722148af1e2648f787f", + "nmdc:886402044865256b80bfaf42ca148a61", + "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", + "nmdc:2d730834b8841b7a7ad30786bff382fa", + "nmdc:46d62d69e48d7aeecb87106e02102753", + "nmdc:1896e41000aa9e4acc98cc7702e42304", + "nmdc:f40bfd77fb3f24be2529fdafc01104c7", + "nmdc:920be8f090654360619fbb16163b8513" ], "was_informed_by": "gold:Gp0115666", "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", + "name": "Annotation Activity for nmdc:mga0eehe16", "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:06:19+00:00", - "output_data_objects": [ - { - "name": "Gp0115666_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", - "md5_checksum": 
"17454627f873cc37e80700c4751c81d6", - "id": "nmdc:17454627f873cc37e80700c4751c81d6", - "file_size_bytes": 10721 - }, - { - "name": "Gp0115666_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", - "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", - "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "file_size_bytes": 920924 - }, - { - "name": "Gp0115666_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115666", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", - "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", - "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "file_size_bytes": 257441 - }, - { - "name": "Gp0115666_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115666", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", - "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", - "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "file_size_bytes": 1468295025 - }, - { - "name": "Gp0115666_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115666", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", - "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", - "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "file_size_bytes": 257081 - }, - { - "name": "Gp0115666_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115666", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", - "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", - "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "file_size_bytes": 2331968 - }, - { - "name": "Gp0115666_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115666", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", - "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", - "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "file_size_bytes": 1204548180 - }, - { - "name": "Gp0115666_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115666", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", - "md5_checksum": "368cf81424348cdf46d17c13908280e7", - "id": "nmdc:368cf81424348cdf46d17c13908280e7", - "file_size_bytes": 653697 - }, - { - "name": "Gp0115666_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115666", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", - "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", - "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", - "file_size_bytes": 3983935 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "61e71a15833bcf838a701c88" + "$oid": "649b005f2ca5ee4adb139fbb" }, "has_input": [ "nmdc:0b301d2dd917c2be31422dd0e986dd5e" @@ -910,120 +1249,9 @@ "part_of": [ "nmdc:mga0eehe16" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:17454627f873cc37e80700c4751c81d6", - 
"nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "nmdc:368cf81424348cdf46d17c13908280e7", - "nmdc:b5091cfeed4fbea8316e50fbceea89bc" - ], - "was_informed_by": "gold:Gp0115666", - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", - "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:06:19+00:00", - "output_data_objects": [ - { - "name": "Gp0115666_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", - "md5_checksum": "17454627f873cc37e80700c4751c81d6", - "id": "nmdc:17454627f873cc37e80700c4751c81d6", - "file_size_bytes": 10721 - }, - { - "name": "Gp0115666_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", - "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", - "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "file_size_bytes": 920924 - }, - { - "name": "Gp0115666_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115666", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", - "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", - "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "file_size_bytes": 257441 - }, - { - "name": "Gp0115666_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115666", - "data_object_type": "Centrifuge 
Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", - "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", - "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "file_size_bytes": 1468295025 - }, - { - "name": "Gp0115666_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115666", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", - "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", - "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "file_size_bytes": 257081 - }, - { - "name": "Gp0115666_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115666", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", - "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", - "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "file_size_bytes": 2331968 - }, - { - "name": "Gp0115666_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115666", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", - "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", - "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "file_size_bytes": 1204548180 - }, - { - "name": "Gp0115666_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115666", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", - "md5_checksum": "368cf81424348cdf46d17c13908280e7", - "id": "nmdc:368cf81424348cdf46d17c13908280e7", - "file_size_bytes": 653697 - }, 
- { - "name": "Gp0115666_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115666", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", - "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", - "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", - "file_size_bytes": 3983935 - } - ] - }, - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fbb" - }, - "has_input": [ - "nmdc:0b301d2dd917c2be31422dd0e986dd5e" - ], - "part_of": [ - "nmdc:mga0eehe16" - ], - "ctg_logsum": 181484, - "scaf_logsum": 182081, - "gap_pct": 0.00163, + "ctg_logsum": 181484, + "scaf_logsum": 182081, + "gap_pct": 0.00163, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:e557facdf4c3066ba4b5ba168995ba85", @@ -1057,644 +1285,690 @@ "scaf_l50": 493, "scaf_l90": 286, "scaf_n50": 27775, - "scaf_n90": 95875, - "output_data_objects": [ - { - "name": "Gp0115666_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115666", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_contigs.fna", - "md5_checksum": "e557facdf4c3066ba4b5ba168995ba85", - "id": "nmdc:e557facdf4c3066ba4b5ba168995ba85", - "file_size_bytes": 63269472 - }, - { - "name": "Gp0115666_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115666", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_scaffolds.fna", - "md5_checksum": "92cc678ca9e54cb92118b9ae746fb996", - "id": "nmdc:92cc678ca9e54cb92118b9ae746fb996", - "file_size_bytes": 62917914 - }, - { - "name": "Gp0115666_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_covstats.txt", - 
"md5_checksum": "7082b41c627571a03466f94ba80c15b8", - "id": "nmdc:7082b41c627571a03466f94ba80c15b8", - "file_size_bytes": 9179769 - }, - { - "name": "Gp0115666_Assembled AGP file", - "description": "Assembled AGP file for Gp0115666", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_assembly.agp", - "md5_checksum": "c5ccd39d97d652d5ec8804202a324b0e", - "id": "nmdc:c5ccd39d97d652d5ec8804202a324b0e", - "file_size_bytes": 8550216 - }, - { - "name": "Gp0115666_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115666", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_pairedMapped_sorted.bam", - "md5_checksum": "3ece2c377622cebdddfb9322047cb115", - "id": "nmdc:3ece2c377622cebdddfb9322047cb115", - "file_size_bytes": 1940309089 - } - ] - }, + "scaf_n90": 95875 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9ce" + "$oid": "649b009773e8249959349b34" }, + "id": "nmdc:omprc-11-zp2ar437", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", "has_input": [ - "nmdc:e557facdf4c3066ba4b5ba168995ba85" - ], - "part_of": [ - "nmdc:mga0eehe16" + "nmdc:bsm-11-4qsqg549" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:4d509c29cad07f0b18d3f7e0e724c493", - "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", - "nmdc:91cd273ea95a29b2c4e326c56eafe08a", - "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", - "nmdc:9edfc4fee191b722148af1e2648f787f", - "nmdc:886402044865256b80bfaf42ca148a61", - 
"nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", - "nmdc:2d730834b8841b7a7ad30786bff382fa", - "nmdc:46d62d69e48d7aeecb87106e02102753", - "nmdc:1896e41000aa9e4acc98cc7702e42304", - "nmdc:f40bfd77fb3f24be2529fdafc01104c7", - "nmdc:920be8f090654360619fbb16163b8513" + "jgi:55d817fc0d8785342fcf8274" ], - "was_informed_by": "gold:Gp0115666", - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0eehe16", - "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:06:19+00:00", - "output_data_objects": [ - { - "name": "Gp0115666_Protein FAA", - "description": "Protein FAA for Gp0115666", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_proteins.faa", - "md5_checksum": "4d509c29cad07f0b18d3f7e0e724c493", - "id": "nmdc:4d509c29cad07f0b18d3f7e0e724c493", - "file_size_bytes": 35706777 - }, - { - "name": "Gp0115666_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115666", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_structural_annotation.gff", - "md5_checksum": "60d04bb0a2d1a1d593bd849a2a13e405", - "id": "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", - "file_size_bytes": 2520 - }, - { - "name": "Gp0115666_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115666", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_functional_annotation.gff", - "md5_checksum": "91cd273ea95a29b2c4e326c56eafe08a", - "id": "nmdc:91cd273ea95a29b2c4e326c56eafe08a", - "file_size_bytes": 40030386 - }, - { - "name": "Gp0115666_KO TSV file", - "description": "KO TSV file for Gp0115666", - "data_object_type": "Annotation KEGG 
Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko.tsv", - "md5_checksum": "e08c6253ec5a15eb43d8cb4d69d09d4c", - "id": "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", - "file_size_bytes": 5584125 - }, - { - "name": "Gp0115666_EC TSV file", - "description": "EC TSV file for Gp0115666", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ec.tsv", - "md5_checksum": "9edfc4fee191b722148af1e2648f787f", - "id": "nmdc:9edfc4fee191b722148af1e2648f787f", - "file_size_bytes": 3575242 - }, - { - "name": "Gp0115666_COG GFF file", - "description": "COG GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cog.gff", - "md5_checksum": "886402044865256b80bfaf42ca148a61", - "id": "nmdc:886402044865256b80bfaf42ca148a61", - "file_size_bytes": 23390091 - }, - { - "name": "Gp0115666_PFAM GFF file", - "description": "PFAM GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_pfam.gff", - "md5_checksum": "1b2bc9b96a15ebdfe3ff1e30027544af", - "id": "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", - "file_size_bytes": 18444613 - }, - { - "name": "Gp0115666_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_tigrfam.gff", - "md5_checksum": "2d730834b8841b7a7ad30786bff382fa", - "id": "nmdc:2d730834b8841b7a7ad30786bff382fa", - "file_size_bytes": 2596225 - }, - { - "name": "Gp0115666_SMART GFF file", - "description": "SMART GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_smart.gff", - "md5_checksum": "46d62d69e48d7aeecb87106e02102753", - "id": "nmdc:46d62d69e48d7aeecb87106e02102753", - "file_size_bytes": 4932262 - }, - { - "name": "Gp0115666_SuperFam GFF file", - 
"description": "SuperFam GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_supfam.gff", - "md5_checksum": "1896e41000aa9e4acc98cc7702e42304", - "id": "nmdc:1896e41000aa9e4acc98cc7702e42304", - "file_size_bytes": 28911479 - }, - { - "name": "Gp0115666_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cath_funfam.gff", - "md5_checksum": "f40bfd77fb3f24be2529fdafc01104c7", - "id": "nmdc:f40bfd77fb3f24be2529fdafc01104c7", - "file_size_bytes": 22881869 - }, - { - "name": "Gp0115666_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko_ec.gff", - "md5_checksum": "920be8f090654360619fbb16163b8513", - "id": "nmdc:920be8f090654360619fbb16163b8513", - "file_size_bytes": 17844749 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115666" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34730" + "$oid": "649b009d6bdd4fd20273c887" }, "has_input": [ - "nmdc:e557facdf4c3066ba4b5ba168995ba85", - "nmdc:3ece2c377622cebdddfb9322047cb115", - "nmdc:91cd273ea95a29b2c4e326c56eafe08a" + "nmdc:76893480c05758ad2977df78a5b050e5" ], - "too_short_contig_num": 108937, "part_of": [ "nmdc:mga0eehe16" ], - "binned_contig_num": 899, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", 
"has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:9944a9020ce981a2423ca81424998e66", - "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", - "nmdc:415dfed655f9c4673f2cce4f9947c2e4", - "nmdc:be6482b534716166ce5daea5a07cba06", - "nmdc:df08913532a84681996a29d1a1c127b3" + "nmdc:0b301d2dd917c2be31422dd0e986dd5e", + "nmdc:0634e8261ce976d167457993d7f7a4ec" ], "was_informed_by": "gold:Gp0115666", - "input_contig_num": 116661, + "input_read_count": 22183982, + "output_read_bases": 3025260554, "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0eehe16", - "mags_list": [ - { - "number_of_contig": 216, - "completeness": 41.57, - "bin_name": "bins.1", - "gene_count": 1176, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.93, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 11 - }, - { - "number_of_contig": 683, - "completeness": 87.59, - "bin_name": "bins.2", - "gene_count": 4526, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 33.23, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 56 - } - ], - "unbinned_contig_num": 6825, + "input_read_bases": 3349781282, + "name": "Read QC Activity for nmdc:mga0eehe16", + "output_read_count": 20195754, "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:06:19+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115666_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.tooShort.fa", - "md5_checksum": "9944a9020ce981a2423ca81424998e66", - "id": "nmdc:9944a9020ce981a2423ca81424998e66", - "file_size_bytes": 46766610 - }, - { - "name": "Gp0115666_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.unbinned.fa", - "md5_checksum": "d2a24728b9006fd4fb4bf4f326138dc2", - "id": "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", - "file_size_bytes": 11382048 - }, - { - "name": "Gp0115666_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115666", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_checkm_qa.out", - "md5_checksum": "415dfed655f9c4673f2cce4f9947c2e4", - "id": "nmdc:415dfed655f9c4673f2cce4f9947c2e4", - "file_size_bytes": 1020 - }, - { - "name": "Gp0115666_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115666", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_hqmq_bin.zip", - "md5_checksum": "be6482b534716166ce5daea5a07cba06", - "id": "nmdc:be6482b534716166ce5daea5a07cba06", - "file_size_bytes": 182 - }, - { - "name": "Gp0115666_metabat2 bins", - "description": "metabat2 bins for Gp0115666", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_metabat_bin.zip", - "md5_checksum": "df08913532a84681996a29d1a1c127b3", - "id": "nmdc:df08913532a84681996a29d1a1c127b3", - "file_size_bytes": 1559491 - } - ] + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b35" - }, - "id": "nmdc:omprc-11-wepaa271", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", - "has_input": [ - "nmdc:bsm-11-srz83p34" - ], - "has_output": [ - "jgi:55d740240d8785342fcf7e37" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115668" - ], - "downstream_workflow_activity_records": [ + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c88c" + "$oid": "649b009bff710ae353f8cf55" }, "has_input": [ - "nmdc:0967bbbe5ee2737f66bc6ee7bf366bbb" - ], - "part_of": [ - "nmdc:mga0n66h21" + "nmdc:0b301d2dd917c2be31422dd0e986dd5e" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", - "nmdc:63fb5949ebafd1846ba60f2ce033191c" + "nmdc:17454627f873cc37e80700c4751c81d6", + "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + 
"nmdc:e5227b1cfdbc266c44d23028c92150a9", + "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "nmdc:368cf81424348cdf46d17c13908280e7", + "nmdc:b5091cfeed4fbea8316e50fbceea89bc" ], - "was_informed_by": "gold:Gp0115668", - "input_read_count": 35064492, - "output_read_bases": 5069132469, - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "was_informed_by": "gold:Gp0115666", + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", "execution_resource": "NERSC-Cori", - "input_read_bases": 5294738292, - "name": "Read QC Activity for nmdc:mga0n66h21", - "output_read_count": 33873238, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:19:17+00:00", - "output_data_objects": [ - { - "name": "Gp0115668_Filtered Reads", - "description": "Filtered Reads for Gp0115668", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filtered.fastq.gz", - "md5_checksum": "121b1c25e803f2a010ae5a2206a8d1d2", - "id": "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", - "file_size_bytes": 2665008319 - }, - { - "name": "Gp0115668_Filtered Stats", - "description": "Filtered Stats for Gp0115668", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filterStats.txt", - "md5_checksum": "63fb5949ebafd1846ba60f2ce033191c", - "id": "nmdc:63fb5949ebafd1846ba60f2ce033191c", - "file_size_bytes": 289 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:06:19+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf53" + "$oid": "61e71a15833bcf838a701c88" }, "has_input": [ 
- "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + "nmdc:0b301d2dd917c2be31422dd0e986dd5e" + ], + "part_of": [ + "nmdc:mga0eehe16" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "nmdc:2529ede10eb159148711d016ec022af3", - "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "nmdc:890f9f52d828e1ea8277b52566763069", - "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "nmdc:8677985c5e8ad92dd6d051f85950a636", - "nmdc:9b2f355a4c2ff3651a3d1179212e2914" + "nmdc:17454627f873cc37e80700c4751c81d6", + "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + "nmdc:e5227b1cfdbc266c44d23028c92150a9", + "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "nmdc:368cf81424348cdf46d17c13908280e7", + "nmdc:b5091cfeed4fbea8316e50fbceea89bc" ], - "was_informed_by": "gold:Gp0115668", - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "was_informed_by": "gold:Gp0115666", + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", - "started_at_time": "2021-10-11T02:28:43Z", + "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", + "started_at_time": "2021-10-11T02:28:09Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:19:17+00:00", - "output_data_objects": [ - { - "name": "Gp0115668_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", - "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", - "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "file_size_bytes": 13875 - }, - { - "name": "Gp0115668_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115668", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", - "md5_checksum": "2529ede10eb159148711d016ec022af3", - "id": "nmdc:2529ede10eb159148711d016ec022af3", - "file_size_bytes": 956974 - }, - { - "name": "Gp0115668_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115668", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", - "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", - "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "file_size_bytes": 265076 - }, - { - "name": "Gp0115668_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115668", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", - "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", - "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "file_size_bytes": 2377445510 - }, - { - "name": "Gp0115668_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115668", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", - "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", - "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "file_size_bytes": 258291 - }, - { - "name": "Gp0115668_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115668", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", - "md5_checksum": "890f9f52d828e1ea8277b52566763069", - "id": "nmdc:890f9f52d828e1ea8277b52566763069", - "file_size_bytes": 2333775 - }, - { - "name": 
"Gp0115668_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115668", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", - "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", - "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "file_size_bytes": 1966520263 - }, - { - "name": "Gp0115668_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115668", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", - "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", - "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", - "file_size_bytes": 707661 - }, - { - "name": "Gp0115668_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115668", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", - "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", - "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", - "file_size_bytes": 4276256 - } - ] + "ended_at_time": "2021-10-11T04:06:19+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115668_Filtered Reads", + "description": "Filtered Reads for Gp0115668", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filtered.fastq.gz", + "md5_checksum": "121b1c25e803f2a010ae5a2206a8d1d2", + "id": "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", + "file_size_bytes": 2665008319 + }, + { + "name": "Gp0115668_Filtered Stats", + "description": "Filtered Stats for Gp0115668", + 
"data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filterStats.txt", + "md5_checksum": "63fb5949ebafd1846ba60f2ce033191c", + "id": "nmdc:63fb5949ebafd1846ba60f2ce033191c", + "file_size_bytes": 289 + }, + { + "name": "Gp0115668_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", + "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", + "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "file_size_bytes": 13875 + }, + { + "name": "Gp0115668_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", + "md5_checksum": "2529ede10eb159148711d016ec022af3", + "id": "nmdc:2529ede10eb159148711d016ec022af3", + "file_size_bytes": 956974 + }, + { + "name": "Gp0115668_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115668", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", + "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", + "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "file_size_bytes": 265076 + }, + { + "name": "Gp0115668_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115668", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", + "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", + "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "file_size_bytes": 2377445510 + }, + { + "name": "Gp0115668_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115668", + 
"data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", + "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", + "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "file_size_bytes": 258291 + }, + { + "name": "Gp0115668_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115668", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", + "md5_checksum": "890f9f52d828e1ea8277b52566763069", + "id": "nmdc:890f9f52d828e1ea8277b52566763069", + "file_size_bytes": 2333775 + }, + { + "name": "Gp0115668_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115668", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", + "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", + "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "file_size_bytes": 1966520263 + }, + { + "name": "Gp0115668_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115668", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", + "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", + "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", + "file_size_bytes": 707661 + }, + { + "name": "Gp0115668_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115668", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", + "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", + "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", + "file_size_bytes": 
4276256 + }, + { + "name": "Gp0115668_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", + "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", + "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "file_size_bytes": 13875 + }, + { + "name": "Gp0115668_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", + "md5_checksum": "2529ede10eb159148711d016ec022af3", + "id": "nmdc:2529ede10eb159148711d016ec022af3", + "file_size_bytes": 956974 + }, + { + "name": "Gp0115668_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115668", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", + "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", + "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "file_size_bytes": 265076 + }, + { + "name": "Gp0115668_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115668", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", + "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", + "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "file_size_bytes": 2377445510 + }, + { + "name": "Gp0115668_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115668", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", + "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", + "id": 
"nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "file_size_bytes": 258291 + }, + { + "name": "Gp0115668_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115668", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", + "md5_checksum": "890f9f52d828e1ea8277b52566763069", + "id": "nmdc:890f9f52d828e1ea8277b52566763069", + "file_size_bytes": 2333775 + }, + { + "name": "Gp0115668_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115668", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", + "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", + "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "file_size_bytes": 1966520263 + }, + { + "name": "Gp0115668_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115668", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", + "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", + "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", + "file_size_bytes": 707661 + }, + { + "name": "Gp0115668_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115668", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", + "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", + "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", + "file_size_bytes": 4276256 + }, + { + "name": "Gp0115668_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115668", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", + "md5_checksum": "b2b862aede4f333acec79aac3afc7254", + "id": "nmdc:b2b862aede4f333acec79aac3afc7254", + "file_size_bytes": 182488593 + }, + { + "name": "Gp0115668_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115668", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", + "md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", + "id": "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", + "file_size_bytes": 181514952 + }, + { + "name": "Gp0115668_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_covstats.txt", + "md5_checksum": "6ccb798d615b67dfb9c64ff32d6586c4", + "id": "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", + "file_size_bytes": 25682298 + }, + { + "name": "Gp0115668_Assembled AGP file", + "description": "Assembled AGP file for Gp0115668", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_assembly.agp", + "md5_checksum": "da27801a4e0ab450485f5a3aeb75a7d6", + "id": "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", + "file_size_bytes": 24103161 + }, + { + "name": "Gp0115668_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115668", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_pairedMapped_sorted.bam", + "md5_checksum": "f7a4bb0be4599b544360617190b45681", + "id": "nmdc:f7a4bb0be4599b544360617190b45681", + "file_size_bytes": 2958311801 + }, + { + "name": "Gp0115668_Protein FAA", + "description": "Protein FAA for Gp0115668", + "data_object_type": "Annotation Amino Acid FASTA", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_proteins.faa", + "md5_checksum": "5cb6273cd171d1ae5a8d77c8f131517f", + "id": "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", + "file_size_bytes": 88016165 + }, + { + "name": "Gp0115668_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115668", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_structural_annotation.gff", + "md5_checksum": "d49149a48134c1091c001448cc91f8e2", + "id": "nmdc:d49149a48134c1091c001448cc91f8e2", + "file_size_bytes": 2527 + }, + { + "name": "Gp0115668_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115668", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_functional_annotation.gff", + "md5_checksum": "7a861805138d425525f298c1790b58ed", + "id": "nmdc:7a861805138d425525f298c1790b58ed", + "file_size_bytes": 91926507 + }, + { + "name": "Gp0115668_KO TSV file", + "description": "KO TSV file for Gp0115668", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko.tsv", + "md5_checksum": "0d0a80f2dafb68f4659709dd2ebd2f28", + "id": "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", + "file_size_bytes": 6651856 + }, + { + "name": "Gp0115668_EC TSV file", + "description": "EC TSV file for Gp0115668", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ec.tsv", + "md5_checksum": "a8b689fdef54bf7235532de634cf553e", + "id": "nmdc:a8b689fdef54bf7235532de634cf553e", + "file_size_bytes": 4156019 + }, + { + "name": "Gp0115668_COG GFF file", + "description": "COG GFF file for Gp0115668", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cog.gff", + "md5_checksum": "017daaa53039bc1135ca8f013596eb14", + "id": "nmdc:017daaa53039bc1135ca8f013596eb14", + "file_size_bytes": 33686729 + }, + { + "name": "Gp0115668_PFAM GFF file", + "description": "PFAM GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_pfam.gff", + "md5_checksum": "e3eb963d76dc6bdc54756cfa80977611", + "id": "nmdc:e3eb963d76dc6bdc54756cfa80977611", + "file_size_bytes": 29534588 + }, + { + "name": "Gp0115668_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_tigrfam.gff", + "md5_checksum": "ab1d561046fbe146ac418e4ed822e861", + "id": "nmdc:ab1d561046fbe146ac418e4ed822e861", + "file_size_bytes": 2596288 + }, + { + "name": "Gp0115668_SMART GFF file", + "description": "SMART GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_smart.gff", + "md5_checksum": "51054c4da9edc391b03418b5f9327815", + "id": "nmdc:51054c4da9edc391b03418b5f9327815", + "file_size_bytes": 18133874 + }, + { + "name": "Gp0115668_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_supfam.gff", + "md5_checksum": "335576d20d4f5c061a875529cbe9572c", + "id": "nmdc:335576d20d4f5c061a875529cbe9572c", + "file_size_bytes": 61337132 + }, + { + "name": "Gp0115668_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cath_funfam.gff", + "md5_checksum": "6c5387ac5acb8b340a2c2a9e17e62bae", + "id": "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", + "file_size_bytes": 52005922 + }, + { + "name": "Gp0115668_KO_EC GFF file", + "description": "KO_EC GFF file for 
Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko_ec.gff", + "md5_checksum": "eea36326caba5baa0536ac2f5e36d497", + "id": "nmdc:eea36326caba5baa0536ac2f5e36d497", + "file_size_bytes": 21150415 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115668_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.tooShort.fa", + "md5_checksum": "8c05fc754583d51714bc1aa81396e59d", + "id": "nmdc:8c05fc754583d51714bc1aa81396e59d", + "file_size_bytes": 136315210 + }, + { + "name": "Gp0115668_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.unbinned.fa", + "md5_checksum": "9ef1be5df79aee7c64f2addc4bda6afa", + "id": "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", + "file_size_bytes": 39131745 + }, + { + "name": "Gp0115668_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115668", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_checkm_qa.out", + "md5_checksum": "60db1474ee6a099c10e4fdc728420cf8", + "id": "nmdc:60db1474ee6a099c10e4fdc728420cf8", + "file_size_bytes": 1176 + }, + { + "name": "Gp0115668_high-quality and medium-quality bins", + "description": 
"high-quality and medium-quality bins for Gp0115668", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_hqmq_bin.zip", + "md5_checksum": "5a36d8ba758ee510ab2be3e01fda3e0f", + "id": "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", + "file_size_bytes": 182 }, + { + "name": "Gp0115668_metabat2 bins", + "description": "metabat2 bins for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_metabat_bin.zip", + "md5_checksum": "3f4c7c98bb94687eb96382799c8626fe", + "id": "nmdc:3f4c7c98bb94687eb96382799c8626fe", + "file_size_bytes": 2145953 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "61e71a4d833bcf838a7021ce" + "$oid": "649b0052ec087f6bbab34736" }, "has_input": [ - "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + "nmdc:b2b862aede4f333acec79aac3afc7254", + "nmdc:f7a4bb0be4599b544360617190b45681", + "nmdc:7a861805138d425525f298c1790b58ed" ], + "too_short_contig_num": 297764, "part_of": [ "nmdc:mga0n66h21" ], + "binned_contig_num": 1669, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "nmdc:2529ede10eb159148711d016ec022af3", - "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "nmdc:890f9f52d828e1ea8277b52566763069", - "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "nmdc:8677985c5e8ad92dd6d051f85950a636", - "nmdc:9b2f355a4c2ff3651a3d1179212e2914" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8c05fc754583d51714bc1aa81396e59d", + "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", + "nmdc:60db1474ee6a099c10e4fdc728420cf8", + "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", + "nmdc:3f4c7c98bb94687eb96382799c8626fe" ], "was_informed_by": "gold:Gp0115668", + "input_contig_num": 323261, "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", 
"execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:19:17+00:00", - "output_data_objects": [ - { - "name": "Gp0115668_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", - "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", - "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "file_size_bytes": 13875 - }, - { - "name": "Gp0115668_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", - "md5_checksum": "2529ede10eb159148711d016ec022af3", - "id": "nmdc:2529ede10eb159148711d016ec022af3", - "file_size_bytes": 956974 - }, - { - "name": "Gp0115668_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115668", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", - "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", - "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "file_size_bytes": 265076 - }, - { - "name": "Gp0115668_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115668", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", - "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", - "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "file_size_bytes": 2377445510 - }, - { - "name": "Gp0115668_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115668", - "data_object_type": "Centrifuge 
Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", - "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", - "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "file_size_bytes": 258291 - }, - { - "name": "Gp0115668_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115668", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", - "md5_checksum": "890f9f52d828e1ea8277b52566763069", - "id": "nmdc:890f9f52d828e1ea8277b52566763069", - "file_size_bytes": 2333775 - }, + "name": "MAGs Analysis Activity for nmdc:mga0n66h21", + "mags_list": [ { - "name": "Gp0115668_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115668", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", - "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", - "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "file_size_bytes": 1966520263 + "number_of_contig": 1013, + "completeness": 12.29, + "bin_name": "bins.1", + "gene_count": 4188, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.32, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 }, { - "name": "Gp0115668_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115668", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", - "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", - "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", - "file_size_bytes": 707661 + 
"number_of_contig": 599, + "completeness": 58.72, + "bin_name": "bins.2", + "gene_count": 2940, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 12.95, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 25 }, { - "name": "Gp0115668_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115668", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", - "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", - "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", - "file_size_bytes": 4276256 + "number_of_contig": 57, + "completeness": 4.0, + "bin_name": "bins.3", + "gene_count": 258, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 } - ] - }, + ], + "unbinned_contig_num": 23828, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fc3" + "$oid": "649b005bbf2caae0415ef9d8" }, "has_input": [ - "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + "nmdc:b2b862aede4f333acec79aac3afc7254" ], "part_of": [ "nmdc:mga0n66h21" ], - "ctg_logsum": 489108, - "scaf_logsum": 491574, - "gap_pct": 0.00308, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:b2b862aede4f333acec79aac3afc7254", - "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", - "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", - 
"nmdc:da27801a4e0ab450485f5a3aeb75a7d6", - "nmdc:f7a4bb0be4599b544360617190b45681" + "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", + "nmdc:d49149a48134c1091c001448cc91f8e2", + "nmdc:7a861805138d425525f298c1790b58ed", + "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", + "nmdc:a8b689fdef54bf7235532de634cf553e", + "nmdc:017daaa53039bc1135ca8f013596eb14", + "nmdc:e3eb963d76dc6bdc54756cfa80977611", + "nmdc:ab1d561046fbe146ac418e4ed822e861", + "nmdc:51054c4da9edc391b03418b5f9327815", + "nmdc:335576d20d4f5c061a875529cbe9572c", + "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", + "nmdc:eea36326caba5baa0536ac2f5e36d497" ], - "asm_score": 4.087, "was_informed_by": "gold:Gp0115668", - "ctg_powsum": 53542, - "scaf_max": 53286, "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "scaf_powsum": 53839, "execution_resource": "NERSC-Cori", - "contigs": 323269, + "name": "Annotation Activity for nmdc:mga0n66h21", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc3" + }, + "has_input": [ + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + ], + "part_of": [ + "nmdc:mga0n66h21" + ], + "ctg_logsum": 489108, + "scaf_logsum": 491574, + "gap_pct": 0.00308, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b2b862aede4f333acec79aac3afc7254", + "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", + "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", + "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", + "nmdc:f7a4bb0be4599b544360617190b45681" + ], + "asm_score": 4.087, + "was_informed_by": "gold:Gp0115668", + "ctg_powsum": 53542, + "scaf_max": 53286, + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "scaf_powsum": 53839, + "execution_resource": "NERSC-Cori", + "contigs": 323269, "name": "Assembly Activity for nmdc:mga0n66h21", "ctg_max": 53286, "gc_std": 0.10793, @@ -1715,990 +1989,652 @@ "scaf_n90": 263381, "scaf_l_gt50k": 53286, 
"scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.03141731, - "output_data_objects": [ - { - "name": "Gp0115668_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115668", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", - "md5_checksum": "b2b862aede4f333acec79aac3afc7254", - "id": "nmdc:b2b862aede4f333acec79aac3afc7254", - "file_size_bytes": 182488593 - }, - { - "name": "Gp0115668_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115668", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", - "md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", - "id": "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", - "file_size_bytes": 181514952 - }, - { - "name": "Gp0115668_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_covstats.txt", - "md5_checksum": "6ccb798d615b67dfb9c64ff32d6586c4", - "id": "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", - "file_size_bytes": 25682298 - }, - { - "name": "Gp0115668_Assembled AGP file", - "description": "Assembled AGP file for Gp0115668", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_assembly.agp", - "md5_checksum": "da27801a4e0ab450485f5a3aeb75a7d6", - "id": "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", - "file_size_bytes": 24103161 - }, - { - "name": "Gp0115668_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115668", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_pairedMapped_sorted.bam", - "md5_checksum": "f7a4bb0be4599b544360617190b45681", - "id": "nmdc:f7a4bb0be4599b544360617190b45681", 
- "file_size_bytes": 2958311801 - } - ] - }, + "scaf_pct_gt50k": 0.03141731 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9d8" + "$oid": "649b009773e8249959349b35" }, + "id": "nmdc:omprc-11-wepaa271", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", "has_input": [ - "nmdc:b2b862aede4f333acec79aac3afc7254" - ], - "part_of": [ - "nmdc:mga0n66h21" + "nmdc:bsm-11-srz83p34" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", - "nmdc:d49149a48134c1091c001448cc91f8e2", - "nmdc:7a861805138d425525f298c1790b58ed", - "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", - "nmdc:a8b689fdef54bf7235532de634cf553e", - "nmdc:017daaa53039bc1135ca8f013596eb14", - "nmdc:e3eb963d76dc6bdc54756cfa80977611", - "nmdc:ab1d561046fbe146ac418e4ed822e861", - "nmdc:51054c4da9edc391b03418b5f9327815", - "nmdc:335576d20d4f5c061a875529cbe9572c", - "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", - "nmdc:eea36326caba5baa0536ac2f5e36d497" + "jgi:55d740240d8785342fcf7e37" ], - "was_informed_by": "gold:Gp0115668", - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0n66h21", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:19:17+00:00", - "output_data_objects": [ - { - "name": "Gp0115668_Protein FAA", - "description": "Protein FAA for Gp0115668", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_proteins.faa", - 
"md5_checksum": "5cb6273cd171d1ae5a8d77c8f131517f", - "id": "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", - "file_size_bytes": 88016165 - }, - { - "name": "Gp0115668_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115668", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_structural_annotation.gff", - "md5_checksum": "d49149a48134c1091c001448cc91f8e2", - "id": "nmdc:d49149a48134c1091c001448cc91f8e2", - "file_size_bytes": 2527 - }, - { - "name": "Gp0115668_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115668", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_functional_annotation.gff", - "md5_checksum": "7a861805138d425525f298c1790b58ed", - "id": "nmdc:7a861805138d425525f298c1790b58ed", - "file_size_bytes": 91926507 - }, - { - "name": "Gp0115668_KO TSV file", - "description": "KO TSV file for Gp0115668", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko.tsv", - "md5_checksum": "0d0a80f2dafb68f4659709dd2ebd2f28", - "id": "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", - "file_size_bytes": 6651856 - }, - { - "name": "Gp0115668_EC TSV file", - "description": "EC TSV file for Gp0115668", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ec.tsv", - "md5_checksum": "a8b689fdef54bf7235532de634cf553e", - "id": "nmdc:a8b689fdef54bf7235532de634cf553e", - "file_size_bytes": 4156019 - }, - { - "name": "Gp0115668_COG GFF file", - "description": "COG GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cog.gff", - "md5_checksum": "017daaa53039bc1135ca8f013596eb14", - "id": 
"nmdc:017daaa53039bc1135ca8f013596eb14", - "file_size_bytes": 33686729 - }, - { - "name": "Gp0115668_PFAM GFF file", - "description": "PFAM GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_pfam.gff", - "md5_checksum": "e3eb963d76dc6bdc54756cfa80977611", - "id": "nmdc:e3eb963d76dc6bdc54756cfa80977611", - "file_size_bytes": 29534588 - }, - { - "name": "Gp0115668_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_tigrfam.gff", - "md5_checksum": "ab1d561046fbe146ac418e4ed822e861", - "id": "nmdc:ab1d561046fbe146ac418e4ed822e861", - "file_size_bytes": 2596288 - }, - { - "name": "Gp0115668_SMART GFF file", - "description": "SMART GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_smart.gff", - "md5_checksum": "51054c4da9edc391b03418b5f9327815", - "id": "nmdc:51054c4da9edc391b03418b5f9327815", - "file_size_bytes": 18133874 - }, - { - "name": "Gp0115668_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_supfam.gff", - "md5_checksum": "335576d20d4f5c061a875529cbe9572c", - "id": "nmdc:335576d20d4f5c061a875529cbe9572c", - "file_size_bytes": 61337132 - }, - { - "name": "Gp0115668_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cath_funfam.gff", - "md5_checksum": "6c5387ac5acb8b340a2c2a9e17e62bae", - "id": "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", - "file_size_bytes": 52005922 - }, - { - "name": "Gp0115668_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko_ec.gff", - "md5_checksum": 
"eea36326caba5baa0536ac2f5e36d497", - "id": "nmdc:eea36326caba5baa0536ac2f5e36d497", - "file_size_bytes": 21150415 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115668" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34736" + "$oid": "649b009d6bdd4fd20273c88c" }, "has_input": [ - "nmdc:b2b862aede4f333acec79aac3afc7254", - "nmdc:f7a4bb0be4599b544360617190b45681", - "nmdc:7a861805138d425525f298c1790b58ed" + "nmdc:0967bbbe5ee2737f66bc6ee7bf366bbb" ], - "too_short_contig_num": 297764, "part_of": [ "nmdc:mga0n66h21" ], - "binned_contig_num": 1669, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:8c05fc754583d51714bc1aa81396e59d", - "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", - "nmdc:60db1474ee6a099c10e4fdc728420cf8", - "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", - "nmdc:3f4c7c98bb94687eb96382799c8626fe" + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", + "nmdc:63fb5949ebafd1846ba60f2ce033191c" ], "was_informed_by": "gold:Gp0115668", - "input_contig_num": 323261, + "input_read_count": 35064492, + "output_read_bases": 5069132469, "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0n66h21", - "mags_list": [ - { - "number_of_contig": 1013, - "completeness": 12.29, - "bin_name": "bins.1", - "gene_count": 4188, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - 
"gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 2.32, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 20 - }, - { - "number_of_contig": 599, - "completeness": 58.72, - "bin_name": "bins.2", - "gene_count": 2940, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 12.95, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 25 - }, - { - "number_of_contig": 57, - "completeness": 4.0, - "bin_name": "bins.3", - "gene_count": 258, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.03, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 1 - } - ], - "unbinned_contig_num": 23828, + "input_read_bases": 5294738292, + "name": "Read QC Activity for nmdc:mga0n66h21", + "output_read_count": 33873238, "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:19:17+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115668_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.tooShort.fa", - "md5_checksum": 
"8c05fc754583d51714bc1aa81396e59d", - "id": "nmdc:8c05fc754583d51714bc1aa81396e59d", - "file_size_bytes": 136315210 - }, - { - "name": "Gp0115668_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.unbinned.fa", - "md5_checksum": "9ef1be5df79aee7c64f2addc4bda6afa", - "id": "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", - "file_size_bytes": 39131745 - }, - { - "name": "Gp0115668_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115668", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_checkm_qa.out", - "md5_checksum": "60db1474ee6a099c10e4fdc728420cf8", - "id": "nmdc:60db1474ee6a099c10e4fdc728420cf8", - "file_size_bytes": 1176 - }, - { - "name": "Gp0115668_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115668", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_hqmq_bin.zip", - "md5_checksum": "5a36d8ba758ee510ab2be3e01fda3e0f", - "id": "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", - "file_size_bytes": 182 - }, - { - "name": "Gp0115668_metabat2 bins", - "description": "metabat2 bins for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_metabat_bin.zip", - "md5_checksum": "3f4c7c98bb94687eb96382799c8626fe", - "id": "nmdc:3f4c7c98bb94687eb96382799c8626fe", - "file_size_bytes": 2145953 - } - ] + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b36" - }, - "id": "nmdc:omprc-11-hymrq852", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", - "description": 
"Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-11219w54" - ], - "has_output": [ - "jgi:55a9cb010d87852b21508920" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115679" - ], - "downstream_workflow_activity_records": [ + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c884" + "$oid": "649b009bff710ae353f8cf53" }, "has_input": [ - "nmdc:3bf389b767cf8a49224dc0028e55eeb7" - ], - "part_of": [ - "nmdc:mga0gg1q48" + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", - "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49" + "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "nmdc:2529ede10eb159148711d016ec022af3", + "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "nmdc:890f9f52d828e1ea8277b52566763069", + "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "nmdc:8677985c5e8ad92dd6d051f85950a636", + "nmdc:9b2f355a4c2ff3651a3d1179212e2914" ], - "was_informed_by": "gold:Gp0115679", - "input_read_count": 67696542, - "output_read_bases": 9825387057, - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "was_informed_by": "gold:Gp0115668", + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", "execution_resource": "NERSC-Cori", - "input_read_bases": 10222177842, - "name": "Read QC Activity for nmdc:mga0gg1q48", - "output_read_count": 67147510, - 
"started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T06:30:42+00:00", - "output_data_objects": [ - { - "name": "Gp0115679_Filtered Reads", - "description": "Filtered Reads for Gp0115679", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filtered.fastq.gz", - "md5_checksum": "7e294ff66cb7ddf84edf9c8bed576bcd", - "id": "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", - "file_size_bytes": 5673282665 - }, - { - "name": "Gp0115679_Filtered Stats", - "description": "Filtered Stats for Gp0115679", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filterStats.txt", - "md5_checksum": "08e2a96f7aaaff5ff6f747cfe6f49e49", - "id": "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49", - "file_size_bytes": 276 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:19:17+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf4b" + "$oid": "61e71a4d833bcf838a7021ce" }, "has_input": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + ], + "part_of": [ + "nmdc:mga0n66h21" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:e20f8c00473472fa073adde871860801", - "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "nmdc:f721d9dd168b0dea080b191a4396167e", - "nmdc:ab77e396ec643b58b54da92848b88a96", - "nmdc:f2514844e47a9e3d268671f80f152bc1", - "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - 
"nmdc:77860ee043ae9738e7702a3f665b15fa" + "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "nmdc:2529ede10eb159148711d016ec022af3", + "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "nmdc:890f9f52d828e1ea8277b52566763069", + "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "nmdc:8677985c5e8ad92dd6d051f85950a636", + "nmdc:9b2f355a4c2ff3651a3d1179212e2914" ], - "was_informed_by": "gold:Gp0115679", - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "was_informed_by": "gold:Gp0115668", + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", - "started_at_time": "2021-10-11T02:23:30Z", + "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", + "started_at_time": "2021-10-11T02:28:43Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:30:42+00:00", - "output_data_objects": [ - { - "name": "Gp0115679_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", - "md5_checksum": "e20f8c00473472fa073adde871860801", - "id": "nmdc:e20f8c00473472fa073adde871860801", - "file_size_bytes": 18551 - }, - { - "name": "Gp0115679_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", - "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", - "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "file_size_bytes": 1200541 - }, - { - "name": "Gp0115679_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115679", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", - "md5_checksum": 
"f721d9dd168b0dea080b191a4396167e", - "id": "nmdc:f721d9dd168b0dea080b191a4396167e", - "file_size_bytes": 278990 - }, - { - "name": "Gp0115679_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115679", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", - "md5_checksum": "ab77e396ec643b58b54da92848b88a96", - "id": "nmdc:ab77e396ec643b58b54da92848b88a96", - "file_size_bytes": 4742886512 - }, - { - "name": "Gp0115679_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115679", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", - "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", - "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", - "file_size_bytes": 266907 - }, - { - "name": "Gp0115679_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115679", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", - "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", - "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "file_size_bytes": 2359747 - }, - { - "name": "Gp0115679_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115679", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", - "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", - "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "file_size_bytes": 3859620862 - }, - { - "name": "Gp0115679_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115679", - "data_object_type": 
"Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", - "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", - "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "file_size_bytes": 729541 - }, - { - "name": "Gp0115679_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115679", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", - "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", - "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", - "file_size_bytes": 4358324 - } - ] + "ended_at_time": "2021-10-11T05:19:17+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115679_Filtered Reads", + "description": "Filtered Reads for Gp0115679", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filtered.fastq.gz", + "md5_checksum": "7e294ff66cb7ddf84edf9c8bed576bcd", + "id": "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", + "file_size_bytes": 5673282665 + }, + { + "name": "Gp0115679_Filtered Stats", + "description": "Filtered Stats for Gp0115679", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filterStats.txt", + "md5_checksum": "08e2a96f7aaaff5ff6f747cfe6f49e49", + "id": "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49", + "file_size_bytes": 276 + }, + { + "name": "Gp0115679_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", + "md5_checksum": "e20f8c00473472fa073adde871860801", + "id": 
"nmdc:e20f8c00473472fa073adde871860801", + "file_size_bytes": 18551 + }, + { + "name": "Gp0115679_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", + "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", + "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "file_size_bytes": 1200541 + }, + { + "name": "Gp0115679_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115679", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", + "md5_checksum": "f721d9dd168b0dea080b191a4396167e", + "id": "nmdc:f721d9dd168b0dea080b191a4396167e", + "file_size_bytes": 278990 + }, + { + "name": "Gp0115679_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115679", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", + "md5_checksum": "ab77e396ec643b58b54da92848b88a96", + "id": "nmdc:ab77e396ec643b58b54da92848b88a96", + "file_size_bytes": 4742886512 + }, + { + "name": "Gp0115679_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115679", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", + "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", + "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", + "file_size_bytes": 266907 + }, + { + "name": "Gp0115679_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115679", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", + "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", + "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "file_size_bytes": 2359747 + }, + { + "name": "Gp0115679_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115679", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", + "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", + "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "file_size_bytes": 3859620862 + }, + { + "name": "Gp0115679_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115679", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", + "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", + "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "file_size_bytes": 729541 + }, + { + "name": "Gp0115679_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115679", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", + "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", + "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", + "file_size_bytes": 4358324 + }, + { + "name": "Gp0115679_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", + "md5_checksum": "e20f8c00473472fa073adde871860801", + "id": "nmdc:e20f8c00473472fa073adde871860801", + "file_size_bytes": 18551 + }, + { + "name": "Gp0115679_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115679", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", + "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", + "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "file_size_bytes": 1200541 + }, + { + "name": "Gp0115679_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115679", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", + "md5_checksum": "f721d9dd168b0dea080b191a4396167e", + "id": "nmdc:f721d9dd168b0dea080b191a4396167e", + "file_size_bytes": 278990 + }, + { + "name": "Gp0115679_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115679", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", + "md5_checksum": "ab77e396ec643b58b54da92848b88a96", + "id": "nmdc:ab77e396ec643b58b54da92848b88a96", + "file_size_bytes": 4742886512 + }, + { + "name": "Gp0115679_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115679", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", + "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", + "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", + "file_size_bytes": 266907 + }, + { + "name": "Gp0115679_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115679", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", + "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", + "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "file_size_bytes": 2359747 + }, + { + "name": 
"Gp0115679_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115679", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", + "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", + "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "file_size_bytes": 3859620862 + }, + { + "name": "Gp0115679_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115679", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", + "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", + "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "file_size_bytes": 729541 + }, + { + "name": "Gp0115679_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115679", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", + "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", + "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", + "file_size_bytes": 4358324 + }, + { + "name": "Gp0115679_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115679", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_contigs.fna", + "md5_checksum": "e4314c3743795e0be8beda8b7f806557", + "id": "nmdc:e4314c3743795e0be8beda8b7f806557", + "file_size_bytes": 275030840 + }, + { + "name": "Gp0115679_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115679", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_scaffolds.fna", + "md5_checksum": "2a288a5827b66c88f8abf202bbe37aab", + "id": 
"nmdc:2a288a5827b66c88f8abf202bbe37aab", + "file_size_bytes": 273327529 + }, + { + "name": "Gp0115679_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_covstats.txt", + "md5_checksum": "a51c7b3a70601a885594936fd6c753bc", + "id": "nmdc:a51c7b3a70601a885594936fd6c753bc", + "file_size_bytes": 42368790 + }, + { + "name": "Gp0115679_Assembled AGP file", + "description": "Assembled AGP file for Gp0115679", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_assembly.agp", + "md5_checksum": "8851d6fed8e5bbee88aeb7af77bbcfe3", + "id": "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", + "file_size_bytes": 40232148 + }, + { + "name": "Gp0115679_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115679", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_pairedMapped_sorted.bam", + "md5_checksum": "002ed5f389b8a13735d27a8741290f6b", + "id": "nmdc:002ed5f389b8a13735d27a8741290f6b", + "file_size_bytes": 6236105158 + }, + { + "name": "Gp0115679_Protein FAA", + "description": "Protein FAA for Gp0115679", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_proteins.faa", + "md5_checksum": "ac3faa8ad0e8e7827fcf6b882ec90706", + "id": "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", + "file_size_bytes": 151048115 + }, + { + "name": "Gp0115679_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115679", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_structural_annotation.gff", + "md5_checksum": "e3712dbbf0d0bfa14b9b340e73ebf4d0", + "id": 
"nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", + "file_size_bytes": 2549 + }, + { + "name": "Gp0115679_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115679", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_functional_annotation.gff", + "md5_checksum": "8aed63ca1302c874040e74aceb54ff05", + "id": "nmdc:8aed63ca1302c874040e74aceb54ff05", + "file_size_bytes": 166415068 + }, + { + "name": "Gp0115679_KO TSV file", + "description": "KO TSV file for Gp0115679", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko.tsv", + "md5_checksum": "6361a06de62d93909abfb565a47fd5f0", + "id": "nmdc:6361a06de62d93909abfb565a47fd5f0", + "file_size_bytes": 18038415 + }, + { + "name": "Gp0115679_EC TSV file", + "description": "EC TSV file for Gp0115679", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ec.tsv", + "md5_checksum": "bd9d330d1d6a925066003d653a171ca5", + "id": "nmdc:bd9d330d1d6a925066003d653a171ca5", + "file_size_bytes": 11896121 + }, + { + "name": "Gp0115679_COG GFF file", + "description": "COG GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cog.gff", + "md5_checksum": "c497ffc128d6738bf3868529eb7ff899", + "id": "nmdc:c497ffc128d6738bf3868529eb7ff899", + "file_size_bytes": 81943107 + }, + { + "name": "Gp0115679_PFAM GFF file", + "description": "PFAM GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_pfam.gff", + "md5_checksum": "b67886515193abbd1eec79de067b3196", + "id": "nmdc:b67886515193abbd1eec79de067b3196", + "file_size_bytes": 65136506 + }, + { + "name": "Gp0115679_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115679", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_tigrfam.gff", + "md5_checksum": "05e7a016dddba90801c29de448c43c3c", + "id": "nmdc:05e7a016dddba90801c29de448c43c3c", + "file_size_bytes": 8536835 + }, + { + "name": "Gp0115679_SMART GFF file", + "description": "SMART GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_smart.gff", + "md5_checksum": "7effd4db11316ff95f6a8303807d530f", + "id": "nmdc:7effd4db11316ff95f6a8303807d530f", + "file_size_bytes": 19907975 + }, + { + "name": "Gp0115679_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_supfam.gff", + "md5_checksum": "503770f008dd2cf04d73821412dcf23a", + "id": "nmdc:503770f008dd2cf04d73821412dcf23a", + "file_size_bytes": 107636995 + }, + { + "name": "Gp0115679_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cath_funfam.gff", + "md5_checksum": "c33049c64af55f8ac54d52c861b0a221", + "id": "nmdc:c33049c64af55f8ac54d52c861b0a221", + "file_size_bytes": 89046662 + }, + { + "name": "Gp0115679_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko_ec.gff", + "md5_checksum": "b162efd63f79bc34de66f61348471b74", + "id": "nmdc:b162efd63f79bc34de66f61348471b74", + "file_size_bytes": 57348606 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", 
+ "file_size_bytes": 0 + }, + { + "name": "Gp0115679_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.tooShort.fa", + "md5_checksum": "d830e60f4fb30ecb0610f991dcc70e47", + "id": "nmdc:d830e60f4fb30ecb0610f991dcc70e47", + "file_size_bytes": 215033122 + }, + { + "name": "Gp0115679_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.unbinned.fa", + "md5_checksum": "d33af65556b85b1aaf3a5c48b6e294de", + "id": "nmdc:d33af65556b85b1aaf3a5c48b6e294de", + "file_size_bytes": 44057142 + }, + { + "name": "Gp0115679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_checkm_qa.out", + "md5_checksum": "d2d655091735e6308aafca1e1633aad9", + "id": "nmdc:d2d655091735e6308aafca1e1633aad9", + "file_size_bytes": 2394 + }, + { + "name": "Gp0115679_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115679", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_hqmq_bin.zip", + "md5_checksum": "17c6259329da1bbe6da5a18274452a8d", + "id": "nmdc:17c6259329da1bbe6da5a18274452a8d", + "file_size_bytes": 3215059 }, + { + "name": "Gp0115679_metabat2 bins", + "description": "metabat2 bins for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_metabat_bin.zip", + "md5_checksum": "9250ad41cb19e04a6002e62bda38bbfb", + "id": "nmdc:9250ad41cb19e04a6002e62bda38bbfb", + "file_size_bytes": 1649649 + } + ], + 
"dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "61e719fa833bcf838a701935" + "$oid": "649b0052ec087f6bbab3472e" }, "has_input": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + "nmdc:e4314c3743795e0be8beda8b7f806557", + "nmdc:002ed5f389b8a13735d27a8741290f6b", + "nmdc:8aed63ca1302c874040e74aceb54ff05" ], + "too_short_contig_num": 504368, "part_of": [ "nmdc:mga0gg1q48" ], + "binned_contig_num": 1887, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:e20f8c00473472fa073adde871860801", - "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "nmdc:f721d9dd168b0dea080b191a4396167e", - "nmdc:ab77e396ec643b58b54da92848b88a96", - "nmdc:f2514844e47a9e3d268671f80f152bc1", - "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "nmdc:77860ee043ae9738e7702a3f665b15fa" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:d830e60f4fb30ecb0610f991dcc70e47", + "nmdc:d33af65556b85b1aaf3a5c48b6e294de", + "nmdc:d2d655091735e6308aafca1e1633aad9", + "nmdc:17c6259329da1bbe6da5a18274452a8d", + "nmdc:9250ad41cb19e04a6002e62bda38bbfb" ], "was_informed_by": "gold:Gp0115679", + "input_contig_num": 531775, "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:30:42+00:00", - "output_data_objects": [ - { - "name": "Gp0115679_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", - "md5_checksum": "e20f8c00473472fa073adde871860801", - "id": "nmdc:e20f8c00473472fa073adde871860801", - "file_size_bytes": 18551 - }, - { - "name": "Gp0115679_Gottcha2 full TSV report", 
- "description": "Gottcha2 full TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", - "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", - "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "file_size_bytes": 1200541 - }, - { - "name": "Gp0115679_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115679", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", - "md5_checksum": "f721d9dd168b0dea080b191a4396167e", - "id": "nmdc:f721d9dd168b0dea080b191a4396167e", - "file_size_bytes": 278990 - }, + "name": "MAGs Analysis Activity for nmdc:mga0gg1q48", + "mags_list": [ { - "name": "Gp0115679_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115679", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", - "md5_checksum": "ab77e396ec643b58b54da92848b88a96", - "id": "nmdc:ab77e396ec643b58b54da92848b88a96", - "file_size_bytes": 4742886512 + "number_of_contig": 73, + "completeness": 95.65, + "bin_name": "bins.1", + "gene_count": 2974, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 1, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.22, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 45 }, { - "name": "Gp0115679_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115679", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", - 
"md5_checksum": "f2514844e47a9e3d268671f80f152bc1", - "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", - "file_size_bytes": 266907 + "number_of_contig": 253, + "completeness": 39.12, + "bin_name": "bins.10", + "gene_count": 1586, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.79, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 }, { - "name": "Gp0115679_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115679", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", - "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", - "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "file_size_bytes": 2359747 + "number_of_contig": 135, + "completeness": 16.83, + "bin_name": "bins.2", + "gene_count": 706, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 }, { - "name": "Gp0115679_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115679", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", - "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", - "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "file_size_bytes": 3859620862 + "number_of_contig": 144, + "completeness": 22.53, + "bin_name": "bins.3", + "gene_count": 731, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.14, + 
"gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 }, { - "name": "Gp0115679_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115679", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", - "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", - "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "file_size_bytes": 729541 - }, - { - "name": "Gp0115679_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115679", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", - "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", - "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", - "file_size_bytes": 4358324 - } - ] - }, - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fbd" - }, - "has_input": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" - ], - "part_of": [ - "nmdc:mga0gg1q48" - ], - "ctg_logsum": 682158, - "scaf_logsum": 725191, - "gap_pct": 0.02692, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e4314c3743795e0be8beda8b7f806557", - "nmdc:2a288a5827b66c88f8abf202bbe37aab", - "nmdc:a51c7b3a70601a885594936fd6c753bc", - "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", - "nmdc:002ed5f389b8a13735d27a8741290f6b" - ], - "asm_score": 12.582, - "was_informed_by": "gold:Gp0115679", - "ctg_powsum": 84136, - "scaf_max": 884972, - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "scaf_powsum": 89882, - "execution_resource": "NERSC-Cori", - "contigs": 531791, - "name": "Assembly Activity for nmdc:mga0gg1q48", - "ctg_max": 719201, - "gc_std": 0.09689, - "contig_bp": 254202396, - "gc_avg": 0.48697, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 254270837, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 525116, - 
"ended_at_time": "2021-10-11T06:30:42+00:00", - "ctg_l50": 449, - "ctg_l90": 285, - "ctg_n50": 139317, - "ctg_n90": 451813, - "scaf_l50": 455, - "scaf_l90": 285, - "scaf_n50": 133535, - "scaf_n90": 445430, - "scaf_l_gt50k": 3540548, - "scaf_n_gt50k": 34, - "scaf_pct_gt50k": 1.3924317, - "output_data_objects": [ - { - "name": "Gp0115679_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115679", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_contigs.fna", - "md5_checksum": "e4314c3743795e0be8beda8b7f806557", - "id": "nmdc:e4314c3743795e0be8beda8b7f806557", - "file_size_bytes": 275030840 - }, - { - "name": "Gp0115679_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115679", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_scaffolds.fna", - "md5_checksum": "2a288a5827b66c88f8abf202bbe37aab", - "id": "nmdc:2a288a5827b66c88f8abf202bbe37aab", - "file_size_bytes": 273327529 - }, - { - "name": "Gp0115679_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_covstats.txt", - "md5_checksum": "a51c7b3a70601a885594936fd6c753bc", - "id": "nmdc:a51c7b3a70601a885594936fd6c753bc", - "file_size_bytes": 42368790 - }, - { - "name": "Gp0115679_Assembled AGP file", - "description": "Assembled AGP file for Gp0115679", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_assembly.agp", - "md5_checksum": "8851d6fed8e5bbee88aeb7af77bbcfe3", - "id": "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", - "file_size_bytes": 40232148 - }, - { - "name": "Gp0115679_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115679", - "data_object_type": 
"Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_pairedMapped_sorted.bam", - "md5_checksum": "002ed5f389b8a13735d27a8741290f6b", - "id": "nmdc:002ed5f389b8a13735d27a8741290f6b", - "file_size_bytes": 6236105158 - } - ] - }, - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c9" - }, - "has_input": [ - "nmdc:e4314c3743795e0be8beda8b7f806557" - ], - "part_of": [ - "nmdc:mga0gg1q48" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", - "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", - "nmdc:8aed63ca1302c874040e74aceb54ff05", - "nmdc:6361a06de62d93909abfb565a47fd5f0", - "nmdc:bd9d330d1d6a925066003d653a171ca5", - "nmdc:c497ffc128d6738bf3868529eb7ff899", - "nmdc:b67886515193abbd1eec79de067b3196", - "nmdc:05e7a016dddba90801c29de448c43c3c", - "nmdc:7effd4db11316ff95f6a8303807d530f", - "nmdc:503770f008dd2cf04d73821412dcf23a", - "nmdc:c33049c64af55f8ac54d52c861b0a221", - "nmdc:b162efd63f79bc34de66f61348471b74" - ], - "was_informed_by": "gold:Gp0115679", - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0gg1q48", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T06:30:42+00:00", - "output_data_objects": [ - { - "name": "Gp0115679_Protein FAA", - "description": "Protein FAA for Gp0115679", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_proteins.faa", - "md5_checksum": "ac3faa8ad0e8e7827fcf6b882ec90706", - "id": "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", - "file_size_bytes": 151048115 - }, - { - "name": "Gp0115679_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115679", - "data_object_type": "Structural Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_structural_annotation.gff", - "md5_checksum": "e3712dbbf0d0bfa14b9b340e73ebf4d0", - "id": "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", - "file_size_bytes": 2549 - }, - { - "name": "Gp0115679_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115679", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_functional_annotation.gff", - "md5_checksum": "8aed63ca1302c874040e74aceb54ff05", - "id": "nmdc:8aed63ca1302c874040e74aceb54ff05", - "file_size_bytes": 166415068 - }, - { - "name": "Gp0115679_KO TSV file", - "description": "KO TSV file for Gp0115679", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko.tsv", - "md5_checksum": "6361a06de62d93909abfb565a47fd5f0", - "id": "nmdc:6361a06de62d93909abfb565a47fd5f0", - "file_size_bytes": 18038415 - }, - { - "name": "Gp0115679_EC TSV file", - "description": "EC TSV file for Gp0115679", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ec.tsv", - "md5_checksum": "bd9d330d1d6a925066003d653a171ca5", - "id": "nmdc:bd9d330d1d6a925066003d653a171ca5", - "file_size_bytes": 11896121 - }, - { - "name": "Gp0115679_COG GFF file", - "description": "COG GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cog.gff", - "md5_checksum": "c497ffc128d6738bf3868529eb7ff899", - "id": "nmdc:c497ffc128d6738bf3868529eb7ff899", - "file_size_bytes": 81943107 - }, - { - "name": "Gp0115679_PFAM GFF file", - "description": "PFAM GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_pfam.gff", - "md5_checksum": "b67886515193abbd1eec79de067b3196", - "id": 
"nmdc:b67886515193abbd1eec79de067b3196", - "file_size_bytes": 65136506 - }, - { - "name": "Gp0115679_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_tigrfam.gff", - "md5_checksum": "05e7a016dddba90801c29de448c43c3c", - "id": "nmdc:05e7a016dddba90801c29de448c43c3c", - "file_size_bytes": 8536835 - }, - { - "name": "Gp0115679_SMART GFF file", - "description": "SMART GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_smart.gff", - "md5_checksum": "7effd4db11316ff95f6a8303807d530f", - "id": "nmdc:7effd4db11316ff95f6a8303807d530f", - "file_size_bytes": 19907975 - }, - { - "name": "Gp0115679_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_supfam.gff", - "md5_checksum": "503770f008dd2cf04d73821412dcf23a", - "id": "nmdc:503770f008dd2cf04d73821412dcf23a", - "file_size_bytes": 107636995 - }, - { - "name": "Gp0115679_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cath_funfam.gff", - "md5_checksum": "c33049c64af55f8ac54d52c861b0a221", - "id": "nmdc:c33049c64af55f8ac54d52c861b0a221", - "file_size_bytes": 89046662 - }, - { - "name": "Gp0115679_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko_ec.gff", - "md5_checksum": "b162efd63f79bc34de66f61348471b74", - "id": "nmdc:b162efd63f79bc34de66f61348471b74", - "file_size_bytes": 57348606 - } - ] - }, - { - "_id": { - "$oid": "649b0052ec087f6bbab3472e" - }, - "has_input": [ - "nmdc:e4314c3743795e0be8beda8b7f806557", - "nmdc:002ed5f389b8a13735d27a8741290f6b", - "nmdc:8aed63ca1302c874040e74aceb54ff05" - ], - 
"too_short_contig_num": 504368, - "part_of": [ - "nmdc:mga0gg1q48" - ], - "binned_contig_num": 1887, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:d830e60f4fb30ecb0610f991dcc70e47", - "nmdc:d33af65556b85b1aaf3a5c48b6e294de", - "nmdc:d2d655091735e6308aafca1e1633aad9", - "nmdc:17c6259329da1bbe6da5a18274452a8d", - "nmdc:9250ad41cb19e04a6002e62bda38bbfb" - ], - "was_informed_by": "gold:Gp0115679", - "input_contig_num": 531775, - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0gg1q48", - "mags_list": [ - { - "number_of_contig": 73, - "completeness": 95.65, - "bin_name": "bins.1", - "gene_count": 2974, - "bin_quality": "HQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 1, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 0.22, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 45 - }, - { - "number_of_contig": 253, - "completeness": 39.12, - "bin_name": "bins.10", - "gene_count": 1586, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.79, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 20 - }, - { - "number_of_contig": 135, - "completeness": 16.83, - "bin_name": "bins.2", - "gene_count": 706, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 11 - }, - { - "number_of_contig": 144, - "completeness": 22.53, - "bin_name": "bins.3", - "gene_count": 731, - 
"bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.14, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 16 - }, - { - "number_of_contig": 273, - "completeness": 68.97, - "bin_name": "bins.4", - "gene_count": 2023, - "bin_quality": "MQ", - "gtdbtk_species": "UBA5335 sp002862435", - "gtdbtk_order": "UBA5335", - "num_16s": 0, - "gtdbtk_family": "UBA5335", - "gtdbtk_domain": "Bacteria", - "contamination": 1.72, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA5335", - "num_t_rna": 33 + "number_of_contig": 273, + "completeness": 68.97, + "bin_name": "bins.4", + "gene_count": 2023, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 33 }, { "number_of_contig": 3, @@ -2799,593 +2735,620 @@ "unbinned_contig_num": 25520, "started_at_time": "2021-10-11T02:23:30Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T06:30:42+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115679_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta 
file by metaBat2 for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.tooShort.fa", - "md5_checksum": "d830e60f4fb30ecb0610f991dcc70e47", - "id": "nmdc:d830e60f4fb30ecb0610f991dcc70e47", - "file_size_bytes": 215033122 - }, - { - "name": "Gp0115679_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.unbinned.fa", - "md5_checksum": "d33af65556b85b1aaf3a5c48b6e294de", - "id": "nmdc:d33af65556b85b1aaf3a5c48b6e294de", - "file_size_bytes": 44057142 - }, - { - "name": "Gp0115679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_checkm_qa.out", - "md5_checksum": "d2d655091735e6308aafca1e1633aad9", - "id": "nmdc:d2d655091735e6308aafca1e1633aad9", - "file_size_bytes": 2394 - }, - { - "name": "Gp0115679_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115679", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_hqmq_bin.zip", - "md5_checksum": "17c6259329da1bbe6da5a18274452a8d", - "id": "nmdc:17c6259329da1bbe6da5a18274452a8d", - "file_size_bytes": 3215059 - }, - { - "name": "Gp0115679_metabat2 bins", - "description": "metabat2 bins for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_metabat_bin.zip", - "md5_checksum": "9250ad41cb19e04a6002e62bda38bbfb", - "id": "nmdc:9250ad41cb19e04a6002e62bda38bbfb", - "file_size_bytes": 1649649 - } - ] + "ended_at_time": "2021-10-11T06:30:42+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b37" - }, - "id": "nmdc:omprc-11-yt8css91", - "name": "Sand microcosm 
microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", - "has_input": [ - "nmdc:bsm-11-ynevd369" - ], - "has_output": [ - "jgi:55d818010d8785342fcf8278" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115667" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c882" + "$oid": "649b005bbf2caae0415ef9c9" }, "has_input": [ - "nmdc:cb2e0605e8f22a398d982e35aee57715" + "nmdc:e4314c3743795e0be8beda8b7f806557" ], "part_of": [ - "nmdc:mga0n0je44" + "nmdc:mga0gg1q48" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", - "nmdc:dae7c6e067f69ef6db39b4240cc450ba" + "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", + "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", + "nmdc:8aed63ca1302c874040e74aceb54ff05", + "nmdc:6361a06de62d93909abfb565a47fd5f0", + "nmdc:bd9d330d1d6a925066003d653a171ca5", + "nmdc:c497ffc128d6738bf3868529eb7ff899", + "nmdc:b67886515193abbd1eec79de067b3196", + "nmdc:05e7a016dddba90801c29de448c43c3c", + "nmdc:7effd4db11316ff95f6a8303807d530f", + "nmdc:503770f008dd2cf04d73821412dcf23a", + "nmdc:c33049c64af55f8ac54d52c861b0a221", + "nmdc:b162efd63f79bc34de66f61348471b74" ], - "was_informed_by": "gold:Gp0115667", - 
"input_read_count": 19416222, - "output_read_bases": 2825090769, - "id": "nmdc:8093869c91384d3299431e56019f7de0", + "was_informed_by": "gold:Gp0115679", + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", "execution_resource": "NERSC-Cori", - "input_read_bases": 2931849522, - "name": "Read QC Activity for nmdc:mga0n0je44", - "output_read_count": 18855352, - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:24+00:00", - "output_data_objects": [ - { - "name": "Gp0115667_Filtered Reads", - "description": "Filtered Reads for Gp0115667", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filtered.fastq.gz", - "md5_checksum": "7d4057e3a44a05171c13fb0ed3e2294a", - "id": "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", - "file_size_bytes": 1599931347 - }, - { - "name": "Gp0115667_Filtered Stats", - "description": "Filtered Stats for Gp0115667", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filterStats.txt", - "md5_checksum": "dae7c6e067f69ef6db39b4240cc450ba", - "id": "nmdc:dae7c6e067f69ef6db39b4240cc450ba", - "file_size_bytes": 286 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0gg1q48", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:30:42+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf4a" + "$oid": "649b005f2ca5ee4adb139fbd" }, "has_input": [ - "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + ], + "part_of": [ + "nmdc:mga0gg1q48" ], + "ctg_logsum": 682158, + "scaf_logsum": 725191, + "gap_pct": 0.02692, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - 
"nmdc:2afff209a40ca4895307f3a47080c534", - "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "nmdc:eb189cbf0543203d2521397b73d4d34b", - "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "nmdc:ac90bf3384ce44d097f7897ac5ff8134" + "nmdc:e4314c3743795e0be8beda8b7f806557", + "nmdc:2a288a5827b66c88f8abf202bbe37aab", + "nmdc:a51c7b3a70601a885594936fd6c753bc", + "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", + "nmdc:002ed5f389b8a13735d27a8741290f6b" ], - "was_informed_by": "gold:Gp0115667", - "id": "nmdc:8093869c91384d3299431e56019f7de0", + "asm_score": 12.582, + "was_informed_by": "gold:Gp0115679", + "ctg_powsum": 84136, + "scaf_max": 884972, + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "scaf_powsum": 89882, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:24+00:00", - "output_data_objects": [ - { - "name": "Gp0115667_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", - "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", - "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "file_size_bytes": 10576 - }, - { - "name": "Gp0115667_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", - "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", - "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "file_size_bytes": 792905 - }, - { - "name": "Gp0115667_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115667", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", - "md5_checksum": "2afff209a40ca4895307f3a47080c534", - "id": "nmdc:2afff209a40ca4895307f3a47080c534", - "file_size_bytes": 254763 - }, - { - "name": "Gp0115667_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115667", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", - "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", - "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "file_size_bytes": 1336111813 - }, - { - "name": "Gp0115667_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115667", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", - "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", - "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "file_size_bytes": 254506 - }, - { - "name": "Gp0115667_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115667", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", - "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", - "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "file_size_bytes": 2323153 - }, - { - "name": "Gp0115667_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115667", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", - "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", - "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", - "file_size_bytes": 
1097852664 - }, - { - "name": "Gp0115667_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115667", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", - "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", - "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "file_size_bytes": 639213 - }, - { - "name": "Gp0115667_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115667", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", - "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", - "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", - "file_size_bytes": 3979807 - } - ] - }, + "contigs": 531791, + "name": "Assembly Activity for nmdc:mga0gg1q48", + "ctg_max": 719201, + "gc_std": 0.09689, + "contig_bp": 254202396, + "gc_avg": 0.48697, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 254270837, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 525116, + "ended_at_time": "2021-10-11T06:30:42+00:00", + "ctg_l50": 449, + "ctg_l90": 285, + "ctg_n50": 139317, + "ctg_n90": 451813, + "scaf_l50": 455, + "scaf_l90": 285, + "scaf_n50": 133535, + "scaf_n90": 445430, + "scaf_l_gt50k": 3540548, + "scaf_n_gt50k": 34, + "scaf_pct_gt50k": 1.3924317 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e71a10833bcf838a701aaa" + "$oid": "649b009773e8249959349b36" }, + "id": "nmdc:omprc-11-hymrq852", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", "has_input": [ - 
"nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + "nmdc:bsm-11-11219w54" ], - "part_of": [ - "nmdc:mga0n0je44" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "nmdc:2afff209a40ca4895307f3a47080c534", - "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "nmdc:eb189cbf0543203d2521397b73d4d34b", - "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "nmdc:ac90bf3384ce44d097f7897ac5ff8134" + "jgi:55a9cb010d87852b21508920" ], - "was_informed_by": "gold:Gp0115667", - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:24+00:00", - "output_data_objects": [ - { - "name": "Gp0115667_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", - "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", - "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "file_size_bytes": 10576 - }, - { - "name": "Gp0115667_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", - "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", - "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "file_size_bytes": 792905 - }, - { - "name": "Gp0115667_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115667", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", - "md5_checksum": 
"2afff209a40ca4895307f3a47080c534", - "id": "nmdc:2afff209a40ca4895307f3a47080c534", - "file_size_bytes": 254763 - }, - { - "name": "Gp0115667_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115667", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", - "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", - "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "file_size_bytes": 1336111813 - }, - { - "name": "Gp0115667_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115667", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", - "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", - "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "file_size_bytes": 254506 - }, - { - "name": "Gp0115667_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115667", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", - "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", - "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "file_size_bytes": 2323153 - }, - { - "name": "Gp0115667_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115667", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", - "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", - "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", - "file_size_bytes": 1097852664 - }, - { - "name": "Gp0115667_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115667", - "data_object_type": 
"Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", - "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", - "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "file_size_bytes": 639213 - }, - { - "name": "Gp0115667_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115667", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", - "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", - "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", - "file_size_bytes": 3979807 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115679" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fb7" + "$oid": "649b009d6bdd4fd20273c884" }, "has_input": [ - "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + "nmdc:3bf389b767cf8a49224dc0028e55eeb7" ], "part_of": [ - "nmdc:mga0n0je44" + "nmdc:mga0gg1q48" ], - "ctg_logsum": 195440, - "scaf_logsum": 196103, - "gap_pct": 0.00293, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:b3cefc5a9599a4fb9432132baf7f5565", - "nmdc:b60f674a01e3f7fff5ead95f330cef4f", - "nmdc:2e4532cb03bb1e9201976b9d65893788", - "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", - "nmdc:d9b957c7efe7f753fe67441d0be605c6" + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", + "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49" ], - "asm_score": 
17.061, - "was_informed_by": "gold:Gp0115667", - "ctg_powsum": 25448, - "scaf_max": 245816, - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "scaf_powsum": 25552, + "was_informed_by": "gold:Gp0115679", + "input_read_count": 67696542, + "output_read_bases": 9825387057, + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", "execution_resource": "NERSC-Cori", - "contigs": 116132, - "name": "Assembly Activity for nmdc:mga0n0je44", - "ctg_max": 245816, - "gc_std": 0.12277, - "contig_bp": 58413782, - "gc_avg": 0.47644, - "started_at_time": "2021-10-11T02:28:16Z", - "scaf_bp": 58415492, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 116033, - "ended_at_time": "2021-10-11T03:58:24+00:00", - "ctg_l50": 479, - "ctg_l90": 286, - "ctg_n50": 26909, - "ctg_n90": 95138, - "scaf_l50": 479, - "scaf_l90": 286, - "scaf_n50": 26889, - "scaf_n90": 95057, - "scaf_l_gt50k": 1865703, - "scaf_n_gt50k": 17, - "scaf_pct_gt50k": 3.1938498, - "output_data_objects": [ - { - "name": "Gp0115667_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115667", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", - "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", - "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", - "file_size_bytes": 62926054 - }, - { - "name": "Gp0115667_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115667", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", - "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", - "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", - "file_size_bytes": 62577490 - }, - { - "name": "Gp0115667_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", - "md5_checksum": 
"2e4532cb03bb1e9201976b9d65893788", - "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", - "file_size_bytes": 9189143 - }, - { - "name": "Gp0115667_Assembled AGP file", - "description": "Assembled AGP file for Gp0115667", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_assembly.agp", - "md5_checksum": "e49f8a26a9cd0420b688c967bbacb4c6", - "id": "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", - "file_size_bytes": 8508903 - }, - { - "name": "Gp0115667_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115667", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_pairedMapped_sorted.bam", - "md5_checksum": "d9b957c7efe7f753fe67441d0be605c6", - "id": "nmdc:d9b957c7efe7f753fe67441d0be605c6", - "file_size_bytes": 1771039554 - } - ] - }, + "input_read_bases": 10222177842, + "name": "Read QC Activity for nmdc:mga0gg1q48", + "output_read_count": 67147510, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:30:42+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9cc" + "$oid": "649b009bff710ae353f8cf4b" }, "has_input": [ - "nmdc:b3cefc5a9599a4fb9432132baf7f5565" - ], - "part_of": [ - "nmdc:mga0n0je44" + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", - "nmdc:26ab4381753f685c44091e1f17d8bab5", - "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", - "nmdc:6df49253fee066c699d6a5191a0efaed", - "nmdc:5e35e51a595f892968e57681ee448e5f", - "nmdc:ae1bc890152d28387f65c65d434b97ea", - "nmdc:fb736eaba77cbd99135ddbc32168db94", - "nmdc:3b00892f95bc4dedaf4384685a75d52f", - "nmdc:b8c0d7c187169f34aafc17308aeea2ed", - "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", - 
"nmdc:34eddc2289f3e3b4707a6c8060f6dd99", - "nmdc:0a51a22e2cf94c853657381549aa8f04" + "nmdc:e20f8c00473472fa073adde871860801", + "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "nmdc:f721d9dd168b0dea080b191a4396167e", + "nmdc:ab77e396ec643b58b54da92848b88a96", + "nmdc:f2514844e47a9e3d268671f80f152bc1", + "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "nmdc:77860ee043ae9738e7702a3f665b15fa" ], - "was_informed_by": "gold:Gp0115667", - "id": "nmdc:8093869c91384d3299431e56019f7de0", + "was_informed_by": "gold:Gp0115679", + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0n0je44", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:58:24+00:00", - "output_data_objects": [ - { - "name": "Gp0115667_Protein FAA", - "description": "Protein FAA for Gp0115667", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_proteins.faa", - "md5_checksum": "45e8b887fc06ddbf2af3ecf9c91a7bf7", - "id": "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", - "file_size_bytes": 31564336 - }, - { - "name": "Gp0115667_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115667", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_structural_annotation.gff", - "md5_checksum": "26ab4381753f685c44091e1f17d8bab5", - "id": "nmdc:26ab4381753f685c44091e1f17d8bab5", - "file_size_bytes": 2760 - }, - { - "name": "Gp0115667_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115667", - "data_object_type": "Functional Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_functional_annotation.gff", - "md5_checksum": "5a378f3975ab6c2cf2a36b0b007ea3f8", - "id": "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", - "file_size_bytes": 34525554 - }, - { - "name": "Gp0115667_KO TSV file", - "description": "KO TSV file for Gp0115667", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko.tsv", - "md5_checksum": "6df49253fee066c699d6a5191a0efaed", - "id": "nmdc:6df49253fee066c699d6a5191a0efaed", - "file_size_bytes": 3439857 - }, - { - "name": "Gp0115667_EC TSV file", - "description": "EC TSV file for Gp0115667", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ec.tsv", - "md5_checksum": "5e35e51a595f892968e57681ee448e5f", - "id": "nmdc:5e35e51a595f892968e57681ee448e5f", - "file_size_bytes": 2203532 - }, - { - "name": "Gp0115667_COG GFF file", - "description": "COG GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cog.gff", - "md5_checksum": "ae1bc890152d28387f65c65d434b97ea", - "id": "nmdc:ae1bc890152d28387f65c65d434b97ea", - "file_size_bytes": 15384958 - }, - { - "name": "Gp0115667_PFAM GFF file", - "description": "PFAM GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_pfam.gff", - "md5_checksum": "fb736eaba77cbd99135ddbc32168db94", - "id": "nmdc:fb736eaba77cbd99135ddbc32168db94", - "file_size_bytes": 12472999 - }, - { - "name": "Gp0115667_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_tigrfam.gff", - "md5_checksum": "3b00892f95bc4dedaf4384685a75d52f", - "id": "nmdc:3b00892f95bc4dedaf4384685a75d52f", - "file_size_bytes": 1755779 - }, - { - "name": 
"Gp0115667_SMART GFF file", - "description": "SMART GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_smart.gff", - "md5_checksum": "b8c0d7c187169f34aafc17308aeea2ed", - "id": "nmdc:b8c0d7c187169f34aafc17308aeea2ed", - "file_size_bytes": 3937293 - }, - { - "name": "Gp0115667_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_supfam.gff", - "md5_checksum": "2a8e4bb3922ec664bbb5ce49a30cc87e", - "id": "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", - "file_size_bytes": 22725250 - }, - { - "name": "Gp0115667_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cath_funfam.gff", - "md5_checksum": "34eddc2289f3e3b4707a6c8060f6dd99", - "id": "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", - "file_size_bytes": 17788890 - }, - { - "name": "Gp0115667_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko_ec.gff", - "md5_checksum": "0a51a22e2cf94c853657381549aa8f04", - "id": "nmdc:0a51a22e2cf94c853657381549aa8f04", - "file_size_bytes": 11004264 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:30:42+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e719fa833bcf838a701935" + }, + "has_input": [ + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + ], + "part_of": [ + "nmdc:mga0gg1q48" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": 
[ + "nmdc:e20f8c00473472fa073adde871860801", + "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "nmdc:f721d9dd168b0dea080b191a4396167e", + "nmdc:ab77e396ec643b58b54da92848b88a96", + "nmdc:f2514844e47a9e3d268671f80f152bc1", + "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "nmdc:77860ee043ae9738e7702a3f665b15fa" + ], + "was_informed_by": "gold:Gp0115679", + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:30:42+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115667_Filtered Reads", + "description": "Filtered Reads for Gp0115667", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filtered.fastq.gz", + "md5_checksum": "7d4057e3a44a05171c13fb0ed3e2294a", + "id": "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", + "file_size_bytes": 1599931347 + }, + { + "name": "Gp0115667_Filtered Stats", + "description": "Filtered Stats for Gp0115667", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filterStats.txt", + "md5_checksum": "dae7c6e067f69ef6db39b4240cc450ba", + "id": "nmdc:dae7c6e067f69ef6db39b4240cc450ba", + "file_size_bytes": 286 + }, + { + "name": "Gp0115667_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", + "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", + "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + 
"file_size_bytes": 10576 + }, + { + "name": "Gp0115667_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", + "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", + "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "file_size_bytes": 792905 + }, + { + "name": "Gp0115667_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115667", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", + "md5_checksum": "2afff209a40ca4895307f3a47080c534", + "id": "nmdc:2afff209a40ca4895307f3a47080c534", + "file_size_bytes": 254763 + }, + { + "name": "Gp0115667_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115667", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", + "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", + "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "file_size_bytes": 1336111813 + }, + { + "name": "Gp0115667_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115667", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", + "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", + "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "file_size_bytes": 254506 + }, + { + "name": "Gp0115667_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115667", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", + 
"md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", + "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "file_size_bytes": 2323153 + }, + { + "name": "Gp0115667_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115667", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", + "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", + "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", + "file_size_bytes": 1097852664 + }, + { + "name": "Gp0115667_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115667", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", + "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", + "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "file_size_bytes": 639213 + }, + { + "name": "Gp0115667_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115667", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", + "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", + "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", + "file_size_bytes": 3979807 + }, + { + "name": "Gp0115667_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", + "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", + "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "file_size_bytes": 10576 + }, + { + "name": "Gp0115667_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", + 
"md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", + "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "file_size_bytes": 792905 + }, + { + "name": "Gp0115667_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115667", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", + "md5_checksum": "2afff209a40ca4895307f3a47080c534", + "id": "nmdc:2afff209a40ca4895307f3a47080c534", + "file_size_bytes": 254763 + }, + { + "name": "Gp0115667_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115667", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", + "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", + "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "file_size_bytes": 1336111813 + }, + { + "name": "Gp0115667_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115667", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", + "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", + "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "file_size_bytes": 254506 + }, + { + "name": "Gp0115667_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115667", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", + "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", + "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "file_size_bytes": 2323153 + }, + { + "name": "Gp0115667_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115667", + 
"data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", + "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", + "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", + "file_size_bytes": 1097852664 + }, + { + "name": "Gp0115667_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115667", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", + "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", + "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "file_size_bytes": 639213 + }, + { + "name": "Gp0115667_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115667", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", + "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", + "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", + "file_size_bytes": 3979807 + }, + { + "name": "Gp0115667_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115667", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", + "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", + "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "file_size_bytes": 62926054 + }, + { + "name": "Gp0115667_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115667", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", + "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", + "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", + "file_size_bytes": 62577490 + }, + { + "name": "Gp0115667_Metagenome Contig Coverage Stats", + 
"description": "Metagenome Contig Coverage Stats for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", + "md5_checksum": "2e4532cb03bb1e9201976b9d65893788", + "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", + "file_size_bytes": 9189143 + }, + { + "name": "Gp0115667_Assembled AGP file", + "description": "Assembled AGP file for Gp0115667", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_assembly.agp", + "md5_checksum": "e49f8a26a9cd0420b688c967bbacb4c6", + "id": "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", + "file_size_bytes": 8508903 + }, + { + "name": "Gp0115667_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115667", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_pairedMapped_sorted.bam", + "md5_checksum": "d9b957c7efe7f753fe67441d0be605c6", + "id": "nmdc:d9b957c7efe7f753fe67441d0be605c6", + "file_size_bytes": 1771039554 + }, + { + "name": "Gp0115667_Protein FAA", + "description": "Protein FAA for Gp0115667", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_proteins.faa", + "md5_checksum": "45e8b887fc06ddbf2af3ecf9c91a7bf7", + "id": "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", + "file_size_bytes": 31564336 + }, + { + "name": "Gp0115667_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115667", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_structural_annotation.gff", + "md5_checksum": "26ab4381753f685c44091e1f17d8bab5", + "id": "nmdc:26ab4381753f685c44091e1f17d8bab5", + "file_size_bytes": 2760 + }, + { + "name": "Gp0115667_Functional annotation GFF file", + "description": "Functional 
annotation GFF file for Gp0115667", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_functional_annotation.gff", + "md5_checksum": "5a378f3975ab6c2cf2a36b0b007ea3f8", + "id": "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", + "file_size_bytes": 34525554 + }, + { + "name": "Gp0115667_KO TSV file", + "description": "KO TSV file for Gp0115667", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko.tsv", + "md5_checksum": "6df49253fee066c699d6a5191a0efaed", + "id": "nmdc:6df49253fee066c699d6a5191a0efaed", + "file_size_bytes": 3439857 + }, + { + "name": "Gp0115667_EC TSV file", + "description": "EC TSV file for Gp0115667", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ec.tsv", + "md5_checksum": "5e35e51a595f892968e57681ee448e5f", + "id": "nmdc:5e35e51a595f892968e57681ee448e5f", + "file_size_bytes": 2203532 + }, + { + "name": "Gp0115667_COG GFF file", + "description": "COG GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cog.gff", + "md5_checksum": "ae1bc890152d28387f65c65d434b97ea", + "id": "nmdc:ae1bc890152d28387f65c65d434b97ea", + "file_size_bytes": 15384958 + }, + { + "name": "Gp0115667_PFAM GFF file", + "description": "PFAM GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_pfam.gff", + "md5_checksum": "fb736eaba77cbd99135ddbc32168db94", + "id": "nmdc:fb736eaba77cbd99135ddbc32168db94", + "file_size_bytes": 12472999 + }, + { + "name": "Gp0115667_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_tigrfam.gff", + "md5_checksum": "3b00892f95bc4dedaf4384685a75d52f", + 
"id": "nmdc:3b00892f95bc4dedaf4384685a75d52f", + "file_size_bytes": 1755779 + }, + { + "name": "Gp0115667_SMART GFF file", + "description": "SMART GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_smart.gff", + "md5_checksum": "b8c0d7c187169f34aafc17308aeea2ed", + "id": "nmdc:b8c0d7c187169f34aafc17308aeea2ed", + "file_size_bytes": 3937293 + }, + { + "name": "Gp0115667_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_supfam.gff", + "md5_checksum": "2a8e4bb3922ec664bbb5ce49a30cc87e", + "id": "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", + "file_size_bytes": 22725250 + }, + { + "name": "Gp0115667_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cath_funfam.gff", + "md5_checksum": "34eddc2289f3e3b4707a6c8060f6dd99", + "id": "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", + "file_size_bytes": 17788890 + }, + { + "name": "Gp0115667_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko_ec.gff", + "md5_checksum": "0a51a22e2cf94c853657381549aa8f04", + "id": "nmdc:0a51a22e2cf94c853657381549aa8f04", + "file_size_bytes": 11004264 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115667_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered 
contigs fasta file by metaBat2 for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.tooShort.fa", + "md5_checksum": "1277a6924ab380e001a7208e7ebbb0e3", + "id": "nmdc:1277a6924ab380e001a7208e7ebbb0e3", + "file_size_bytes": 46335107 + }, + { + "name": "Gp0115667_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.unbinned.fa", + "md5_checksum": "48772112891988a2ef3f0c40786c11fd", + "id": "nmdc:48772112891988a2ef3f0c40786c11fd", + "file_size_bytes": 10701981 + }, + { + "name": "Gp0115667_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115667", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_checkm_qa.out", + "md5_checksum": "527e2c19607c225a707db67b5be01b6f", + "id": "nmdc:527e2c19607c225a707db67b5be01b6f", + "file_size_bytes": 1360 + }, + { + "name": "Gp0115667_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115667", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_hqmq_bin.zip", + "md5_checksum": "027626ff998bf1e495e32d09cab4bb08", + "id": "nmdc:027626ff998bf1e495e32d09cab4bb08", + "file_size_bytes": 1462611 }, + { + "name": "Gp0115667_metabat2 bins", + "description": "metabat2 bins for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_metabat_bin.zip", + "md5_checksum": "733e798989606c802b3bbfc952a38841", + "id": "nmdc:733e798989606c802b3bbfc952a38841", + "file_size_bytes": 334014 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { "$oid": "649b0052ec087f6bbab3472a" @@ 
-3495,618 +3458,648 @@ "unbinned_contig_num": 6177, "started_at_time": "2021-10-11T02:28:16Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:24+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115667_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.tooShort.fa", - "md5_checksum": "1277a6924ab380e001a7208e7ebbb0e3", - "id": "nmdc:1277a6924ab380e001a7208e7ebbb0e3", - "file_size_bytes": 46335107 - }, - { - "name": "Gp0115667_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.unbinned.fa", - "md5_checksum": "48772112891988a2ef3f0c40786c11fd", - "id": "nmdc:48772112891988a2ef3f0c40786c11fd", - "file_size_bytes": 10701981 - }, - { - "name": "Gp0115667_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115667", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_checkm_qa.out", - "md5_checksum": "527e2c19607c225a707db67b5be01b6f", - "id": "nmdc:527e2c19607c225a707db67b5be01b6f", - "file_size_bytes": 1360 - }, - { - "name": "Gp0115667_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115667", - 
"data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_hqmq_bin.zip", - "md5_checksum": "027626ff998bf1e495e32d09cab4bb08", - "id": "nmdc:027626ff998bf1e495e32d09cab4bb08", - "file_size_bytes": 1462611 - }, - { - "name": "Gp0115667_metabat2 bins", - "description": "metabat2 bins for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_metabat_bin.zip", - "md5_checksum": "733e798989606c802b3bbfc952a38841", - "id": "nmdc:733e798989606c802b3bbfc952a38841", - "file_size_bytes": 334014 - } - ] + "ended_at_time": "2021-10-11T03:58:24+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b38" - }, - "id": "nmdc:omprc-11-hgehsc37", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", - "has_input": [ - "nmdc:bsm-11-qxntpg05" - ], - "has_output": [ - "jgi:55d817f20d8785342fcf826c" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115664" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c87b" + "$oid": "649b005bbf2caae0415ef9cc" }, "has_input": [ - "nmdc:86929bf5b2afcb965129dcf0eae2d8fc" + "nmdc:b3cefc5a9599a4fb9432132baf7f5565" ], "part_of": 
[ - "nmdc:mga0dm3v04" + "nmdc:mga0n0je44" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:232e31505b6a0251df2303c0563d64c1", - "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5" + "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", + "nmdc:26ab4381753f685c44091e1f17d8bab5", + "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", + "nmdc:6df49253fee066c699d6a5191a0efaed", + "nmdc:5e35e51a595f892968e57681ee448e5f", + "nmdc:ae1bc890152d28387f65c65d434b97ea", + "nmdc:fb736eaba77cbd99135ddbc32168db94", + "nmdc:3b00892f95bc4dedaf4384685a75d52f", + "nmdc:b8c0d7c187169f34aafc17308aeea2ed", + "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", + "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", + "nmdc:0a51a22e2cf94c853657381549aa8f04" ], - "was_informed_by": "gold:Gp0115664", - "input_read_count": 19058974, - "output_read_bases": 2597325375, - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "was_informed_by": "gold:Gp0115667", + "id": "nmdc:8093869c91384d3299431e56019f7de0", "execution_resource": "NERSC-Cori", - "input_read_bases": 2877905074, - "name": "Read QC Activity for nmdc:mga0dm3v04", - "output_read_count": 17338778, + "name": "Annotation Activity for nmdc:mga0n0je44", "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:33:34+00:00", - "output_data_objects": [ - { - "name": "Gp0115664_Filtered Reads", - "description": "Filtered Reads for Gp0115664", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filtered.fastq.gz", - "md5_checksum": "232e31505b6a0251df2303c0563d64c1", - "id": "nmdc:232e31505b6a0251df2303c0563d64c1", - "file_size_bytes": 1566732675 - }, - { - "name": "Gp0115664_Filtered Stats", - "description": "Filtered Stats for Gp0115664", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filterStats.txt", - "md5_checksum": 
"f3f4f75f19c92af6e98d2b45cccaacd5", - "id": "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5", - "file_size_bytes": 289 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:24+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf3f" + "$oid": "649b005f2ca5ee4adb139fb7" }, "has_input": [ - "nmdc:232e31505b6a0251df2303c0563d64c1" + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + ], + "part_of": [ + "nmdc:mga0n0je44" ], + "ctg_logsum": 195440, + "scaf_logsum": 196103, + "gap_pct": 0.00293, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "nmdc:7f93f97242aed036019f13492f5af35c", - "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "nmdc:a4243f71a0288f489c566ae85d85891d", - "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "nmdc:a80779b32415ef001d0403f0b618b612", - "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "nmdc:ce47d6686edb7b3472102d5883229c45", - "nmdc:29b75e78b0b7fd8115614d8e9d341d46" + "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "nmdc:b60f674a01e3f7fff5ead95f330cef4f", + "nmdc:2e4532cb03bb1e9201976b9d65893788", + "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", + "nmdc:d9b957c7efe7f753fe67441d0be605c6" ], - "was_informed_by": "gold:Gp0115664", - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", + "asm_score": 17.061, + "was_informed_by": "gold:Gp0115667", + "ctg_powsum": 25448, + "scaf_max": 245816, + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "scaf_powsum": 25552, + "execution_resource": "NERSC-Cori", + "contigs": 116132, + "name": "Assembly Activity for nmdc:mga0n0je44", + "ctg_max": 245816, + "gc_std": 0.12277, + "contig_bp": 58413782, + "gc_avg": 0.47644, "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:34+00:00", - "output_data_objects": [ - { - "name": 
"Gp0115664_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", - "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", - "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "file_size_bytes": 9591 - }, - { - "name": "Gp0115664_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", - "md5_checksum": "7f93f97242aed036019f13492f5af35c", - "id": "nmdc:7f93f97242aed036019f13492f5af35c", - "file_size_bytes": 885985 - }, - { - "name": "Gp0115664_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115664", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", - "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", - "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "file_size_bytes": 251303 - }, - { - "name": "Gp0115664_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115664", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", - "md5_checksum": "a4243f71a0288f489c566ae85d85891d", - "id": "nmdc:a4243f71a0288f489c566ae85d85891d", - "file_size_bytes": 1268144933 - }, - { - "name": "Gp0115664_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115664", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", - "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", - "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "file_size_bytes": 
254575 - }, - { - "name": "Gp0115664_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115664", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", - "md5_checksum": "a80779b32415ef001d0403f0b618b612", - "id": "nmdc:a80779b32415ef001d0403f0b618b612", - "file_size_bytes": 2327293 - }, - { - "name": "Gp0115664_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115664", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", - "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", - "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "file_size_bytes": 1037932028 - }, - { - "name": "Gp0115664_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115664", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", - "md5_checksum": "ce47d6686edb7b3472102d5883229c45", - "id": "nmdc:ce47d6686edb7b3472102d5883229c45", - "file_size_bytes": 641242 - }, - { - "name": "Gp0115664_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115664", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", - "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", - "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", - "file_size_bytes": 3995680 - } - ] - }, + "scaf_bp": 58415492, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 116033, + "ended_at_time": "2021-10-11T03:58:24+00:00", + "ctg_l50": 479, + "ctg_l90": 286, + "ctg_n50": 26909, + "ctg_n90": 95138, + "scaf_l50": 479, + "scaf_l90": 286, + "scaf_n50": 26889, + "scaf_n90": 
95057, + "scaf_l_gt50k": 1865703, + "scaf_n_gt50k": 17, + "scaf_pct_gt50k": 3.1938498 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e719dd833bcf838a70154e" + "$oid": "649b009773e8249959349b37" }, + "id": "nmdc:omprc-11-yt8css91", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", "has_input": [ - "nmdc:232e31505b6a0251df2303c0563d64c1" - ], - "part_of": [ - "nmdc:mga0dm3v04" + "nmdc:bsm-11-ynevd369" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "nmdc:7f93f97242aed036019f13492f5af35c", - "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "nmdc:a4243f71a0288f489c566ae85d85891d", - "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "nmdc:a80779b32415ef001d0403f0b618b612", - "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "nmdc:ce47d6686edb7b3472102d5883229c45", - "nmdc:29b75e78b0b7fd8115614d8e9d341d46" + "jgi:55d818010d8785342fcf8278" ], - "was_informed_by": "gold:Gp0115664", - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:34+00:00", - "output_data_objects": [ - { - "name": "Gp0115664_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", - "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", - "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "file_size_bytes": 9591 - }, - { - "name": 
"Gp0115664_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", - "md5_checksum": "7f93f97242aed036019f13492f5af35c", - "id": "nmdc:7f93f97242aed036019f13492f5af35c", - "file_size_bytes": 885985 - }, - { - "name": "Gp0115664_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115664", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", - "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", - "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "file_size_bytes": 251303 - }, - { - "name": "Gp0115664_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115664", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", - "md5_checksum": "a4243f71a0288f489c566ae85d85891d", - "id": "nmdc:a4243f71a0288f489c566ae85d85891d", - "file_size_bytes": 1268144933 - }, - { - "name": "Gp0115664_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115664", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", - "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", - "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "file_size_bytes": 254575 - }, - { - "name": "Gp0115664_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115664", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", - "md5_checksum": "a80779b32415ef001d0403f0b618b612", - 
"id": "nmdc:a80779b32415ef001d0403f0b618b612", - "file_size_bytes": 2327293 - }, - { - "name": "Gp0115664_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115664", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", - "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", - "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "file_size_bytes": 1037932028 - }, - { - "name": "Gp0115664_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115664", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", - "md5_checksum": "ce47d6686edb7b3472102d5883229c45", - "id": "nmdc:ce47d6686edb7b3472102d5883229c45", - "file_size_bytes": 641242 - }, - { - "name": "Gp0115664_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115664", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", - "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", - "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", - "file_size_bytes": 3995680 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115667" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fb0" + "$oid": 
"649b009d6bdd4fd20273c882" }, "has_input": [ - "nmdc:232e31505b6a0251df2303c0563d64c1" + "nmdc:cb2e0605e8f22a398d982e35aee57715" ], "part_of": [ - "nmdc:mga0dm3v04" + "nmdc:mga0n0je44" ], - "ctg_logsum": 60365, - "scaf_logsum": 60806, - "gap_pct": 0.00196, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:3faf965a2e745048afed5d1c065a78c4", - "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", - "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", - "nmdc:1f9a75569aedc406a3db8ff779b03c19", - "nmdc:faeb84260d97f23162a6176b9442a5c8" + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", + "nmdc:dae7c6e067f69ef6db39b4240cc450ba" ], - "asm_score": 4.21, - "was_informed_by": "gold:Gp0115664", - "ctg_powsum": 6668.288, - "scaf_max": 15348, - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "scaf_powsum": 6720.964, + "was_informed_by": "gold:Gp0115667", + "input_read_count": 19416222, + "output_read_bases": 2825090769, + "id": "nmdc:8093869c91384d3299431e56019f7de0", "execution_resource": "NERSC-Cori", - "contigs": 78376, - "name": "Assembly Activity for nmdc:mga0dm3v04", - "ctg_max": 15348, - "gc_std": 0.11459, - "contig_bp": 33088752, - "gc_avg": 0.5432, + "input_read_bases": 2931849522, + "name": "Read QC Activity for nmdc:mga0n0je44", + "output_read_count": 18855352, "started_at_time": "2021-10-11T02:28:16Z", - "scaf_bp": 33089402, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 78311, - "ended_at_time": "2021-10-11T03:33:34+00:00", - "ctg_l50": 377, - "ctg_l90": 283, - "ctg_n50": 23883, - "ctg_n90": 67231, - "scaf_l50": 377, - "scaf_l90": 283, - "scaf_n50": 23850, - "scaf_n90": 67169, - "output_data_objects": [ - { - "name": "Gp0115664_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115664", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_contigs.fna", - "md5_checksum": "3faf965a2e745048afed5d1c065a78c4", - "id": 
"nmdc:3faf965a2e745048afed5d1c065a78c4", - "file_size_bytes": 36012597 - }, - { - "name": "Gp0115664_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115664", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_scaffolds.fna", - "md5_checksum": "2d99daff632b19ebdea3f3e5784e2fbc", - "id": "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", - "file_size_bytes": 35776428 - }, - { - "name": "Gp0115664_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_covstats.txt", - "md5_checksum": "d8f255300e5f214baad3c3b4b3c0b51b", - "id": "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", - "file_size_bytes": 6143277 - }, - { - "name": "Gp0115664_Assembled AGP file", - "description": "Assembled AGP file for Gp0115664", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_assembly.agp", - "md5_checksum": "1f9a75569aedc406a3db8ff779b03c19", - "id": "nmdc:1f9a75569aedc406a3db8ff779b03c19", - "file_size_bytes": 5710214 - }, - { - "name": "Gp0115664_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115664", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_pairedMapped_sorted.bam", - "md5_checksum": "faeb84260d97f23162a6176b9442a5c8", - "id": "nmdc:faeb84260d97f23162a6176b9442a5c8", - "file_size_bytes": 1670248615 - } - ] - }, + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:24+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9c5" + "$oid": "649b009bff710ae353f8cf4a" }, "has_input": [ - "nmdc:3faf965a2e745048afed5d1c065a78c4" - ], - "part_of": [ - "nmdc:mga0dm3v04" + 
"nmdc:7d4057e3a44a05171c13fb0ed3e2294a" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:338a8f2f739dfc89557e090d604302f6", - "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", - "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", - "nmdc:bc7f7df6865acffd4e07f8b592573eb9", - "nmdc:be38bedd77ab3c072bafbb2c201c953d", - "nmdc:d7318549a735853b679d15171f5c7ea7", - "nmdc:c1617e0980c6e52149692aee39e30f8c", - "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", - "nmdc:eb1fba5cad14c3e211baa2de796bca2e", - "nmdc:2146449222f410a286e4786bf19c9a5e", - "nmdc:20ced78c72f67d064bddcc8d5534ebb6", - "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170" + "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "nmdc:2afff209a40ca4895307f3a47080c534", + "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "nmdc:eb189cbf0543203d2521397b73d4d34b", + "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "nmdc:ac90bf3384ce44d097f7897ac5ff8134" ], - "was_informed_by": "gold:Gp0115664", - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "was_informed_by": "gold:Gp0115667", + "id": "nmdc:8093869c91384d3299431e56019f7de0", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0dm3v04", + "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:33:34+00:00", - "output_data_objects": [ - { - "name": "Gp0115664_Protein FAA", - "description": "Protein FAA for Gp0115664", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_proteins.faa", - "md5_checksum": "338a8f2f739dfc89557e090d604302f6", - "id": "nmdc:338a8f2f739dfc89557e090d604302f6", - "file_size_bytes": 21010319 - }, - { - "name": "Gp0115664_Structural annotation GFF file", - "description": "Structural 
annotation GFF file for Gp0115664", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_structural_annotation.gff", - "md5_checksum": "0ce03dd69826edcc8b5f6dd01ca176dc", - "id": "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", - "file_size_bytes": 2497 - }, - { - "name": "Gp0115664_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115664", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_functional_annotation.gff", - "md5_checksum": "dc720d27299f6f5c1d38c4dcf1dfc8db", - "id": "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", - "file_size_bytes": 24426623 - }, - { - "name": "Gp0115664_KO TSV file", - "description": "KO TSV file for Gp0115664", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko.tsv", - "md5_checksum": "bc7f7df6865acffd4e07f8b592573eb9", - "id": "nmdc:bc7f7df6865acffd4e07f8b592573eb9", - "file_size_bytes": 2875393 - }, - { - "name": "Gp0115664_EC TSV file", - "description": "EC TSV file for Gp0115664", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ec.tsv", - "md5_checksum": "be38bedd77ab3c072bafbb2c201c953d", - "id": "nmdc:be38bedd77ab3c072bafbb2c201c953d", - "file_size_bytes": 1882878 - }, - { - "name": "Gp0115664_COG GFF file", - "description": "COG GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cog.gff", - "md5_checksum": "d7318549a735853b679d15171f5c7ea7", - "id": "nmdc:d7318549a735853b679d15171f5c7ea7", - "file_size_bytes": 12475107 - }, - { - "name": "Gp0115664_PFAM GFF file", - "description": "PFAM GFF file for Gp0115664", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_pfam.gff", - "md5_checksum": "c1617e0980c6e52149692aee39e30f8c", - "id": "nmdc:c1617e0980c6e52149692aee39e30f8c", - "file_size_bytes": 9305713 - }, - { - "name": "Gp0115664_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_tigrfam.gff", - "md5_checksum": "bd5a9b5e55605ece8873d6ac05e76e0d", - "id": "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", - "file_size_bytes": 1181236 - }, - { - "name": "Gp0115664_SMART GFF file", - "description": "SMART GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_smart.gff", - "md5_checksum": "eb1fba5cad14c3e211baa2de796bca2e", - "id": "nmdc:eb1fba5cad14c3e211baa2de796bca2e", - "file_size_bytes": 2718910 - }, - { - "name": "Gp0115664_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_supfam.gff", - "md5_checksum": "2146449222f410a286e4786bf19c9a5e", - "id": "nmdc:2146449222f410a286e4786bf19c9a5e", - "file_size_bytes": 16463047 - }, - { - "name": "Gp0115664_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cath_funfam.gff", - "md5_checksum": "20ced78c72f67d064bddcc8d5534ebb6", - "id": "nmdc:20ced78c72f67d064bddcc8d5534ebb6", - "file_size_bytes": 12501882 - }, - { - "name": "Gp0115664_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko_ec.gff", - "md5_checksum": "7ffe90ceb10c9f40f755aa8d7aa30170", - "id": "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170", - "file_size_bytes": 9217314 - } - ] - }, + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": 
"2021-10-11T03:58:24+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34723" + "$oid": "61e71a10833bcf838a701aaa" }, "has_input": [ - "nmdc:3faf965a2e745048afed5d1c065a78c4", - "nmdc:faeb84260d97f23162a6176b9442a5c8", - "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db" + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" ], - "too_short_contig_num": 75364, "part_of": [ - "nmdc:mga0dm3v04" + "nmdc:mga0n0je44" ], - "binned_contig_num": 220, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:767a36b1bffa42d3d25af3f81b15e11b", - "nmdc:994fd58ab9a53c19ba1cdb830e37a132", - "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", - "nmdc:0d45611a5d0c80679c00fa759c939df0", - "nmdc:bb5835f621252fca37967e00245517ac" + "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "nmdc:2afff209a40ca4895307f3a47080c534", + "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "nmdc:eb189cbf0543203d2521397b73d4d34b", + "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "nmdc:ac90bf3384ce44d097f7897ac5ff8134" ], - "was_informed_by": "gold:Gp0115664", - "input_contig_num": 78376, - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "was_informed_by": "gold:Gp0115667", + "id": "nmdc:8093869c91384d3299431e56019f7de0", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0dm3v04", + "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:24+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": 
[], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115664_Filtered Reads", + "description": "Filtered Reads for Gp0115664", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filtered.fastq.gz", + "md5_checksum": "232e31505b6a0251df2303c0563d64c1", + "id": "nmdc:232e31505b6a0251df2303c0563d64c1", + "file_size_bytes": 1566732675 + }, + { + "name": "Gp0115664_Filtered Stats", + "description": "Filtered Stats for Gp0115664", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filterStats.txt", + "md5_checksum": "f3f4f75f19c92af6e98d2b45cccaacd5", + "id": "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5", + "file_size_bytes": 289 + }, + { + "name": "Gp0115664_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", + "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", + "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "file_size_bytes": 9591 + }, + { + "name": "Gp0115664_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", + "md5_checksum": "7f93f97242aed036019f13492f5af35c", + "id": "nmdc:7f93f97242aed036019f13492f5af35c", + "file_size_bytes": 885985 + }, + { + "name": "Gp0115664_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115664", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", + "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", + "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "file_size_bytes": 251303 + }, + { + "name": "Gp0115664_Centrifuge 
classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115664", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", + "md5_checksum": "a4243f71a0288f489c566ae85d85891d", + "id": "nmdc:a4243f71a0288f489c566ae85d85891d", + "file_size_bytes": 1268144933 + }, + { + "name": "Gp0115664_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115664", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", + "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", + "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "file_size_bytes": 254575 + }, + { + "name": "Gp0115664_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115664", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", + "md5_checksum": "a80779b32415ef001d0403f0b618b612", + "id": "nmdc:a80779b32415ef001d0403f0b618b612", + "file_size_bytes": 2327293 + }, + { + "name": "Gp0115664_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115664", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", + "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", + "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "file_size_bytes": 1037932028 + }, + { + "name": "Gp0115664_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115664", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", + 
"md5_checksum": "ce47d6686edb7b3472102d5883229c45", + "id": "nmdc:ce47d6686edb7b3472102d5883229c45", + "file_size_bytes": 641242 + }, + { + "name": "Gp0115664_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115664", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", + "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", + "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", + "file_size_bytes": 3995680 + }, + { + "name": "Gp0115664_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", + "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", + "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "file_size_bytes": 9591 + }, + { + "name": "Gp0115664_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", + "md5_checksum": "7f93f97242aed036019f13492f5af35c", + "id": "nmdc:7f93f97242aed036019f13492f5af35c", + "file_size_bytes": 885985 + }, + { + "name": "Gp0115664_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115664", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", + "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", + "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "file_size_bytes": 251303 + }, + { + "name": "Gp0115664_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115664", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", + "md5_checksum": "a4243f71a0288f489c566ae85d85891d", + "id": "nmdc:a4243f71a0288f489c566ae85d85891d", + "file_size_bytes": 1268144933 + }, + { + "name": "Gp0115664_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115664", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", + "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", + "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "file_size_bytes": 254575 + }, + { + "name": "Gp0115664_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115664", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", + "md5_checksum": "a80779b32415ef001d0403f0b618b612", + "id": "nmdc:a80779b32415ef001d0403f0b618b612", + "file_size_bytes": 2327293 + }, + { + "name": "Gp0115664_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115664", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", + "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", + "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "file_size_bytes": 1037932028 + }, + { + "name": "Gp0115664_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115664", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", + "md5_checksum": "ce47d6686edb7b3472102d5883229c45", + "id": "nmdc:ce47d6686edb7b3472102d5883229c45", + "file_size_bytes": 641242 + }, + { + "name": "Gp0115664_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115664", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", + "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", + "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", + "file_size_bytes": 3995680 + }, + { + "name": "Gp0115664_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115664", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_contigs.fna", + "md5_checksum": "3faf965a2e745048afed5d1c065a78c4", + "id": "nmdc:3faf965a2e745048afed5d1c065a78c4", + "file_size_bytes": 36012597 + }, + { + "name": "Gp0115664_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115664", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_scaffolds.fna", + "md5_checksum": "2d99daff632b19ebdea3f3e5784e2fbc", + "id": "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", + "file_size_bytes": 35776428 + }, + { + "name": "Gp0115664_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_covstats.txt", + "md5_checksum": "d8f255300e5f214baad3c3b4b3c0b51b", + "id": "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", + "file_size_bytes": 6143277 + }, + { + "name": "Gp0115664_Assembled AGP file", + "description": "Assembled AGP file for Gp0115664", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_assembly.agp", + "md5_checksum": "1f9a75569aedc406a3db8ff779b03c19", + "id": "nmdc:1f9a75569aedc406a3db8ff779b03c19", + "file_size_bytes": 5710214 + }, + { + "name": "Gp0115664_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0115664", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_pairedMapped_sorted.bam", + "md5_checksum": "faeb84260d97f23162a6176b9442a5c8", + "id": "nmdc:faeb84260d97f23162a6176b9442a5c8", + "file_size_bytes": 1670248615 + }, + { + "name": "Gp0115664_Protein FAA", + "description": "Protein FAA for Gp0115664", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_proteins.faa", + "md5_checksum": "338a8f2f739dfc89557e090d604302f6", + "id": "nmdc:338a8f2f739dfc89557e090d604302f6", + "file_size_bytes": 21010319 + }, + { + "name": "Gp0115664_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115664", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_structural_annotation.gff", + "md5_checksum": "0ce03dd69826edcc8b5f6dd01ca176dc", + "id": "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", + "file_size_bytes": 2497 + }, + { + "name": "Gp0115664_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115664", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_functional_annotation.gff", + "md5_checksum": "dc720d27299f6f5c1d38c4dcf1dfc8db", + "id": "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", + "file_size_bytes": 24426623 + }, + { + "name": "Gp0115664_KO TSV file", + "description": "KO TSV file for Gp0115664", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko.tsv", + "md5_checksum": "bc7f7df6865acffd4e07f8b592573eb9", + "id": "nmdc:bc7f7df6865acffd4e07f8b592573eb9", + "file_size_bytes": 2875393 + }, + { + "name": "Gp0115664_EC TSV file", + 
"description": "EC TSV file for Gp0115664", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ec.tsv", + "md5_checksum": "be38bedd77ab3c072bafbb2c201c953d", + "id": "nmdc:be38bedd77ab3c072bafbb2c201c953d", + "file_size_bytes": 1882878 + }, + { + "name": "Gp0115664_COG GFF file", + "description": "COG GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cog.gff", + "md5_checksum": "d7318549a735853b679d15171f5c7ea7", + "id": "nmdc:d7318549a735853b679d15171f5c7ea7", + "file_size_bytes": 12475107 + }, + { + "name": "Gp0115664_PFAM GFF file", + "description": "PFAM GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_pfam.gff", + "md5_checksum": "c1617e0980c6e52149692aee39e30f8c", + "id": "nmdc:c1617e0980c6e52149692aee39e30f8c", + "file_size_bytes": 9305713 + }, + { + "name": "Gp0115664_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_tigrfam.gff", + "md5_checksum": "bd5a9b5e55605ece8873d6ac05e76e0d", + "id": "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", + "file_size_bytes": 1181236 + }, + { + "name": "Gp0115664_SMART GFF file", + "description": "SMART GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_smart.gff", + "md5_checksum": "eb1fba5cad14c3e211baa2de796bca2e", + "id": "nmdc:eb1fba5cad14c3e211baa2de796bca2e", + "file_size_bytes": 2718910 + }, + { + "name": "Gp0115664_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_supfam.gff", + "md5_checksum": "2146449222f410a286e4786bf19c9a5e", + "id": "nmdc:2146449222f410a286e4786bf19c9a5e", + "file_size_bytes": 16463047 + }, + { + 
"name": "Gp0115664_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cath_funfam.gff", + "md5_checksum": "20ced78c72f67d064bddcc8d5534ebb6", + "id": "nmdc:20ced78c72f67d064bddcc8d5534ebb6", + "file_size_bytes": 12501882 + }, + { + "name": "Gp0115664_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko_ec.gff", + "md5_checksum": "7ffe90ceb10c9f40f755aa8d7aa30170", + "id": "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170", + "file_size_bytes": 9217314 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115664_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.tooShort.fa", + "md5_checksum": "767a36b1bffa42d3d25af3f81b15e11b", + "id": "nmdc:767a36b1bffa42d3d25af3f81b15e11b", + "file_size_bytes": 30368582 + }, + { + "name": "Gp0115664_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.unbinned.fa", + "md5_checksum": "994fd58ab9a53c19ba1cdb830e37a132", + "id": "nmdc:994fd58ab9a53c19ba1cdb830e37a132", + "file_size_bytes": 4608000 + }, + { + "name": "Gp0115664_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin 
checkm quality assessment result for Gp0115664", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_checkm_qa.out", + "md5_checksum": "db59a64c874a9e06c1f1ba58df96fe0d", + "id": "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", + "file_size_bytes": 845 + }, + { + "name": "Gp0115664_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115664", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_hqmq_bin.zip", + "md5_checksum": "0d45611a5d0c80679c00fa759c939df0", + "id": "nmdc:0d45611a5d0c80679c00fa759c939df0", + "file_size_bytes": 182 + }, + { + "name": "Gp0115664_metabat2 bins", + "description": "metabat2 bins for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_metabat_bin.zip", + "md5_checksum": "bb5835f621252fca37967e00245517ac", + "id": "nmdc:bb5835f621252fca37967e00245517ac", + "file_size_bytes": 314358 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34723" + }, + "has_input": [ + "nmdc:3faf965a2e745048afed5d1c065a78c4", + "nmdc:faeb84260d97f23162a6176b9442a5c8", + "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db" + ], + "too_short_contig_num": 75364, + "part_of": [ + "nmdc:mga0dm3v04" + ], + "binned_contig_num": 220, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:767a36b1bffa42d3d25af3f81b15e11b", + "nmdc:994fd58ab9a53c19ba1cdb830e37a132", + "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", + "nmdc:0d45611a5d0c80679c00fa759c939df0", + "nmdc:bb5835f621252fca37967e00245517ac" + ], + "was_informed_by": "gold:Gp0115664", + "input_contig_num": 78376, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": 
"NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0dm3v04", "mags_list": [ { "number_of_contig": 220, @@ -4131,652 +4124,676 @@ "unbinned_contig_num": 2792, "started_at_time": "2021-10-11T02:28:16Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:33:34+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115664_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.tooShort.fa", - "md5_checksum": "767a36b1bffa42d3d25af3f81b15e11b", - "id": "nmdc:767a36b1bffa42d3d25af3f81b15e11b", - "file_size_bytes": 30368582 - }, - { - "name": "Gp0115664_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.unbinned.fa", - "md5_checksum": "994fd58ab9a53c19ba1cdb830e37a132", - "id": "nmdc:994fd58ab9a53c19ba1cdb830e37a132", - "file_size_bytes": 4608000 - }, - { - "name": "Gp0115664_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115664", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_checkm_qa.out", - "md5_checksum": "db59a64c874a9e06c1f1ba58df96fe0d", - "id": "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", - "file_size_bytes": 845 - }, - { - "name": 
"Gp0115664_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115664", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_hqmq_bin.zip", - "md5_checksum": "0d45611a5d0c80679c00fa759c939df0", - "id": "nmdc:0d45611a5d0c80679c00fa759c939df0", - "file_size_bytes": 182 - }, - { - "name": "Gp0115664_metabat2 bins", - "description": "metabat2 bins for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_metabat_bin.zip", - "md5_checksum": "bb5835f621252fca37967e00245517ac", - "id": "nmdc:bb5835f621252fca37967e00245517ac", - "file_size_bytes": 314358 - } - ] + "ended_at_time": "2021-10-11T03:33:34+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b39" - }, - "id": "nmdc:omprc-11-7vsv7h78", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-j0wbx741" - ], - "has_output": [ - "jgi:55f23d790d8785306f96497e" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115678" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c881" + "$oid": 
"649b005bbf2caae0415ef9c5" }, "has_input": [ - "nmdc:0e6219b7901669483a0a0386cfc01f93" + "nmdc:3faf965a2e745048afed5d1c065a78c4" ], "part_of": [ - "nmdc:mga026tn70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e0ce93b88419f87568ff206e0efe3a24", - "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690" - ], - "was_informed_by": "gold:Gp0115678", - "input_read_count": 51286688, - "output_read_bases": 7231449575, - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "execution_resource": "NERSC-Cori", - "input_read_bases": 7744289888, - "name": "Read QC Activity for nmdc:mga026tn70", - "output_read_count": 48276864, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T06:18:17+00:00", - "output_data_objects": [ - { - "name": "Gp0115678_Filtered Reads", - "description": "Filtered Reads for Gp0115678", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filtered.fastq.gz", - "md5_checksum": "e0ce93b88419f87568ff206e0efe3a24", - "id": "nmdc:e0ce93b88419f87568ff206e0efe3a24", - "file_size_bytes": 4090026888 - }, - { - "name": "Gp0115678_Filtered Stats", - "description": "Filtered Stats for Gp0115678", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filterStats.txt", - "md5_checksum": "7bf8ff4cf0d98cccd8e1c20f77dd1690", - "id": "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690", - "file_size_bytes": 292 - } - ] - }, - { - "_id": { - "$oid": "649b009bff710ae353f8cf49" - }, - "has_input": [ - "nmdc:e0ce93b88419f87568ff206e0efe3a24" + "nmdc:mga0dm3v04" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "nmdc:12b2d6afc355bce76249d750a9fab534", - "nmdc:18214017d56658a48723c9c998dcba7e", - "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - 
"nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "nmdc:f9c01985f057825149d35de0650095a8", - "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "nmdc:38d41d4299141abe28bf0405af80cdfc" + "nmdc:338a8f2f739dfc89557e090d604302f6", + "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", + "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", + "nmdc:bc7f7df6865acffd4e07f8b592573eb9", + "nmdc:be38bedd77ab3c072bafbb2c201c953d", + "nmdc:d7318549a735853b679d15171f5c7ea7", + "nmdc:c1617e0980c6e52149692aee39e30f8c", + "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", + "nmdc:eb1fba5cad14c3e211baa2de796bca2e", + "nmdc:2146449222f410a286e4786bf19c9a5e", + "nmdc:20ced78c72f67d064bddcc8d5534ebb6", + "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170" ], - "was_informed_by": "gold:Gp0115678", - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "was_informed_by": "gold:Gp0115664", + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga026tn70", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:18:17+00:00", - "output_data_objects": [ - { - "name": "Gp0115678_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", - "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", - "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "file_size_bytes": 19085 - }, - { - "name": "Gp0115678_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", - "md5_checksum": "12b2d6afc355bce76249d750a9fab534", - "id": "nmdc:12b2d6afc355bce76249d750a9fab534", - "file_size_bytes": 1243929 - }, - { - "name": "Gp0115678_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for 
Gp0115678", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", - "md5_checksum": "18214017d56658a48723c9c998dcba7e", - "id": "nmdc:18214017d56658a48723c9c998dcba7e", - "file_size_bytes": 281148 - }, - { - "name": "Gp0115678_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115678", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", - "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", - "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "file_size_bytes": 3491726958 - }, - { - "name": "Gp0115678_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115678", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", - "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", - "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "file_size_bytes": 264123 - }, - { - "name": "Gp0115678_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115678", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", - "md5_checksum": "f9c01985f057825149d35de0650095a8", - "id": "nmdc:f9c01985f057825149d35de0650095a8", - "file_size_bytes": 2352347 - }, - { - "name": "Gp0115678_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115678", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", - "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", - "id": 
"nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "file_size_bytes": 2880889483 - }, - { - "name": "Gp0115678_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115678", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", - "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", - "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "file_size_bytes": 735519 - }, - { - "name": "Gp0115678_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115678", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", - "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", - "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", - "file_size_bytes": 4410156 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0dm3v04", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:33:34+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "61e719f6833bcf838a701854" + "$oid": "649b005f2ca5ee4adb139fb0" }, "has_input": [ - "nmdc:e0ce93b88419f87568ff206e0efe3a24" + "nmdc:232e31505b6a0251df2303c0563d64c1" ], "part_of": [ - "nmdc:mga026tn70" + "nmdc:mga0dm3v04" ], + "ctg_logsum": 60365, + "scaf_logsum": 60806, + "gap_pct": 0.00196, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "nmdc:12b2d6afc355bce76249d750a9fab534", - "nmdc:18214017d56658a48723c9c998dcba7e", - "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "nmdc:f9c01985f057825149d35de0650095a8", - "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "nmdc:38d41d4299141abe28bf0405af80cdfc" + "nmdc:3faf965a2e745048afed5d1c065a78c4", + 
"nmdc:2d99daff632b19ebdea3f3e5784e2fbc", + "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", + "nmdc:1f9a75569aedc406a3db8ff779b03c19", + "nmdc:faeb84260d97f23162a6176b9442a5c8" ], - "was_informed_by": "gold:Gp0115678", - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga026tn70", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:18:17+00:00", - "output_data_objects": [ - { - "name": "Gp0115678_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", - "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", - "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "file_size_bytes": 19085 - }, - { - "name": "Gp0115678_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", - "md5_checksum": "12b2d6afc355bce76249d750a9fab534", - "id": "nmdc:12b2d6afc355bce76249d750a9fab534", - "file_size_bytes": 1243929 - }, - { - "name": "Gp0115678_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115678", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", - "md5_checksum": "18214017d56658a48723c9c998dcba7e", - "id": "nmdc:18214017d56658a48723c9c998dcba7e", - "file_size_bytes": 281148 - }, - { - "name": "Gp0115678_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115678", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", - "md5_checksum": 
"99ef009c73c128e561a4b9dcb70d7ff2", - "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "file_size_bytes": 3491726958 - }, - { - "name": "Gp0115678_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115678", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", - "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", - "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "file_size_bytes": 264123 - }, - { - "name": "Gp0115678_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115678", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", - "md5_checksum": "f9c01985f057825149d35de0650095a8", - "id": "nmdc:f9c01985f057825149d35de0650095a8", - "file_size_bytes": 2352347 - }, - { - "name": "Gp0115678_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115678", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", - "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", - "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "file_size_bytes": 2880889483 - }, - { - "name": "Gp0115678_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115678", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", - "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", - "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "file_size_bytes": 735519 - }, - { - "name": "Gp0115678_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115678", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", - "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", - "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", - "file_size_bytes": 4410156 - } + "asm_score": 4.21, + "was_informed_by": "gold:Gp0115664", + "ctg_powsum": 6668.288, + "scaf_max": 15348, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "scaf_powsum": 6720.964, + "execution_resource": "NERSC-Cori", + "contigs": 78376, + "name": "Assembly Activity for nmdc:mga0dm3v04", + "ctg_max": 15348, + "gc_std": 0.11459, + "contig_bp": 33088752, + "gc_avg": 0.5432, + "started_at_time": "2021-10-11T02:28:16Z", + "scaf_bp": 33089402, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 78311, + "ended_at_time": "2021-10-11T03:33:34+00:00", + "ctg_l50": 377, + "ctg_l90": 283, + "ctg_n50": 23883, + "ctg_n90": 67231, + "scaf_l50": 377, + "scaf_l90": 283, + "scaf_n50": 23850, + "scaf_n90": 67169 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b38" + }, + "id": "nmdc:omprc-11-hgehsc37", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qxntpg05" + ], + "has_output": [ + "jgi:55d817f20d8785342fcf826c" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": 
"nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115664" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fb5" + "$oid": "649b009d6bdd4fd20273c87b" }, "has_input": [ - "nmdc:e0ce93b88419f87568ff206e0efe3a24" + "nmdc:86929bf5b2afcb965129dcf0eae2d8fc" ], "part_of": [ - "nmdc:mga026tn70" + "nmdc:mga0dm3v04" ], - "ctg_logsum": 494917, - "scaf_logsum": 496628, - "gap_pct": 0.00163, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d305e212cce8f84f14561d3957c968b1", - "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", - "nmdc:444562a4e7108077b7e541a5d9064086", - "nmdc:6c400425b7188b24ac49533d9ce0d43b", - "nmdc:1c63639a894aa686e77e57787fcafbc6" + "nmdc:232e31505b6a0251df2303c0563d64c1", + "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5" ], - "asm_score": 7.785, - "was_informed_by": "gold:Gp0115678", - "ctg_powsum": 57423, - "scaf_max": 116556, - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "scaf_powsum": 57689, + "was_informed_by": "gold:Gp0115664", + "input_read_count": 19058974, + "output_read_bases": 2597325375, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", "execution_resource": "NERSC-Cori", - "contigs": 383712, - "name": "Assembly Activity for nmdc:mga026tn70", - "ctg_max": 116556, - "gc_std": 0.13426, - "contig_bp": 190310453, - "gc_avg": 0.48844, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 190313553, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 383447, - "ended_at_time": "2021-10-11T06:18:17+00:00", - "ctg_l50": 474, - "ctg_l90": 290, - "ctg_n50": 102228, - "ctg_n90": 321321, - "scaf_l50": 474, - "scaf_l90": 290, - "scaf_n50": 102177, - "scaf_n90": 321076, - "scaf_l_gt50k": 453691, - "scaf_n_gt50k": 6, - "scaf_pct_gt50k": 0.23839132, - "output_data_objects": [ - { - "name": "Gp0115678_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115678", - "data_object_type": "Assembly 
Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", - "md5_checksum": "d305e212cce8f84f14561d3957c968b1", - "id": "nmdc:d305e212cce8f84f14561d3957c968b1", - "file_size_bytes": 205441595 - }, - { - "name": "Gp0115678_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115678", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", - "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", - "id": "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", - "file_size_bytes": 204286677 - }, - { - "name": "Gp0115678_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_covstats.txt", - "md5_checksum": "444562a4e7108077b7e541a5d9064086", - "id": "nmdc:444562a4e7108077b7e541a5d9064086", - "file_size_bytes": 30470067 - }, - { - "name": "Gp0115678_Assembled AGP file", - "description": "Assembled AGP file for Gp0115678", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_assembly.agp", - "md5_checksum": "6c400425b7188b24ac49533d9ce0d43b", - "id": "nmdc:6c400425b7188b24ac49533d9ce0d43b", - "file_size_bytes": 28619270 - }, - { - "name": "Gp0115678_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115678", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_pairedMapped_sorted.bam", - "md5_checksum": "1c63639a894aa686e77e57787fcafbc6", - "id": "nmdc:1c63639a894aa686e77e57787fcafbc6", - "file_size_bytes": 4471336607 - } - ] - }, + "input_read_bases": 2877905074, + "name": "Read QC Activity for nmdc:mga0dm3v04", + "output_read_count": 17338778, + "started_at_time": "2021-10-11T02:28:16Z", + "type": 
"nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:34+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9c8" + "$oid": "649b009bff710ae353f8cf3f" }, "has_input": [ - "nmdc:d305e212cce8f84f14561d3957c968b1" - ], - "part_of": [ - "nmdc:mga026tn70" + "nmdc:232e31505b6a0251df2303c0563d64c1" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", - "nmdc:4eeee677df10364f622a0d4789522c69", - "nmdc:351ff91eddf2bc89acbdf04eab68aef1", - "nmdc:64b9d934918b78de80f1cf80a013557f", - "nmdc:903f2015c41660ae53e16bfc369d566a", - "nmdc:bf72ad74b2375abe730ecf7dc50b1557", - "nmdc:92f4707b0b022c217463f76d229dd3cb", - "nmdc:4f6f494c878aeff4308f2de2b2682ea6", - "nmdc:c44ff7df84f2b777b7fee22f7d28e205", - "nmdc:b4fad8c887bc33c67a3316475ccc3572", - "nmdc:4a3d00839e3067973b06771a31bbae93", - "nmdc:f01768e30cdd8f7650f631883d1c5d23" + "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "nmdc:7f93f97242aed036019f13492f5af35c", + "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "nmdc:a4243f71a0288f489c566ae85d85891d", + "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "nmdc:a80779b32415ef001d0403f0b618b612", + "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "nmdc:ce47d6686edb7b3472102d5883229c45", + "nmdc:29b75e78b0b7fd8115614d8e9d341d46" ], - "was_informed_by": "gold:Gp0115678", - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "was_informed_by": "gold:Gp0115664", + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga026tn70", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T06:18:17+00:00", - "output_data_objects": [ - { - "name": "Gp0115678_Protein FAA", - "description": "Protein FAA for Gp0115678", - "data_object_type": "Annotation Amino Acid FASTA", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_proteins.faa", - "md5_checksum": "ecfb1a4d469d9f95a91c8a3a3d5475af", - "id": "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", - "file_size_bytes": 109377096 - }, - { - "name": "Gp0115678_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115678", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_structural_annotation.gff", - "md5_checksum": "4eeee677df10364f622a0d4789522c69", - "id": "nmdc:4eeee677df10364f622a0d4789522c69", - "file_size_bytes": 2533 - }, - { - "name": "Gp0115678_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115678", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_functional_annotation.gff", - "md5_checksum": "351ff91eddf2bc89acbdf04eab68aef1", - "id": "nmdc:351ff91eddf2bc89acbdf04eab68aef1", - "file_size_bytes": 118933051 - }, - { - "name": "Gp0115678_KO TSV file", - "description": "KO TSV file for Gp0115678", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko.tsv", - "md5_checksum": "64b9d934918b78de80f1cf80a013557f", - "id": "nmdc:64b9d934918b78de80f1cf80a013557f", - "file_size_bytes": 12839157 - }, - { - "name": "Gp0115678_EC TSV file", - "description": "EC TSV file for Gp0115678", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ec.tsv", - "md5_checksum": "903f2015c41660ae53e16bfc369d566a", - "id": "nmdc:903f2015c41660ae53e16bfc369d566a", - "file_size_bytes": 8227424 - }, - { - "name": "Gp0115678_COG GFF file", - "description": "COG GFF file for Gp0115678", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cog.gff", - "md5_checksum": "bf72ad74b2375abe730ecf7dc50b1557", - "id": "nmdc:bf72ad74b2375abe730ecf7dc50b1557", - "file_size_bytes": 57084923 - }, - { - "name": "Gp0115678_PFAM GFF file", - "description": "PFAM GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_pfam.gff", - "md5_checksum": "92f4707b0b022c217463f76d229dd3cb", - "id": "nmdc:92f4707b0b022c217463f76d229dd3cb", - "file_size_bytes": 46625196 - }, - { - "name": "Gp0115678_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_tigrfam.gff", - "md5_checksum": "4f6f494c878aeff4308f2de2b2682ea6", - "id": "nmdc:4f6f494c878aeff4308f2de2b2682ea6", - "file_size_bytes": 5472483 - }, - { - "name": "Gp0115678_SMART GFF file", - "description": "SMART GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_smart.gff", - "md5_checksum": "c44ff7df84f2b777b7fee22f7d28e205", - "id": "nmdc:c44ff7df84f2b777b7fee22f7d28e205", - "file_size_bytes": 18005129 - }, - { - "name": "Gp0115678_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_supfam.gff", - "md5_checksum": "b4fad8c887bc33c67a3316475ccc3572", - "id": "nmdc:b4fad8c887bc33c67a3316475ccc3572", - "file_size_bytes": 80713018 - }, - { - "name": "Gp0115678_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cath_funfam.gff", - "md5_checksum": "4a3d00839e3067973b06771a31bbae93", - "id": "nmdc:4a3d00839e3067973b06771a31bbae93", - "file_size_bytes": 66327975 - }, - { - "name": "Gp0115678_KO_EC GFF file", - "description": "KO_EC GFF file for 
Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko_ec.gff", - "md5_checksum": "f01768e30cdd8f7650f631883d1c5d23", - "id": "nmdc:f01768e30cdd8f7650f631883d1c5d23", - "file_size_bytes": 40908900 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:33:34+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab3472b" + "$oid": "61e719dd833bcf838a70154e" }, "has_input": [ - "nmdc:d305e212cce8f84f14561d3957c968b1", - "nmdc:1c63639a894aa686e77e57787fcafbc6", - "nmdc:351ff91eddf2bc89acbdf04eab68aef1" + "nmdc:232e31505b6a0251df2303c0563d64c1" ], - "too_short_contig_num": 362617, "part_of": [ - "nmdc:mga026tn70" + "nmdc:mga0dm3v04" ], - "binned_contig_num": 2089, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", - "nmdc:85defe7977c263b8fba3f31f89f101f9", - "nmdc:19a6a8410cece1118a06763023cc1313", - "nmdc:54ed3f096ca7eacec9e5078ca45a6530", - "nmdc:8493c05e428d90f8893e4c58755b2e95" + "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "nmdc:7f93f97242aed036019f13492f5af35c", + "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "nmdc:a4243f71a0288f489c566ae85d85891d", + "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "nmdc:a80779b32415ef001d0403f0b618b612", + "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "nmdc:ce47d6686edb7b3472102d5883229c45", + "nmdc:29b75e78b0b7fd8115614d8e9d341d46" ], - "was_informed_by": "gold:Gp0115678", - "input_contig_num": 383711, - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "was_informed_by": "gold:Gp0115664", + "id": 
"nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga026tn70", - "mags_list": [ - { - "number_of_contig": 5, - "completeness": 0.31, - "bin_name": "bins.1", - "gene_count": 264, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 0 - }, - { - "number_of_contig": 231, - "completeness": 50.86, - "bin_name": "bins.2", - "gene_count": 1187, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Pseudomonadales", - "num_16s": 0, - "gtdbtk_family": "UBA3067", - "gtdbtk_domain": "Bacteria", + "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:33:34+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115678_Filtered Reads", + "description": "Filtered Reads for Gp0115678", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filtered.fastq.gz", + "md5_checksum": "e0ce93b88419f87568ff206e0efe3a24", + "id": "nmdc:e0ce93b88419f87568ff206e0efe3a24", + "file_size_bytes": 4090026888 + }, + { + "name": "Gp0115678_Filtered Stats", + "description": "Filtered Stats for Gp0115678", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filterStats.txt", + "md5_checksum": "7bf8ff4cf0d98cccd8e1c20f77dd1690", + "id": "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690", + "file_size_bytes": 292 + }, + { + "name": "Gp0115678_Gottcha2 TSV report", + "description": 
"Gottcha2 TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", + "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", + "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "file_size_bytes": 19085 + }, + { + "name": "Gp0115678_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", + "md5_checksum": "12b2d6afc355bce76249d750a9fab534", + "id": "nmdc:12b2d6afc355bce76249d750a9fab534", + "file_size_bytes": 1243929 + }, + { + "name": "Gp0115678_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115678", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", + "md5_checksum": "18214017d56658a48723c9c998dcba7e", + "id": "nmdc:18214017d56658a48723c9c998dcba7e", + "file_size_bytes": 281148 + }, + { + "name": "Gp0115678_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115678", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", + "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", + "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "file_size_bytes": 3491726958 + }, + { + "name": "Gp0115678_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115678", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", + "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", + "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "file_size_bytes": 264123 + }, + { + "name": "Gp0115678_Centrifuge 
Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115678", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", + "md5_checksum": "f9c01985f057825149d35de0650095a8", + "id": "nmdc:f9c01985f057825149d35de0650095a8", + "file_size_bytes": 2352347 + }, + { + "name": "Gp0115678_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115678", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", + "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", + "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "file_size_bytes": 2880889483 + }, + { + "name": "Gp0115678_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115678", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", + "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", + "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "file_size_bytes": 735519 + }, + { + "name": "Gp0115678_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115678", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", + "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", + "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", + "file_size_bytes": 4410156 + }, + { + "name": "Gp0115678_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", + "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", + "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + 
"file_size_bytes": 19085 + }, + { + "name": "Gp0115678_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", + "md5_checksum": "12b2d6afc355bce76249d750a9fab534", + "id": "nmdc:12b2d6afc355bce76249d750a9fab534", + "file_size_bytes": 1243929 + }, + { + "name": "Gp0115678_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115678", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", + "md5_checksum": "18214017d56658a48723c9c998dcba7e", + "id": "nmdc:18214017d56658a48723c9c998dcba7e", + "file_size_bytes": 281148 + }, + { + "name": "Gp0115678_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115678", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", + "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", + "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "file_size_bytes": 3491726958 + }, + { + "name": "Gp0115678_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115678", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", + "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", + "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "file_size_bytes": 264123 + }, + { + "name": "Gp0115678_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115678", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", + 
"md5_checksum": "f9c01985f057825149d35de0650095a8", + "id": "nmdc:f9c01985f057825149d35de0650095a8", + "file_size_bytes": 2352347 + }, + { + "name": "Gp0115678_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115678", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", + "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", + "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "file_size_bytes": 2880889483 + }, + { + "name": "Gp0115678_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115678", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", + "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", + "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "file_size_bytes": 735519 + }, + { + "name": "Gp0115678_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115678", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", + "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", + "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", + "file_size_bytes": 4410156 + }, + { + "name": "Gp0115678_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115678", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", + "md5_checksum": "d305e212cce8f84f14561d3957c968b1", + "id": "nmdc:d305e212cce8f84f14561d3957c968b1", + "file_size_bytes": 205441595 + }, + { + "name": "Gp0115678_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115678", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", + "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", + "id": "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", + "file_size_bytes": 204286677 + }, + { + "name": "Gp0115678_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_covstats.txt", + "md5_checksum": "444562a4e7108077b7e541a5d9064086", + "id": "nmdc:444562a4e7108077b7e541a5d9064086", + "file_size_bytes": 30470067 + }, + { + "name": "Gp0115678_Assembled AGP file", + "description": "Assembled AGP file for Gp0115678", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_assembly.agp", + "md5_checksum": "6c400425b7188b24ac49533d9ce0d43b", + "id": "nmdc:6c400425b7188b24ac49533d9ce0d43b", + "file_size_bytes": 28619270 + }, + { + "name": "Gp0115678_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115678", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_pairedMapped_sorted.bam", + "md5_checksum": "1c63639a894aa686e77e57787fcafbc6", + "id": "nmdc:1c63639a894aa686e77e57787fcafbc6", + "file_size_bytes": 4471336607 + }, + { + "name": "Gp0115678_Protein FAA", + "description": "Protein FAA for Gp0115678", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_proteins.faa", + "md5_checksum": "ecfb1a4d469d9f95a91c8a3a3d5475af", + "id": "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", + "file_size_bytes": 109377096 + }, + { + "name": "Gp0115678_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115678", + "data_object_type": "Structural Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_structural_annotation.gff", + "md5_checksum": "4eeee677df10364f622a0d4789522c69", + "id": "nmdc:4eeee677df10364f622a0d4789522c69", + "file_size_bytes": 2533 + }, + { + "name": "Gp0115678_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115678", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_functional_annotation.gff", + "md5_checksum": "351ff91eddf2bc89acbdf04eab68aef1", + "id": "nmdc:351ff91eddf2bc89acbdf04eab68aef1", + "file_size_bytes": 118933051 + }, + { + "name": "Gp0115678_KO TSV file", + "description": "KO TSV file for Gp0115678", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko.tsv", + "md5_checksum": "64b9d934918b78de80f1cf80a013557f", + "id": "nmdc:64b9d934918b78de80f1cf80a013557f", + "file_size_bytes": 12839157 + }, + { + "name": "Gp0115678_EC TSV file", + "description": "EC TSV file for Gp0115678", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ec.tsv", + "md5_checksum": "903f2015c41660ae53e16bfc369d566a", + "id": "nmdc:903f2015c41660ae53e16bfc369d566a", + "file_size_bytes": 8227424 + }, + { + "name": "Gp0115678_COG GFF file", + "description": "COG GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cog.gff", + "md5_checksum": "bf72ad74b2375abe730ecf7dc50b1557", + "id": "nmdc:bf72ad74b2375abe730ecf7dc50b1557", + "file_size_bytes": 57084923 + }, + { + "name": "Gp0115678_PFAM GFF file", + "description": "PFAM GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_pfam.gff", + "md5_checksum": "92f4707b0b022c217463f76d229dd3cb", + "id": 
"nmdc:92f4707b0b022c217463f76d229dd3cb", + "file_size_bytes": 46625196 + }, + { + "name": "Gp0115678_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_tigrfam.gff", + "md5_checksum": "4f6f494c878aeff4308f2de2b2682ea6", + "id": "nmdc:4f6f494c878aeff4308f2de2b2682ea6", + "file_size_bytes": 5472483 + }, + { + "name": "Gp0115678_SMART GFF file", + "description": "SMART GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_smart.gff", + "md5_checksum": "c44ff7df84f2b777b7fee22f7d28e205", + "id": "nmdc:c44ff7df84f2b777b7fee22f7d28e205", + "file_size_bytes": 18005129 + }, + { + "name": "Gp0115678_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_supfam.gff", + "md5_checksum": "b4fad8c887bc33c67a3316475ccc3572", + "id": "nmdc:b4fad8c887bc33c67a3316475ccc3572", + "file_size_bytes": 80713018 + }, + { + "name": "Gp0115678_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cath_funfam.gff", + "md5_checksum": "4a3d00839e3067973b06771a31bbae93", + "id": "nmdc:4a3d00839e3067973b06771a31bbae93", + "file_size_bytes": 66327975 + }, + { + "name": "Gp0115678_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko_ec.gff", + "md5_checksum": "f01768e30cdd8f7650f631883d1c5d23", + "id": "nmdc:f01768e30cdd8f7650f631883d1c5d23", + "file_size_bytes": 40908900 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115678_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.tooShort.fa", + "md5_checksum": "cf2d0eb0281d2822373d4e7d25c8d1e6", + "id": "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", + "file_size_bytes": 160811096 + }, + { + "name": "Gp0115678_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.unbinned.fa", + "md5_checksum": "85defe7977c263b8fba3f31f89f101f9", + "id": "nmdc:85defe7977c263b8fba3f31f89f101f9", + "file_size_bytes": 31022166 + }, + { + "name": "Gp0115678_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115678", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_checkm_qa.out", + "md5_checksum": "19a6a8410cece1118a06763023cc1313", + "id": "nmdc:19a6a8410cece1118a06763023cc1313", + "file_size_bytes": 1690 + }, + { + "name": "Gp0115678_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115678", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_hqmq_bin.zip", + "md5_checksum": "54ed3f096ca7eacec9e5078ca45a6530", + "id": "nmdc:54ed3f096ca7eacec9e5078ca45a6530", + "file_size_bytes": 4026276 + }, + { + "name": "Gp0115678_metabat2 bins", + "description": "metabat2 bins for Gp0115678", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_metabat_bin.zip", + "md5_checksum": "8493c05e428d90f8893e4c58755b2e95", + "id": "nmdc:8493c05e428d90f8893e4c58755b2e95", + "file_size_bytes": 72078 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3472b" + }, + "has_input": [ + "nmdc:d305e212cce8f84f14561d3957c968b1", + "nmdc:1c63639a894aa686e77e57787fcafbc6", + "nmdc:351ff91eddf2bc89acbdf04eab68aef1" + ], + "too_short_contig_num": 362617, + "part_of": [ + "nmdc:mga026tn70" + ], + "binned_contig_num": 2089, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", + "nmdc:85defe7977c263b8fba3f31f89f101f9", + "nmdc:19a6a8410cece1118a06763023cc1313", + "nmdc:54ed3f096ca7eacec9e5078ca45a6530", + "nmdc:8493c05e428d90f8893e4c58755b2e95" + ], + "was_informed_by": "gold:Gp0115678", + "input_contig_num": 383711, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga026tn70", + "mags_list": [ + { + "number_of_contig": 5, + "completeness": 0.31, + "bin_name": "bins.1", + "gene_count": 264, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 231, + "completeness": 50.86, + "bin_name": "bins.2", + "gene_count": 1187, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", "contamination": 0.86, "gtdbtk_class": "Gammaproteobacteria", "gtdbtk_phylum": "Proteobacteria", @@ -4865,1300 
+4882,1357 @@ "unbinned_contig_num": 19005, "started_at_time": "2021-10-11T02:23:30Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T06:18:17+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115678_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.tooShort.fa", - "md5_checksum": "cf2d0eb0281d2822373d4e7d25c8d1e6", - "id": "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", - "file_size_bytes": 160811096 - }, - { - "name": "Gp0115678_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.unbinned.fa", - "md5_checksum": "85defe7977c263b8fba3f31f89f101f9", - "id": "nmdc:85defe7977c263b8fba3f31f89f101f9", - "file_size_bytes": 31022166 - }, - { - "name": "Gp0115678_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115678", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_checkm_qa.out", - "md5_checksum": "19a6a8410cece1118a06763023cc1313", - "id": "nmdc:19a6a8410cece1118a06763023cc1313", - "file_size_bytes": 1690 - }, - { - "name": "Gp0115678_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115678", - 
"data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_hqmq_bin.zip", - "md5_checksum": "54ed3f096ca7eacec9e5078ca45a6530", - "id": "nmdc:54ed3f096ca7eacec9e5078ca45a6530", - "file_size_bytes": 4026276 - }, - { - "name": "Gp0115678_metabat2 bins", - "description": "metabat2 bins for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_metabat_bin.zip", - "md5_checksum": "8493c05e428d90f8893e4c58755b2e95", - "id": "nmdc:8493c05e428d90f8893e4c58755b2e95", - "file_size_bytes": 72078 - } - ] + "ended_at_time": "2021-10-11T06:18:17+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b3a" - }, - "id": "nmdc:omprc-11-5r54nt37", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-r7ggfc16" - ], - "has_output": [ - "jgi:574fde547ded5e3df1ee13fa" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127623" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c85a" + "$oid": "649b005bbf2caae0415ef9c8" }, "has_input": [ 
- "nmdc:14766bc431808b2a29c03beecb66bbac" + "nmdc:d305e212cce8f84f14561d3957c968b1" ], "part_of": [ - "nmdc:mga03eyz63" + "nmdc:mga026tn70" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7", - "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32" + "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", + "nmdc:4eeee677df10364f622a0d4789522c69", + "nmdc:351ff91eddf2bc89acbdf04eab68aef1", + "nmdc:64b9d934918b78de80f1cf80a013557f", + "nmdc:903f2015c41660ae53e16bfc369d566a", + "nmdc:bf72ad74b2375abe730ecf7dc50b1557", + "nmdc:92f4707b0b022c217463f76d229dd3cb", + "nmdc:4f6f494c878aeff4308f2de2b2682ea6", + "nmdc:c44ff7df84f2b777b7fee22f7d28e205", + "nmdc:b4fad8c887bc33c67a3316475ccc3572", + "nmdc:4a3d00839e3067973b06771a31bbae93", + "nmdc:f01768e30cdd8f7650f631883d1c5d23" ], - "was_informed_by": "gold:Gp0127623", - "input_read_count": 23705118, - "output_read_bases": 3409425046, - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "was_informed_by": "gold:Gp0115678", + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", "execution_resource": "NERSC-Cori", - "input_read_bases": 3579472818, - "name": "Read QC Activity for nmdc:mga03eyz63", - "output_read_count": 22801896, + "name": "Annotation Activity for nmdc:mga026tn70", "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T02:42:25+00:00", - "output_data_objects": [ - { - "name": "Gp0127623_Filtered Reads", - "description": "Filtered Reads for Gp0127623", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filtered.fastq.gz", - "md5_checksum": "6a8409b21c45ba9feba873ec269c8ff7", - "id": "nmdc:6a8409b21c45ba9feba873ec269c8ff7", - "file_size_bytes": 1917552858 - }, - { - "name": "Gp0127623_Filtered Stats", - "description": "Filtered Stats for Gp0127623", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filterStats.txt", - "md5_checksum": "61fb06de10fe3a0c49c5afe14ab7fb32", - "id": "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32", - "file_size_bytes": 283 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:18:17+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf17" + "$oid": "649b005f2ca5ee4adb139fb5" }, "has_input": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7" + "nmdc:e0ce93b88419f87568ff206e0efe3a24" + ], + "part_of": [ + "nmdc:mga026tn70" ], + "ctg_logsum": 494917, + "scaf_logsum": 496628, + "gap_pct": 0.00163, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ac39e916e17e08a845bb40d97519d8be", - "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "nmdc:eda0c04d692ecf137585676c15924626", - "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "nmdc:e1f164c534830cd628d67c564ace863b", - "nmdc:a1062576d998b7b82e39b8d8520fa37e", - "nmdc:040e6ca695283a12711c16344acd1e76", - "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "nmdc:f2eed9669268f69dbc31f0c4f839fccf" + "nmdc:d305e212cce8f84f14561d3957c968b1", + "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", + "nmdc:444562a4e7108077b7e541a5d9064086", + "nmdc:6c400425b7188b24ac49533d9ce0d43b", + "nmdc:1c63639a894aa686e77e57787fcafbc6" ], - "was_informed_by": "gold:Gp0127623", - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "asm_score": 7.785, + "was_informed_by": "gold:Gp0115678", + "ctg_powsum": 57423, + "scaf_max": 116556, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "scaf_powsum": 57689, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", + "contigs": 383712, + "name": "Assembly Activity for nmdc:mga026tn70", + "ctg_max": 116556, + "gc_std": 0.13426, + "contig_bp": 190310453, + "gc_avg": 0.48844, "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": 
"2021-10-11T02:42:25+00:00", - "output_data_objects": [ - { - "name": "Gp0127623_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", - "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", - "id": "nmdc:ac39e916e17e08a845bb40d97519d8be", - "file_size_bytes": 1553 - }, - { - "name": "Gp0127623_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", - "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", - "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "file_size_bytes": 836575 - }, - { - "name": "Gp0127623_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127623", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", - "md5_checksum": "eda0c04d692ecf137585676c15924626", - "id": "nmdc:eda0c04d692ecf137585676c15924626", - "file_size_bytes": 231097 - }, - { - "name": "Gp0127623_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127623", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", - "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", - "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "file_size_bytes": 1669254765 - }, - { - "name": "Gp0127623_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127623", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", - "md5_checksum": "e1f164c534830cd628d67c564ace863b", 
- "id": "nmdc:e1f164c534830cd628d67c564ace863b", - "file_size_bytes": 255784 - }, - { - "name": "Gp0127623_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127623", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", - "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", - "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", - "file_size_bytes": 2333760 - }, - { - "name": "Gp0127623_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127623", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", - "md5_checksum": "040e6ca695283a12711c16344acd1e76", - "id": "nmdc:040e6ca695283a12711c16344acd1e76", - "file_size_bytes": 1335651191 - }, - { - "name": "Gp0127623_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127623", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", - "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", - "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "file_size_bytes": 647609 - }, - { - "name": "Gp0127623_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127623", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", - "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", - "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", - "file_size_bytes": 3949449 - } - ] - }, + "scaf_bp": 190313553, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 383447, + "ended_at_time": "2021-10-11T06:18:17+00:00", + "ctg_l50": 474, + "ctg_l90": 290, + "ctg_n50": 102228, + "ctg_n90": 321321, + 
"scaf_l50": 474, + "scaf_l90": 290, + "scaf_n50": 102177, + "scaf_n90": 321076, + "scaf_l_gt50k": 453691, + "scaf_n_gt50k": 6, + "scaf_pct_gt50k": 0.23839132 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e718fc833bcf838a6ff4c9" + "$oid": "649b009773e8249959349b39" }, + "id": "nmdc:omprc-11-7vsv7h78", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", "has_input": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7" - ], - "part_of": [ - "nmdc:mga03eyz63" + "nmdc:bsm-11-j0wbx741" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ac39e916e17e08a845bb40d97519d8be", - "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "nmdc:eda0c04d692ecf137585676c15924626", - "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "nmdc:e1f164c534830cd628d67c564ace863b", - "nmdc:a1062576d998b7b82e39b8d8520fa37e", - "nmdc:040e6ca695283a12711c16344acd1e76", - "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "nmdc:f2eed9669268f69dbc31f0c4f839fccf" + "jgi:55f23d790d8785306f96497e" ], - "was_informed_by": "gold:Gp0127623", - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:42:25+00:00", - "output_data_objects": [ - { - "name": "Gp0127623_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", - "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", - "id": 
"nmdc:ac39e916e17e08a845bb40d97519d8be", - "file_size_bytes": 1553 - }, - { - "name": "Gp0127623_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", - "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", - "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "file_size_bytes": 836575 - }, - { - "name": "Gp0127623_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127623", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", - "md5_checksum": "eda0c04d692ecf137585676c15924626", - "id": "nmdc:eda0c04d692ecf137585676c15924626", - "file_size_bytes": 231097 - }, - { - "name": "Gp0127623_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127623", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", - "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", - "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "file_size_bytes": 1669254765 - }, - { - "name": "Gp0127623_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127623", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", - "md5_checksum": "e1f164c534830cd628d67c564ace863b", - "id": "nmdc:e1f164c534830cd628d67c564ace863b", - "file_size_bytes": 255784 - }, - { - "name": "Gp0127623_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127623", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", - "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", - "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", - "file_size_bytes": 2333760 - }, - { - "name": "Gp0127623_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127623", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", - "md5_checksum": "040e6ca695283a12711c16344acd1e76", - "id": "nmdc:040e6ca695283a12711c16344acd1e76", - "file_size_bytes": 1335651191 - }, - { - "name": "Gp0127623_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127623", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", - "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", - "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "file_size_bytes": 647609 - }, - { - "name": "Gp0127623_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127623", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", - "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", - "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", - "file_size_bytes": 3949449 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + 
"gold:Gp0115678" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f9f" + "$oid": "649b009d6bdd4fd20273c881" }, "has_input": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7" + "nmdc:0e6219b7901669483a0a0386cfc01f93" ], "part_of": [ - "nmdc:mga03eyz63" + "nmdc:mga026tn70" ], - "ctg_logsum": 70596, - "scaf_logsum": 70885, - "gap_pct": 0.00063, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", - "nmdc:a0377bb7d752e66b754753fcefb5005a", - "nmdc:081017d0d9e68a999c245618eb907c08", - "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", - "nmdc:21fb280328baf81e8135733eaf440b66" + "nmdc:e0ce93b88419f87568ff206e0efe3a24", + "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690" ], - "asm_score": 3.626, - "was_informed_by": "gold:Gp0127623", - "ctg_powsum": 7584.611, - "scaf_max": 12785, - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "scaf_powsum": 7618.086, + "was_informed_by": "gold:Gp0115678", + "input_read_count": 51286688, + "output_read_bases": 7231449575, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", "execution_resource": "NERSC-Cori", - "contigs": 118423, - "name": "Assembly Activity for nmdc:mga03eyz63", - "ctg_max": 11834, - "gc_std": 0.12108, - "contig_bp": 50762396, - "gc_avg": 0.59992, + "input_read_bases": 7744289888, + "name": "Read QC Activity for nmdc:mga026tn70", + "output_read_count": 48276864, "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 50762716, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 118391, - "ended_at_time": "2021-10-11T02:42:25+00:00", - "ctg_l50": 402, - "ctg_l90": 285, - "ctg_n50": 37682, - "ctg_n90": 100987, - "scaf_l50": 402, - "scaf_l90": 285, - "scaf_n50": 37659, - "scaf_n90": 100956, - "output_data_objects": [ - { - "name": "Gp0127623_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127623", - "data_object_type": "Assembly Contigs", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_contigs.fna", - "md5_checksum": "3373ef564b5b97fa472dc8f2c2277dbc", - "id": "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", - "file_size_bytes": 55220158 - }, - { - "name": "Gp0127623_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127623", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_scaffolds.fna", - "md5_checksum": "a0377bb7d752e66b754753fcefb5005a", - "id": "nmdc:a0377bb7d752e66b754753fcefb5005a", - "file_size_bytes": 54864386 - }, - { - "name": "Gp0127623_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_covstats.txt", - "md5_checksum": "081017d0d9e68a999c245618eb907c08", - "id": "nmdc:081017d0d9e68a999c245618eb907c08", - "file_size_bytes": 9321875 - }, - { - "name": "Gp0127623_Assembled AGP file", - "description": "Assembled AGP file for Gp0127623", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_assembly.agp", - "md5_checksum": "4a6ed00a6c2156c142d7bbec6baa36b5", - "id": "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", - "file_size_bytes": 8670291 - }, - { - "name": "Gp0127623_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127623", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_pairedMapped_sorted.bam", - "md5_checksum": "21fb280328baf81e8135733eaf440b66", - "id": "nmdc:21fb280328baf81e8135733eaf440b66", - "file_size_bytes": 2062412797 - } - ] - }, + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:18:17+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef99a" + 
"$oid": "649b009bff710ae353f8cf49" }, "has_input": [ - "nmdc:3373ef564b5b97fa472dc8f2c2277dbc" - ], - "part_of": [ - "nmdc:mga03eyz63" + "nmdc:e0ce93b88419f87568ff206e0efe3a24" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8ac52d00bad1f9349da2acde572006b6", - "nmdc:9dd5eb06fe24f63d5012e34e364a580c", - "nmdc:05107e0217e199d7b0cd571db88f7d09", - "nmdc:02ffcaeeb9a73edea47ba3671396026a", - "nmdc:b9b4ccafc50787f86ef03680eb23848d", - "nmdc:fbd178d9c302b841e3fde3ab9acd8160", - "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", - "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", - "nmdc:45536a48cef31f2c3870c7bacb3d785a", - "nmdc:a52d057d005504857f82bcf661dd7676", - "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", - "nmdc:32eca4cab8525b09cf1b0ed2353f9278" + "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "nmdc:12b2d6afc355bce76249d750a9fab534", + "nmdc:18214017d56658a48723c9c998dcba7e", + "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "nmdc:f9c01985f057825149d35de0650095a8", + "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "nmdc:38d41d4299141abe28bf0405af80cdfc" ], - "was_informed_by": "gold:Gp0127623", - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "was_informed_by": "gold:Gp0115678", + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga03eyz63", + "name": "ReadBased Analysis Activity for nmdc:mga026tn70", "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T02:42:25+00:00", - "output_data_objects": [ - { - "name": "Gp0127623_Protein FAA", - "description": "Protein FAA for Gp0127623", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_proteins.faa", - "md5_checksum": "8ac52d00bad1f9349da2acde572006b6", - "id": 
"nmdc:8ac52d00bad1f9349da2acde572006b6", - "file_size_bytes": 32224726 - }, - { - "name": "Gp0127623_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127623", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_structural_annotation.gff", - "md5_checksum": "9dd5eb06fe24f63d5012e34e364a580c", - "id": "nmdc:9dd5eb06fe24f63d5012e34e364a580c", - "file_size_bytes": 2512 - }, - { - "name": "Gp0127623_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127623", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_functional_annotation.gff", - "md5_checksum": "05107e0217e199d7b0cd571db88f7d09", - "id": "nmdc:05107e0217e199d7b0cd571db88f7d09", - "file_size_bytes": 37779373 - }, - { - "name": "Gp0127623_KO TSV file", - "description": "KO TSV file for Gp0127623", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko.tsv", - "md5_checksum": "02ffcaeeb9a73edea47ba3671396026a", - "id": "nmdc:02ffcaeeb9a73edea47ba3671396026a", - "file_size_bytes": 4343179 - }, - { - "name": "Gp0127623_EC TSV file", - "description": "EC TSV file for Gp0127623", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ec.tsv", - "md5_checksum": "b9b4ccafc50787f86ef03680eb23848d", - "id": "nmdc:b9b4ccafc50787f86ef03680eb23848d", - "file_size_bytes": 2966454 - }, - { - "name": "Gp0127623_COG GFF file", - "description": "COG GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cog.gff", - "md5_checksum": "fbd178d9c302b841e3fde3ab9acd8160", - "id": "nmdc:fbd178d9c302b841e3fde3ab9acd8160", - "file_size_bytes": 22023330 
- }, - { - "name": "Gp0127623_PFAM GFF file", - "description": "PFAM GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_pfam.gff", - "md5_checksum": "1bcc35e753e7dad78ef8ae4989eb901a", - "id": "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", - "file_size_bytes": 15956001 - }, - { - "name": "Gp0127623_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_tigrfam.gff", - "md5_checksum": "f6d6d2ea3c539560ad30bbd6df8bc71a", - "id": "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", - "file_size_bytes": 1656727 - }, - { - "name": "Gp0127623_SMART GFF file", - "description": "SMART GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_smart.gff", - "md5_checksum": "45536a48cef31f2c3870c7bacb3d785a", - "id": "nmdc:45536a48cef31f2c3870c7bacb3d785a", - "file_size_bytes": 4731416 - }, - { - "name": "Gp0127623_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_supfam.gff", - "md5_checksum": "a52d057d005504857f82bcf661dd7676", - "id": "nmdc:a52d057d005504857f82bcf661dd7676", - "file_size_bytes": 27616681 - }, - { - "name": "Gp0127623_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cath_funfam.gff", - "md5_checksum": "b92cb96900a31a3c70ccf9cfe45f02c3", - "id": "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", - "file_size_bytes": 20817140 - }, - { - "name": "Gp0127623_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko_ec.gff", - "md5_checksum": "32eca4cab8525b09cf1b0ed2353f9278", - "id": "nmdc:32eca4cab8525b09cf1b0ed2353f9278", - 
"file_size_bytes": 13827629 - } - ] - }, + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:18:17+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab346fd" + "$oid": "61e719f6833bcf838a701854" }, "has_input": [ - "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", - "nmdc:21fb280328baf81e8135733eaf440b66", - "nmdc:05107e0217e199d7b0cd571db88f7d09" + "nmdc:e0ce93b88419f87568ff206e0efe3a24" ], - "too_short_contig_num": 114220, "part_of": [ - "nmdc:mga03eyz63" + "nmdc:mga026tn70" ], - "binned_contig_num": 171, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:e63c76f92bc0ae95dfc238c099296e91", - "nmdc:3dfba77d38712870f8c415203f991496", - "nmdc:5e98d27533164fdf67c07cc224090547", - "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", - "nmdc:c70853ef1a6ab162b85df5215a76666b" + "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "nmdc:12b2d6afc355bce76249d750a9fab534", + "nmdc:18214017d56658a48723c9c998dcba7e", + "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "nmdc:f9c01985f057825149d35de0650095a8", + "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "nmdc:38d41d4299141abe28bf0405af80cdfc" ], - "was_informed_by": "gold:Gp0127623", - "input_contig_num": 118423, - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "was_informed_by": "gold:Gp0115678", + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga03eyz63", - "mags_list": [ - { - "number_of_contig": 171, - "completeness": 30.1, - "bin_name": "bins.1", - "gene_count": 991, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - 
"gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 12 - } - ], - "unbinned_contig_num": 4032, + "name": "ReadBased Analysis Activity for nmdc:mga026tn70", "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T02:42:25+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127623_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.tooShort.fa", - "md5_checksum": "e63c76f92bc0ae95dfc238c099296e91", - "id": "nmdc:e63c76f92bc0ae95dfc238c099296e91", - "file_size_bytes": 48421824 - }, - { - "name": "Gp0127623_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.unbinned.fa", - "md5_checksum": "3dfba77d38712870f8c415203f991496", - "id": "nmdc:3dfba77d38712870f8c415203f991496", - "file_size_bytes": 6028115 - }, - { - "name": "Gp0127623_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127623", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_checkm_qa.out", - "md5_checksum": "5e98d27533164fdf67c07cc224090547", - "id": 
"nmdc:5e98d27533164fdf67c07cc224090547", - "file_size_bytes": 765 - }, - { - "name": "Gp0127623_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127623", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_hqmq_bin.zip", - "md5_checksum": "bfbe3e3a21e8a089c4c7a0d945c79b7b", - "id": "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", - "file_size_bytes": 182 - }, - { - "name": "Gp0127623_metabat2 bins", - "description": "metabat2 bins for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_metabat_bin.zip", - "md5_checksum": "c70853ef1a6ab162b85df5215a76666b", - "id": "nmdc:c70853ef1a6ab162b85df5215a76666b", - "file_size_bytes": 236177 - } - ] + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:18:17+00:00" } ] }, { - "_id": { - "$oid": "649b009773e8249959349b3b" - }, - "id": "nmdc:omprc-11-76ebsj44", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-k3t2wk45" - ], - "has_output": [ - "jgi:574fde787ded5e3df1ee1416" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127625" - ], - "downstream_workflow_activity_records": [ + "functional_annotation_agg": [], + 
"library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127623_Filtered Reads", + "description": "Filtered Reads for Gp0127623", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filtered.fastq.gz", + "md5_checksum": "6a8409b21c45ba9feba873ec269c8ff7", + "id": "nmdc:6a8409b21c45ba9feba873ec269c8ff7", + "file_size_bytes": 1917552858 + }, + { + "name": "Gp0127623_Filtered Stats", + "description": "Filtered Stats for Gp0127623", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filterStats.txt", + "md5_checksum": "61fb06de10fe3a0c49c5afe14ab7fb32", + "id": "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32", + "file_size_bytes": 283 + }, + { + "name": "Gp0127623_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", + "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", + "id": "nmdc:ac39e916e17e08a845bb40d97519d8be", + "file_size_bytes": 1553 + }, + { + "name": "Gp0127623_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", + "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", + "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "file_size_bytes": 836575 + }, + { + "name": "Gp0127623_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127623", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", + "md5_checksum": "eda0c04d692ecf137585676c15924626", + "id": 
"nmdc:eda0c04d692ecf137585676c15924626", + "file_size_bytes": 231097 + }, + { + "name": "Gp0127623_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127623", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", + "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", + "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "file_size_bytes": 1669254765 + }, + { + "name": "Gp0127623_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127623", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", + "md5_checksum": "e1f164c534830cd628d67c564ace863b", + "id": "nmdc:e1f164c534830cd628d67c564ace863b", + "file_size_bytes": 255784 + }, + { + "name": "Gp0127623_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127623", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", + "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", + "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "file_size_bytes": 2333760 + }, + { + "name": "Gp0127623_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127623", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", + "md5_checksum": "040e6ca695283a12711c16344acd1e76", + "id": "nmdc:040e6ca695283a12711c16344acd1e76", + "file_size_bytes": 1335651191 + }, + { + "name": "Gp0127623_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127623", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", + "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", + "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "file_size_bytes": 647609 + }, + { + "name": "Gp0127623_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127623", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", + "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", + "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", + "file_size_bytes": 3949449 + }, + { + "name": "Gp0127623_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", + "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", + "id": "nmdc:ac39e916e17e08a845bb40d97519d8be", + "file_size_bytes": 1553 + }, + { + "name": "Gp0127623_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", + "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", + "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "file_size_bytes": 836575 + }, + { + "name": "Gp0127623_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127623", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", + "md5_checksum": "eda0c04d692ecf137585676c15924626", + "id": "nmdc:eda0c04d692ecf137585676c15924626", + "file_size_bytes": 231097 + }, + { + "name": "Gp0127623_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127623", + "data_object_type": "Centrifuge Taxonomic Classification", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", + "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", + "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "file_size_bytes": 1669254765 + }, + { + "name": "Gp0127623_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127623", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", + "md5_checksum": "e1f164c534830cd628d67c564ace863b", + "id": "nmdc:e1f164c534830cd628d67c564ace863b", + "file_size_bytes": 255784 + }, + { + "name": "Gp0127623_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127623", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", + "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", + "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "file_size_bytes": 2333760 + }, + { + "name": "Gp0127623_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127623", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", + "md5_checksum": "040e6ca695283a12711c16344acd1e76", + "id": "nmdc:040e6ca695283a12711c16344acd1e76", + "file_size_bytes": 1335651191 + }, + { + "name": "Gp0127623_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127623", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", + "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", + "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "file_size_bytes": 647609 + }, + { + "name": 
"Gp0127623_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127623", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", + "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", + "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", + "file_size_bytes": 3949449 + }, + { + "name": "Gp0127623_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127623", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_contigs.fna", + "md5_checksum": "3373ef564b5b97fa472dc8f2c2277dbc", + "id": "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "file_size_bytes": 55220158 + }, + { + "name": "Gp0127623_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127623", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_scaffolds.fna", + "md5_checksum": "a0377bb7d752e66b754753fcefb5005a", + "id": "nmdc:a0377bb7d752e66b754753fcefb5005a", + "file_size_bytes": 54864386 + }, + { + "name": "Gp0127623_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_covstats.txt", + "md5_checksum": "081017d0d9e68a999c245618eb907c08", + "id": "nmdc:081017d0d9e68a999c245618eb907c08", + "file_size_bytes": 9321875 + }, + { + "name": "Gp0127623_Assembled AGP file", + "description": "Assembled AGP file for Gp0127623", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_assembly.agp", + "md5_checksum": "4a6ed00a6c2156c142d7bbec6baa36b5", + "id": "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", + "file_size_bytes": 8670291 + }, + { + "name": "Gp0127623_Metagenome Alignment BAM file", + 
"description": "Metagenome Alignment BAM file for Gp0127623", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_pairedMapped_sorted.bam", + "md5_checksum": "21fb280328baf81e8135733eaf440b66", + "id": "nmdc:21fb280328baf81e8135733eaf440b66", + "file_size_bytes": 2062412797 + }, + { + "name": "Gp0127623_Protein FAA", + "description": "Protein FAA for Gp0127623", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_proteins.faa", + "md5_checksum": "8ac52d00bad1f9349da2acde572006b6", + "id": "nmdc:8ac52d00bad1f9349da2acde572006b6", + "file_size_bytes": 32224726 + }, + { + "name": "Gp0127623_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127623", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_structural_annotation.gff", + "md5_checksum": "9dd5eb06fe24f63d5012e34e364a580c", + "id": "nmdc:9dd5eb06fe24f63d5012e34e364a580c", + "file_size_bytes": 2512 + }, + { + "name": "Gp0127623_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127623", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_functional_annotation.gff", + "md5_checksum": "05107e0217e199d7b0cd571db88f7d09", + "id": "nmdc:05107e0217e199d7b0cd571db88f7d09", + "file_size_bytes": 37779373 + }, + { + "name": "Gp0127623_KO TSV file", + "description": "KO TSV file for Gp0127623", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko.tsv", + "md5_checksum": "02ffcaeeb9a73edea47ba3671396026a", + "id": "nmdc:02ffcaeeb9a73edea47ba3671396026a", + "file_size_bytes": 4343179 + }, + { + "name": "Gp0127623_EC 
TSV file", + "description": "EC TSV file for Gp0127623", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ec.tsv", + "md5_checksum": "b9b4ccafc50787f86ef03680eb23848d", + "id": "nmdc:b9b4ccafc50787f86ef03680eb23848d", + "file_size_bytes": 2966454 + }, + { + "name": "Gp0127623_COG GFF file", + "description": "COG GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cog.gff", + "md5_checksum": "fbd178d9c302b841e3fde3ab9acd8160", + "id": "nmdc:fbd178d9c302b841e3fde3ab9acd8160", + "file_size_bytes": 22023330 + }, + { + "name": "Gp0127623_PFAM GFF file", + "description": "PFAM GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_pfam.gff", + "md5_checksum": "1bcc35e753e7dad78ef8ae4989eb901a", + "id": "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", + "file_size_bytes": 15956001 + }, + { + "name": "Gp0127623_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_tigrfam.gff", + "md5_checksum": "f6d6d2ea3c539560ad30bbd6df8bc71a", + "id": "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", + "file_size_bytes": 1656727 + }, + { + "name": "Gp0127623_SMART GFF file", + "description": "SMART GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_smart.gff", + "md5_checksum": "45536a48cef31f2c3870c7bacb3d785a", + "id": "nmdc:45536a48cef31f2c3870c7bacb3d785a", + "file_size_bytes": 4731416 + }, + { + "name": "Gp0127623_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_supfam.gff", + "md5_checksum": "a52d057d005504857f82bcf661dd7676", + "id": "nmdc:a52d057d005504857f82bcf661dd7676", + "file_size_bytes": 27616681 
+ }, + { + "name": "Gp0127623_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cath_funfam.gff", + "md5_checksum": "b92cb96900a31a3c70ccf9cfe45f02c3", + "id": "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", + "file_size_bytes": 20817140 + }, + { + "name": "Gp0127623_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko_ec.gff", + "md5_checksum": "32eca4cab8525b09cf1b0ed2353f9278", + "id": "nmdc:32eca4cab8525b09cf1b0ed2353f9278", + "file_size_bytes": 13827629 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127623_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.tooShort.fa", + "md5_checksum": "e63c76f92bc0ae95dfc238c099296e91", + "id": "nmdc:e63c76f92bc0ae95dfc238c099296e91", + "file_size_bytes": 48421824 + }, + { + "name": "Gp0127623_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.unbinned.fa", + "md5_checksum": "3dfba77d38712870f8c415203f991496", + "id": "nmdc:3dfba77d38712870f8c415203f991496", + "file_size_bytes": 6028115 + }, + { + "name": "Gp0127623_metabat2 bin checkm quality assessment result", + "description": 
"metabat2 bin checkm quality assessment result for Gp0127623", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_checkm_qa.out", + "md5_checksum": "5e98d27533164fdf67c07cc224090547", + "id": "nmdc:5e98d27533164fdf67c07cc224090547", + "file_size_bytes": 765 + }, + { + "name": "Gp0127623_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127623", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_hqmq_bin.zip", + "md5_checksum": "bfbe3e3a21e8a089c4c7a0d945c79b7b", + "id": "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", + "file_size_bytes": 182 + }, + { + "name": "Gp0127623_metabat2 bins", + "description": "metabat2 bins for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_metabat_bin.zip", + "md5_checksum": "c70853ef1a6ab162b85df5215a76666b", + "id": "nmdc:c70853ef1a6ab162b85df5215a76666b", + "file_size_bytes": 236177 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c858" + "$oid": "649b0052ec087f6bbab346fd" }, "has_input": [ - "nmdc:93c62425e46296c35415039d7fd9cb56" + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "nmdc:21fb280328baf81e8135733eaf440b66", + "nmdc:05107e0217e199d7b0cd571db88f7d09" ], + "too_short_contig_num": 114220, "part_of": [ - "nmdc:mga0bfpq58" + "nmdc:mga03eyz63" ], + "binned_contig_num": 171, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", - "nmdc:42be49edad69619e550ddd69d150490f" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:e63c76f92bc0ae95dfc238c099296e91", + "nmdc:3dfba77d38712870f8c415203f991496", + "nmdc:5e98d27533164fdf67c07cc224090547", + "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", + 
"nmdc:c70853ef1a6ab162b85df5215a76666b" ], - "was_informed_by": "gold:Gp0127625", - "input_read_count": 26227312, - "output_read_bases": 3764845015, - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "was_informed_by": "gold:Gp0127623", + "input_contig_num": 118423, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", "execution_resource": "NERSC-Cori", - "input_read_bases": 3960324112, - "name": "Read QC Activity for nmdc:mga0bfpq58", - "output_read_count": 25182244, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:29:50+00:00", - "output_data_objects": [ - { - "name": "Gp0127625_Filtered Reads", - "description": "Filtered Reads for Gp0127625", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filtered.fastq.gz", - "md5_checksum": "2d13b3a30339b9c5b4fba099f9d4b10f", - "id": "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", - "file_size_bytes": 2037866145 - }, + "name": "MAGs Analysis Activity for nmdc:mga03eyz63", + "mags_list": [ { - "name": "Gp0127625_Filtered Stats", - "description": "Filtered Stats for Gp0127625", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filterStats.txt", - "md5_checksum": "42be49edad69619e550ddd69d150490f", - "id": "nmdc:42be49edad69619e550ddd69d150490f", - "file_size_bytes": 284 + "number_of_contig": 171, + "completeness": 30.1, + "bin_name": "bins.1", + "gene_count": 991, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 } - ] - }, + ], + "unbinned_contig_num": 4032, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ], + 
"material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf15" + "$oid": "649b005bbf2caae0415ef99a" }, "has_input": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc" + ], + "part_of": [ + "nmdc:mga03eyz63" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:550b631e1de3e01392154e54493d47ef", - "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "nmdc:b09795fc768257d881e8ce547be0ce68", - "nmdc:064ba18473eb80ff0b484311565d2894", - "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "nmdc:bc8acb862c8942616ef07302667c334f", - "nmdc:b797ed6cb135c993b582cac368b2a93c" + "nmdc:8ac52d00bad1f9349da2acde572006b6", + "nmdc:9dd5eb06fe24f63d5012e34e364a580c", + "nmdc:05107e0217e199d7b0cd571db88f7d09", + "nmdc:02ffcaeeb9a73edea47ba3671396026a", + "nmdc:b9b4ccafc50787f86ef03680eb23848d", + "nmdc:fbd178d9c302b841e3fde3ab9acd8160", + "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", + "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", + "nmdc:45536a48cef31f2c3870c7bacb3d785a", + "nmdc:a52d057d005504857f82bcf661dd7676", + "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", + "nmdc:32eca4cab8525b09cf1b0ed2353f9278" ], - "was_informed_by": "gold:Gp0127625", - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "was_informed_by": "gold:Gp0127623", + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", + "name": "Annotation Activity for nmdc:mga03eyz63", "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:29:50+00:00", - "output_data_objects": [ - { - "name": "Gp0127625_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127625", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", - "md5_checksum": "550b631e1de3e01392154e54493d47ef", - "id": "nmdc:550b631e1de3e01392154e54493d47ef", - "file_size_bytes": 754 - }, - { - "name": "Gp0127625_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", - "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", - "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "file_size_bytes": 641658 - }, - { - "name": "Gp0127625_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127625", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", - "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", - "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "file_size_bytes": 228494 - }, - { - "name": "Gp0127625_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127625", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", - "md5_checksum": "b09795fc768257d881e8ce547be0ce68", - "id": "nmdc:b09795fc768257d881e8ce547be0ce68", - "file_size_bytes": 1849982678 - }, - { - "name": "Gp0127625_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127625", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", - "md5_checksum": "064ba18473eb80ff0b484311565d2894", - "id": "nmdc:064ba18473eb80ff0b484311565d2894", - "file_size_bytes": 253852 - }, - { - "name": "Gp0127625_Centrifuge Krona HTML report", - "description": "Centrifuge 
Krona HTML report for Gp0127625", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", - "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", - "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "file_size_bytes": 2331556 - }, - { - "name": "Gp0127625_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127625", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", - "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", - "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "file_size_bytes": 1471976767 - }, - { - "name": "Gp0127625_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127625", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", - "md5_checksum": "bc8acb862c8942616ef07302667c334f", - "id": "nmdc:bc8acb862c8942616ef07302667c334f", - "file_size_bytes": 627498 - }, - { - "name": "Gp0127625_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127625", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", - "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", - "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", - "file_size_bytes": 3921941 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "61e718e2833bcf838a6ff0ce" + "$oid": "649b005f2ca5ee4adb139f9f" }, "has_input": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" + "nmdc:6a8409b21c45ba9feba873ec269c8ff7" ], "part_of": [ - "nmdc:mga0bfpq58" + "nmdc:mga03eyz63" ], + 
"ctg_logsum": 70596, + "scaf_logsum": 70885, + "gap_pct": 0.00063, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:550b631e1de3e01392154e54493d47ef", - "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "nmdc:b09795fc768257d881e8ce547be0ce68", - "nmdc:064ba18473eb80ff0b484311565d2894", - "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "nmdc:bc8acb862c8942616ef07302667c334f", - "nmdc:b797ed6cb135c993b582cac368b2a93c" + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "nmdc:a0377bb7d752e66b754753fcefb5005a", + "nmdc:081017d0d9e68a999c245618eb907c08", + "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", + "nmdc:21fb280328baf81e8135733eaf440b66" ], - "was_informed_by": "gold:Gp0127625", - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "asm_score": 3.626, + "was_informed_by": "gold:Gp0127623", + "ctg_powsum": 7584.611, + "scaf_max": 12785, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "scaf_powsum": 7618.086, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", + "contigs": 118423, + "name": "Assembly Activity for nmdc:mga03eyz63", + "ctg_max": 11834, + "gc_std": 0.12108, + "contig_bp": 50762396, + "gc_avg": 0.59992, "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:29:50+00:00", - "output_data_objects": [ - { - "name": "Gp0127625_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", - "md5_checksum": "550b631e1de3e01392154e54493d47ef", - "id": "nmdc:550b631e1de3e01392154e54493d47ef", - "file_size_bytes": 754 - }, - { - "name": "Gp0127625_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127625", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", - "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", - "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "file_size_bytes": 641658 - }, - { - "name": "Gp0127625_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127625", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", - "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", - "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "file_size_bytes": 228494 - }, - { - "name": "Gp0127625_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127625", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", - "md5_checksum": "b09795fc768257d881e8ce547be0ce68", - "id": "nmdc:b09795fc768257d881e8ce547be0ce68", - "file_size_bytes": 1849982678 - }, - { - "name": "Gp0127625_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127625", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", - "md5_checksum": "064ba18473eb80ff0b484311565d2894", - "id": "nmdc:064ba18473eb80ff0b484311565d2894", - "file_size_bytes": 253852 - }, - { - "name": "Gp0127625_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127625", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", - "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", - "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "file_size_bytes": 2331556 - }, - { - "name": 
"Gp0127625_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127625", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", - "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", - "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "file_size_bytes": 1471976767 - }, - { - "name": "Gp0127625_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127625", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", - "md5_checksum": "bc8acb862c8942616ef07302667c334f", - "id": "nmdc:bc8acb862c8942616ef07302667c334f", - "file_size_bytes": 627498 - }, - { - "name": "Gp0127625_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127625", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", - "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", - "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", - "file_size_bytes": 3921941 - } + "scaf_bp": 50762716, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 118391, + "ended_at_time": "2021-10-11T02:42:25+00:00", + "ctg_l50": 402, + "ctg_l90": 285, + "ctg_n50": 37682, + "ctg_n90": 100987, + "scaf_l50": 402, + "scaf_l90": 285, + "scaf_n50": 37659, + "scaf_n90": 100956 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b3a" + }, + "id": "nmdc:omprc-11-5r54nt37", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", + "description": 
"Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-r7ggfc16" + ], + "has_output": [ + "jgi:574fde547ded5e3df1ee13fa" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127623" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f89" + "$oid": "649b009d6bdd4fd20273c85a" }, "has_input": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" + "nmdc:14766bc431808b2a29c03beecb66bbac" ], "part_of": [ - "nmdc:mga0bfpq58" + "nmdc:mga03eyz63" ], - "ctg_logsum": 452076, - "scaf_logsum": 453436, - "gap_pct": 0.00138, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:5b6e7cbece9167002b12c3415afa9bb8", - "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", - "nmdc:d231edb2040700184064615a28e65ee5", - "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", - "nmdc:ff612445b348b65f906cd8858c4ec54e" + "nmdc:6a8409b21c45ba9feba873ec269c8ff7", + "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32" ], - "asm_score": 3.923, - "was_informed_by": "gold:Gp0127625", - "ctg_powsum": 49204, - "scaf_max": 29400, - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", - "scaf_powsum": 49370, + "was_informed_by": "gold:Gp0127623", + "input_read_count": 23705118, + "output_read_bases": 3409425046, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", "execution_resource": "NERSC-Cori", - "contigs": 300102, - "name": "Assembly Activity for 
nmdc:mga0bfpq58", - "ctg_max": 29400, - "gc_std": 0.0955, - "contig_bp": 159709614, - "gc_avg": 0.6367, + "input_read_bases": 3579472818, + "name": "Read QC Activity for nmdc:mga03eyz63", + "output_read_count": 22801896, "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 159711824, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 299890, - "ended_at_time": "2021-10-11T03:29:50+00:00", - "ctg_l50": 546, - "ctg_l90": 301, - "ctg_n50": 78532, - "ctg_n90": 244428, - "scaf_l50": 546, - "scaf_l90": 301, - "scaf_n50": 78517, - "scaf_n90": 244244, - "output_data_objects": [ - { - "name": "Gp0127625_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127625", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", - "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", - "id": "nmdc:5b6e7cbece9167002b12c3415afa9bb8", - "file_size_bytes": 171703232 - }, - { - "name": "Gp0127625_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127625", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", - "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", - "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", - "file_size_bytes": 170799869 - }, - { - "name": "Gp0127625_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", - "md5_checksum": "d231edb2040700184064615a28e65ee5", - "id": "nmdc:d231edb2040700184064615a28e65ee5", - "file_size_bytes": 23875845 - }, - { - "name": "Gp0127625_Assembled AGP file", - "description": "Assembled AGP file for Gp0127625", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_assembly.agp", - "md5_checksum": 
"9e3e55fe2f337ee0192604f8aa13da8e", - "id": "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", - "file_size_bytes": 22351137 - }, - { - "name": "Gp0127625_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127625", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_pairedMapped_sorted.bam", - "md5_checksum": "ff612445b348b65f906cd8858c4ec54e", - "id": "nmdc:ff612445b348b65f906cd8858c4ec54e", - "file_size_bytes": 2304803186 - } - ] - }, + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef999" + "$oid": "649b009bff710ae353f8cf17" }, "has_input": [ - "nmdc:5b6e7cbece9167002b12c3415afa9bb8" - ], - "part_of": [ - "nmdc:mga0bfpq58" + "nmdc:6a8409b21c45ba9feba873ec269c8ff7" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", - "nmdc:035d81e38b01174de882d15a859390a0", - "nmdc:da95ab39eb229378ef9c1c7317f58c36", - "nmdc:7ba2f365814fc2ae2896849d4dbb619d", - "nmdc:91ade9a89599592c1e699b8990a11fba", - "nmdc:a1c78cb8202825bd692c572b1537b549", - "nmdc:b83f7bca7166e0bbeb5d260af5920d00", - "nmdc:2f8d30335b71e6d7f29458795d20daf4", - "nmdc:77a22d4fe5949259acc0f12eafe264a2", - "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", - "nmdc:4a365e4bb51f09bb4f21470a753eac42", - "nmdc:5a230cb34060373c2e9a0af8b8040f46" + "nmdc:ac39e916e17e08a845bb40d97519d8be", + "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "nmdc:eda0c04d692ecf137585676c15924626", + "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "nmdc:e1f164c534830cd628d67c564ace863b", + "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "nmdc:040e6ca695283a12711c16344acd1e76", + "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "nmdc:f2eed9669268f69dbc31f0c4f839fccf" ], - "was_informed_by": "gold:Gp0127625", - "id": 
"nmdc:aa214e53dc8472f9ff99b02245d8f943", + "was_informed_by": "gold:Gp0127623", + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0bfpq58", + "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:29:50+00:00", - "output_data_objects": [ - { - "name": "Gp0127625_Protein FAA", - "description": "Protein FAA for Gp0127625", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_proteins.faa", - "md5_checksum": "b1cae75f11c5efc7b37ea38c8d690e09", - "id": "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", - "file_size_bytes": 96076876 - }, - { - "name": "Gp0127625_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127625", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_structural_annotation.gff", - "md5_checksum": "035d81e38b01174de882d15a859390a0", - "id": "nmdc:035d81e38b01174de882d15a859390a0", - "file_size_bytes": 2526 - }, - { - "name": "Gp0127625_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127625", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_functional_annotation.gff", - "md5_checksum": "da95ab39eb229378ef9c1c7317f58c36", - "id": "nmdc:da95ab39eb229378ef9c1c7317f58c36", - "file_size_bytes": 106301187 - }, - { - "name": "Gp0127625_KO TSV file", - "description": "KO TSV file for Gp0127625", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko.tsv", - "md5_checksum": "7ba2f365814fc2ae2896849d4dbb619d", - "id": 
"nmdc:7ba2f365814fc2ae2896849d4dbb619d", - "file_size_bytes": 12012992 - }, - { - "name": "Gp0127625_EC TSV file", - "description": "EC TSV file for Gp0127625", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ec.tsv", - "md5_checksum": "91ade9a89599592c1e699b8990a11fba", - "id": "nmdc:91ade9a89599592c1e699b8990a11fba", - "file_size_bytes": 7987608 - }, - { - "name": "Gp0127625_COG GFF file", - "description": "COG GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cog.gff", - "md5_checksum": "a1c78cb8202825bd692c572b1537b549", - "id": "nmdc:a1c78cb8202825bd692c572b1537b549", - "file_size_bytes": 63761051 - }, - { - "name": "Gp0127625_PFAM GFF file", - "description": "PFAM GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_pfam.gff", - "md5_checksum": "b83f7bca7166e0bbeb5d260af5920d00", - "id": "nmdc:b83f7bca7166e0bbeb5d260af5920d00", - "file_size_bytes": 49051515 - }, - { - "name": "Gp0127625_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_tigrfam.gff", - "md5_checksum": "2f8d30335b71e6d7f29458795d20daf4", - "id": "nmdc:2f8d30335b71e6d7f29458795d20daf4", - "file_size_bytes": 5446717 - }, - { - "name": "Gp0127625_SMART GFF file", - "description": "SMART GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_smart.gff", - "md5_checksum": "77a22d4fe5949259acc0f12eafe264a2", - "id": "nmdc:77a22d4fe5949259acc0f12eafe264a2", - "file_size_bytes": 14046377 - }, - { - "name": "Gp0127625_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_supfam.gff", - "md5_checksum": 
"f21f3b5ed41e8945b4eebdbb044f832a", - "id": "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", - "file_size_bytes": 79091420 - }, - { - "name": "Gp0127625_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cath_funfam.gff", - "md5_checksum": "4a365e4bb51f09bb4f21470a753eac42", - "id": "nmdc:4a365e4bb51f09bb4f21470a753eac42", - "file_size_bytes": 60777542 - }, - { - "name": "Gp0127625_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko_ec.gff", - "md5_checksum": "5a230cb34060373c2e9a0af8b8040f46", - "id": "nmdc:5a230cb34060373c2e9a0af8b8040f46", - "file_size_bytes": 38117675 - } - ] - }, + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab346fc" + "$oid": "61e718fc833bcf838a6ff4c9" }, "has_input": [ - "nmdc:5b6e7cbece9167002b12c3415afa9bb8", - "nmdc:ff612445b348b65f906cd8858c4ec54e", - "nmdc:da95ab39eb229378ef9c1c7317f58c36" + "nmdc:6a8409b21c45ba9feba873ec269c8ff7" ], - "too_short_contig_num": 275414, "part_of": [ - "nmdc:mga0bfpq58" + "nmdc:mga03eyz63" ], - "binned_contig_num": 1195, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", - "nmdc:b66d8fd47536ed5299c280aa873e2130", - "nmdc:dac476e3a7a8cdb2f3be5946ae437906", - "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", - "nmdc:d312dfb56973b50497bab8faf7409db8" + "nmdc:ac39e916e17e08a845bb40d97519d8be", + "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "nmdc:eda0c04d692ecf137585676c15924626", + 
"nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "nmdc:e1f164c534830cd628d67c564ace863b", + "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "nmdc:040e6ca695283a12711c16344acd1e76", + "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "nmdc:f2eed9669268f69dbc31f0c4f839fccf" ], - "was_informed_by": "gold:Gp0127625", - "input_contig_num": 300100, - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "was_informed_by": "gold:Gp0127623", + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0bfpq58", - "mags_list": [ - { - "number_of_contig": 382, - "completeness": 47.74, - "bin_name": "bins.1", - "gene_count": 2054, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 2.69, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 21 - }, - { - "number_of_contig": 197, - "completeness": 22.93, - "bin_name": "bins.2", - "gene_count": 1005, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.03, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 17 - }, - { - "number_of_contig": 95, - "completeness": 7.24, - "bin_name": "bins.3", - "gene_count": 447, - "bin_quality": "LQ", - "gtdbtk_species": "", + "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127625_Filtered Reads", + "description": "Filtered Reads for Gp0127625", + "data_object_type": "Filtered 
Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filtered.fastq.gz", + "md5_checksum": "2d13b3a30339b9c5b4fba099f9d4b10f", + "id": "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", + "file_size_bytes": 2037866145 + }, + { + "name": "Gp0127625_Filtered Stats", + "description": "Filtered Stats for Gp0127625", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filterStats.txt", + "md5_checksum": "42be49edad69619e550ddd69d150490f", + "id": "nmdc:42be49edad69619e550ddd69d150490f", + "file_size_bytes": 284 + }, + { + "name": "Gp0127625_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", + "md5_checksum": "550b631e1de3e01392154e54493d47ef", + "id": "nmdc:550b631e1de3e01392154e54493d47ef", + "file_size_bytes": 754 + }, + { + "name": "Gp0127625_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", + "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", + "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "file_size_bytes": 641658 + }, + { + "name": "Gp0127625_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127625", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", + "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", + "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "file_size_bytes": 228494 + }, + { + "name": "Gp0127625_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127625", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", + "md5_checksum": "b09795fc768257d881e8ce547be0ce68", + "id": "nmdc:b09795fc768257d881e8ce547be0ce68", + "file_size_bytes": 1849982678 + }, + { + "name": "Gp0127625_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127625", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", + "md5_checksum": "064ba18473eb80ff0b484311565d2894", + "id": "nmdc:064ba18473eb80ff0b484311565d2894", + "file_size_bytes": 253852 + }, + { + "name": "Gp0127625_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127625", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", + "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", + "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "file_size_bytes": 2331556 + }, + { + "name": "Gp0127625_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127625", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", + "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", + "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "file_size_bytes": 1471976767 + }, + { + "name": "Gp0127625_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127625", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", + "md5_checksum": "bc8acb862c8942616ef07302667c334f", + "id": "nmdc:bc8acb862c8942616ef07302667c334f", + "file_size_bytes": 627498 + }, + { + "name": "Gp0127625_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127625", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", + "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", + "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", + "file_size_bytes": 3921941 + }, + { + "name": "Gp0127625_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", + "md5_checksum": "550b631e1de3e01392154e54493d47ef", + "id": "nmdc:550b631e1de3e01392154e54493d47ef", + "file_size_bytes": 754 + }, + { + "name": "Gp0127625_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", + "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", + "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "file_size_bytes": 641658 + }, + { + "name": "Gp0127625_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127625", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", + "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", + "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "file_size_bytes": 228494 + }, + { + "name": "Gp0127625_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127625", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", + "md5_checksum": "b09795fc768257d881e8ce547be0ce68", + "id": "nmdc:b09795fc768257d881e8ce547be0ce68", + "file_size_bytes": 1849982678 + }, + { + "name": 
"Gp0127625_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127625", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", + "md5_checksum": "064ba18473eb80ff0b484311565d2894", + "id": "nmdc:064ba18473eb80ff0b484311565d2894", + "file_size_bytes": 253852 + }, + { + "name": "Gp0127625_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127625", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", + "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", + "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "file_size_bytes": 2331556 + }, + { + "name": "Gp0127625_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127625", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", + "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", + "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "file_size_bytes": 1471976767 + }, + { + "name": "Gp0127625_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127625", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", + "md5_checksum": "bc8acb862c8942616ef07302667c334f", + "id": "nmdc:bc8acb862c8942616ef07302667c334f", + "file_size_bytes": 627498 + }, + { + "name": "Gp0127625_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127625", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", + "md5_checksum": 
"b797ed6cb135c993b582cac368b2a93c", + "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", + "file_size_bytes": 3921941 + }, + { + "name": "Gp0127625_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127625", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", + "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", + "id": "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "file_size_bytes": 171703232 + }, + { + "name": "Gp0127625_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127625", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", + "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", + "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", + "file_size_bytes": 170799869 + }, + { + "name": "Gp0127625_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", + "md5_checksum": "d231edb2040700184064615a28e65ee5", + "id": "nmdc:d231edb2040700184064615a28e65ee5", + "file_size_bytes": 23875845 + }, + { + "name": "Gp0127625_Assembled AGP file", + "description": "Assembled AGP file for Gp0127625", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_assembly.agp", + "md5_checksum": "9e3e55fe2f337ee0192604f8aa13da8e", + "id": "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", + "file_size_bytes": 22351137 + }, + { + "name": "Gp0127625_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127625", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_pairedMapped_sorted.bam", + "md5_checksum": "ff612445b348b65f906cd8858c4ec54e", + "id": 
"nmdc:ff612445b348b65f906cd8858c4ec54e", + "file_size_bytes": 2304803186 + }, + { + "name": "Gp0127625_Protein FAA", + "description": "Protein FAA for Gp0127625", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_proteins.faa", + "md5_checksum": "b1cae75f11c5efc7b37ea38c8d690e09", + "id": "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", + "file_size_bytes": 96076876 + }, + { + "name": "Gp0127625_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127625", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_structural_annotation.gff", + "md5_checksum": "035d81e38b01174de882d15a859390a0", + "id": "nmdc:035d81e38b01174de882d15a859390a0", + "file_size_bytes": 2526 + }, + { + "name": "Gp0127625_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127625", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_functional_annotation.gff", + "md5_checksum": "da95ab39eb229378ef9c1c7317f58c36", + "id": "nmdc:da95ab39eb229378ef9c1c7317f58c36", + "file_size_bytes": 106301187 + }, + { + "name": "Gp0127625_KO TSV file", + "description": "KO TSV file for Gp0127625", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko.tsv", + "md5_checksum": "7ba2f365814fc2ae2896849d4dbb619d", + "id": "nmdc:7ba2f365814fc2ae2896849d4dbb619d", + "file_size_bytes": 12012992 + }, + { + "name": "Gp0127625_EC TSV file", + "description": "EC TSV file for Gp0127625", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ec.tsv", + "md5_checksum": "91ade9a89599592c1e699b8990a11fba", + "id": 
"nmdc:91ade9a89599592c1e699b8990a11fba", + "file_size_bytes": 7987608 + }, + { + "name": "Gp0127625_COG GFF file", + "description": "COG GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cog.gff", + "md5_checksum": "a1c78cb8202825bd692c572b1537b549", + "id": "nmdc:a1c78cb8202825bd692c572b1537b549", + "file_size_bytes": 63761051 + }, + { + "name": "Gp0127625_PFAM GFF file", + "description": "PFAM GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_pfam.gff", + "md5_checksum": "b83f7bca7166e0bbeb5d260af5920d00", + "id": "nmdc:b83f7bca7166e0bbeb5d260af5920d00", + "file_size_bytes": 49051515 + }, + { + "name": "Gp0127625_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_tigrfam.gff", + "md5_checksum": "2f8d30335b71e6d7f29458795d20daf4", + "id": "nmdc:2f8d30335b71e6d7f29458795d20daf4", + "file_size_bytes": 5446717 + }, + { + "name": "Gp0127625_SMART GFF file", + "description": "SMART GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_smart.gff", + "md5_checksum": "77a22d4fe5949259acc0f12eafe264a2", + "id": "nmdc:77a22d4fe5949259acc0f12eafe264a2", + "file_size_bytes": 14046377 + }, + { + "name": "Gp0127625_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_supfam.gff", + "md5_checksum": "f21f3b5ed41e8945b4eebdbb044f832a", + "id": "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", + "file_size_bytes": 79091420 + }, + { + "name": "Gp0127625_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cath_funfam.gff", + "md5_checksum": "4a365e4bb51f09bb4f21470a753eac42", + 
"id": "nmdc:4a365e4bb51f09bb4f21470a753eac42", + "file_size_bytes": 60777542 + }, + { + "name": "Gp0127625_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko_ec.gff", + "md5_checksum": "5a230cb34060373c2e9a0af8b8040f46", + "id": "nmdc:5a230cb34060373c2e9a0af8b8040f46", + "file_size_bytes": 38117675 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127625_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.tooShort.fa", + "md5_checksum": "27f14f2f1af3ad7d17505a6ddc52d860", + "id": "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", + "file_size_bytes": 128750891 + }, + { + "name": "Gp0127625_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.unbinned.fa", + "md5_checksum": "b66d8fd47536ed5299c280aa873e2130", + "id": "nmdc:b66d8fd47536ed5299c280aa873e2130", + "file_size_bytes": 37223163 + }, + { + "name": "Gp0127625_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127625", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_checkm_qa.out", + "md5_checksum": "dac476e3a7a8cdb2f3be5946ae437906", + "id": 
"nmdc:dac476e3a7a8cdb2f3be5946ae437906", + "file_size_bytes": 1413 + }, + { + "name": "Gp0127625_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127625", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_hqmq_bin.zip", + "md5_checksum": "1ce4d3dcf2c9cbe245b437ca14a2772f", + "id": "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", + "file_size_bytes": 182 + }, + { + "name": "Gp0127625_metabat2 bins", + "description": "metabat2 bins for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_metabat_bin.zip", + "md5_checksum": "d312dfb56973b50497bab8faf7409db8", + "id": "nmdc:d312dfb56973b50497bab8faf7409db8", + "file_size_bytes": 1729165 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346fc" + }, + "has_input": [ + "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "nmdc:ff612445b348b65f906cd8858c4ec54e", + "nmdc:da95ab39eb229378ef9c1c7317f58c36" + ], + "too_short_contig_num": 275414, + "part_of": [ + "nmdc:mga0bfpq58" + ], + "binned_contig_num": 1195, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", + "nmdc:b66d8fd47536ed5299c280aa873e2130", + "nmdc:dac476e3a7a8cdb2f3be5946ae437906", + "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", + "nmdc:d312dfb56973b50497bab8faf7409db8" + ], + "was_informed_by": "gold:Gp0127625", + "input_contig_num": 300100, + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0bfpq58", + "mags_list": [ + { + "number_of_contig": 382, + "completeness": 47.74, + "bin_name": "bins.1", + "gene_count": 2054, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + 
"num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.69, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 197, + "completeness": 22.93, + "bin_name": "bins.2", + "gene_count": 1005, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 95, + "completeness": 7.24, + "bin_name": "bins.3", + "gene_count": 447, + "bin_quality": "LQ", + "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", @@ -6213,616 +6287,610 @@ "unbinned_contig_num": 23491, "started_at_time": "2021-10-11T02:23:30Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:29:50+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127625_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.tooShort.fa", - "md5_checksum": "27f14f2f1af3ad7d17505a6ddc52d860", - "id": "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", - "file_size_bytes": 128750891 - }, - { - "name": "Gp0127625_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127625", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.unbinned.fa", - "md5_checksum": "b66d8fd47536ed5299c280aa873e2130", - "id": "nmdc:b66d8fd47536ed5299c280aa873e2130", - "file_size_bytes": 37223163 - }, - { - "name": "Gp0127625_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127625", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_checkm_qa.out", - "md5_checksum": "dac476e3a7a8cdb2f3be5946ae437906", - "id": "nmdc:dac476e3a7a8cdb2f3be5946ae437906", - "file_size_bytes": 1413 - }, - { - "name": "Gp0127625_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127625", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_hqmq_bin.zip", - "md5_checksum": "1ce4d3dcf2c9cbe245b437ca14a2772f", - "id": "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", - "file_size_bytes": 182 - }, - { - "name": "Gp0127625_metabat2 bins", - "description": "metabat2 bins for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_metabat_bin.zip", - "md5_checksum": "d312dfb56973b50497bab8faf7409db8", - "id": "nmdc:d312dfb56973b50497bab8faf7409db8", - "file_size_bytes": 1729165 - } - ] + "ended_at_time": "2021-10-11T03:29:50+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b3c" - }, - "id": "nmdc:omprc-11-s6wqag22", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-mxdygh62" - ], - "has_output": [ - "jgi:574fde7b7ded5e3df1ee1418" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], 
- "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127626" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c86c" + "$oid": "649b005bbf2caae0415ef999" }, "has_input": [ - "nmdc:8bee270fc5b3a39f7e7609b60e191766" + "nmdc:5b6e7cbece9167002b12c3415afa9bb8" ], "part_of": [ - "nmdc:mga04xnj45" + "nmdc:mga0bfpq58" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90", - "nmdc:9089d07fdee5ed03e901c1656206af02" + "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", + "nmdc:035d81e38b01174de882d15a859390a0", + "nmdc:da95ab39eb229378ef9c1c7317f58c36", + "nmdc:7ba2f365814fc2ae2896849d4dbb619d", + "nmdc:91ade9a89599592c1e699b8990a11fba", + "nmdc:a1c78cb8202825bd692c572b1537b549", + "nmdc:b83f7bca7166e0bbeb5d260af5920d00", + "nmdc:2f8d30335b71e6d7f29458795d20daf4", + "nmdc:77a22d4fe5949259acc0f12eafe264a2", + "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", + "nmdc:4a365e4bb51f09bb4f21470a753eac42", + "nmdc:5a230cb34060373c2e9a0af8b8040f46" ], - "was_informed_by": "gold:Gp0127626", - "input_read_count": 24223170, - "output_read_bases": 3405205631, - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "was_informed_by": "gold:Gp0127625", + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", "execution_resource": "NERSC-Cori", - "input_read_bases": 3657698670, - "name": "Read QC Activity for nmdc:mga04xnj45", - "output_read_count": 22768968, 
- "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-12-02T20:54:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127626_Filtered Reads", - "description": "Filtered Reads for Gp0127626", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filtered.fastq.gz", - "md5_checksum": "07499ad2f2b80f42bd7109732b1eef90", - "id": "nmdc:07499ad2f2b80f42bd7109732b1eef90", - "file_size_bytes": 1944721961 - }, - { - "name": "Gp0127626_Filtered Stats", - "description": "Filtered Stats for Gp0127626", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filterStats.txt", - "md5_checksum": "9089d07fdee5ed03e901c1656206af02", - "id": "nmdc:9089d07fdee5ed03e901c1656206af02", - "file_size_bytes": 287 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0bfpq58", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:29:50+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf30" + "$oid": "649b005f2ca5ee4adb139f89" }, "has_input": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90" + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" ], + "part_of": [ + "nmdc:mga0bfpq58" + ], + "ctg_logsum": 452076, + "scaf_logsum": 453436, + "gap_pct": 0.00138, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "nmdc:dd4023a1488bdfc73b12c422b62b274a", - "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "nmdc:806b27f1fa5a423100b113bb56edc708", - "nmdc:bb3e6793c4f036b9756f075d41846964" + "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + 
"nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", + "nmdc:d231edb2040700184064615a28e65ee5", + "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", + "nmdc:ff612445b348b65f906cd8858c4ec54e" ], - "was_informed_by": "gold:Gp0127626", - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:54:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127626_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", - "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", - "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "file_size_bytes": 2399 - }, - { - "name": "Gp0127626_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", - "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", - "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "file_size_bytes": 743066 - }, - { - "name": "Gp0127626_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127626", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", - "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", - "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "file_size_bytes": 233970 - }, - { - "name": "Gp0127626_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127626", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", - "md5_checksum": 
"dd4023a1488bdfc73b12c422b62b274a", - "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", - "file_size_bytes": 1673697764 - }, - { - "name": "Gp0127626_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127626", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", - "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", - "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127626_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127626", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", - "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", - "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "file_size_bytes": 2327521 - }, - { - "name": "Gp0127626_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127626", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", - "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", - "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "file_size_bytes": 1343921825 - }, - { - "name": "Gp0127626_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127626", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", - "md5_checksum": "806b27f1fa5a423100b113bb56edc708", - "id": "nmdc:806b27f1fa5a423100b113bb56edc708", - "file_size_bytes": 638478 - }, - { - "name": "Gp0127626_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127626", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", - "md5_checksum": "bb3e6793c4f036b9756f075d41846964", - "id": "nmdc:bb3e6793c4f036b9756f075d41846964", - "file_size_bytes": 3987411 - } - ] - }, + "asm_score": 3.923, + "was_informed_by": "gold:Gp0127625", + "ctg_powsum": 49204, + "scaf_max": 29400, + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "scaf_powsum": 49370, + "execution_resource": "NERSC-Cori", + "contigs": 300102, + "name": "Assembly Activity for nmdc:mga0bfpq58", + "ctg_max": 29400, + "gc_std": 0.0955, + "contig_bp": 159709614, + "gc_avg": 0.6367, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 159711824, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 299890, + "ended_at_time": "2021-10-11T03:29:50+00:00", + "ctg_l50": 546, + "ctg_l90": 301, + "ctg_n50": 78532, + "ctg_n90": 244428, + "scaf_l50": 546, + "scaf_l90": 301, + "scaf_n50": 78517, + "scaf_n90": 244244 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e7195e833bcf838a700602" + "$oid": "649b009773e8249959349b3b" }, + "id": "nmdc:omprc-11-76ebsj44", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90" + "nmdc:bsm-11-k3t2wk45" ], - "part_of": [ - "nmdc:mga04xnj45" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "nmdc:dd4023a1488bdfc73b12c422b62b274a", - 
"nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "nmdc:806b27f1fa5a423100b113bb56edc708", - "nmdc:bb3e6793c4f036b9756f075d41846964" + "jgi:574fde787ded5e3df1ee1416" ], - "was_informed_by": "gold:Gp0127626", - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:54:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127626_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", - "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", - "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "file_size_bytes": 2399 - }, - { - "name": "Gp0127626_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", - "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", - "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "file_size_bytes": 743066 - }, - { - "name": "Gp0127626_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127626", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", - "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", - "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "file_size_bytes": 233970 - }, - { - "name": "Gp0127626_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127626", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", - "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", - "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", - "file_size_bytes": 1673697764 - }, - { - "name": "Gp0127626_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127626", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", - "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", - "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127626_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127626", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", - "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", - "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "file_size_bytes": 2327521 - }, - { - "name": "Gp0127626_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127626", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", - "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", - "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "file_size_bytes": 1343921825 - }, - { - "name": "Gp0127626_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127626", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", - "md5_checksum": "806b27f1fa5a423100b113bb56edc708", - "id": "nmdc:806b27f1fa5a423100b113bb56edc708", - "file_size_bytes": 638478 - }, - { - "name": "Gp0127626_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127626", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", - "md5_checksum": "bb3e6793c4f036b9756f075d41846964", - "id": "nmdc:bb3e6793c4f036b9756f075d41846964", - "file_size_bytes": 3987411 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127625" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f9e" + "$oid": "649b009d6bdd4fd20273c858" }, "has_input": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90" + "nmdc:93c62425e46296c35415039d7fd9cb56" ], "part_of": [ - "nmdc:mga04xnj45" + "nmdc:mga0bfpq58" ], - "ctg_logsum": 63429, - "scaf_logsum": 63657, - "gap_pct": 0.00092, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", - "nmdc:2d89ade1cc6267bb77b48daa176442f2", - "nmdc:79588f527e08eace069ddc63171f004c", - "nmdc:cc855d3c15387d078c6919d1b19f8c05", - "nmdc:ef722a8ecd2b85d9202560df41eca7ed" + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", + "nmdc:42be49edad69619e550ddd69d150490f" ], - "asm_score": 7.629, - "was_informed_by": "gold:Gp0127626", - "ctg_powsum": 7359.443, - "scaf_max": 30685, - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "scaf_powsum": 7386.413, + "was_informed_by": "gold:Gp0127625", + "input_read_count": 26227312, + "output_read_bases": 3764845015, + "id": 
"nmdc:aa214e53dc8472f9ff99b02245d8f943", "execution_resource": "NERSC-Cori", - "contigs": 105397, - "name": "Assembly Activity for nmdc:mga04xnj45", - "ctg_max": 30685, - "gc_std": 0.09232, - "gc_avg": 0.60819, - "contig_bp": 43390261, - "started_at_time": "2021-12-01T21:31:29Z", - "scaf_bp": 43390661, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 105366, - "ended_at_time": "2021-12-02T20:54:56+00:00", - "ctg_l50": 368, - "ctg_l90": 284, - "ctg_n50": 34766, - "ctg_n90": 91597, - "scaf_l50": 368, - "scaf_l90": 284, - "scaf_n50": 34749, - "scaf_n90": 91567, - "output_data_objects": [ - { - "name": "Gp0127626_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127626", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_contigs.fna", - "md5_checksum": "6d72d9fb6a282f8872cd3d5b8ce1a29d", - "id": "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", - "file_size_bytes": 47315336 - }, - { - "name": "Gp0127626_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127626", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_scaffolds.fna", - "md5_checksum": "2d89ade1cc6267bb77b48daa176442f2", - "id": "nmdc:2d89ade1cc6267bb77b48daa176442f2", - "file_size_bytes": 46998743 - }, - { - "name": "Gp0127626_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_covstats.txt", - "md5_checksum": "79588f527e08eace069ddc63171f004c", - "id": "nmdc:79588f527e08eace069ddc63171f004c", - "file_size_bytes": 8270233 - }, - { - "name": "Gp0127626_Assembled AGP file", - "description": "Assembled AGP file for Gp0127626", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_assembly.agp", - 
"md5_checksum": "cc855d3c15387d078c6919d1b19f8c05", - "id": "nmdc:cc855d3c15387d078c6919d1b19f8c05", - "file_size_bytes": 7690333 - }, - { - "name": "Gp0127626_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127626", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_pairedMapped_sorted.bam", - "md5_checksum": "ef722a8ecd2b85d9202560df41eca7ed", - "id": "nmdc:ef722a8ecd2b85d9202560df41eca7ed", - "file_size_bytes": 2083099081 - } - ] - }, + "input_read_bases": 3960324112, + "name": "Read QC Activity for nmdc:mga0bfpq58", + "output_read_count": 25182244, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:29:50+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9b1" + "$oid": "649b009bff710ae353f8cf15" }, "has_input": [ - "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d" - ], - "part_of": [ - "nmdc:mga04xnj45" + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:26360324fcaed21fd48b54972cce09cb", - "nmdc:d2be135e631726360cf6ac23a3d56629", - "nmdc:b2fdf525bc1ddadb30427cba91c63483", - "nmdc:75ff61c1b51ace76d6e01930ae41c38c", - "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", - "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", - "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", - "nmdc:52e64eec8c715affde1612b871e2490e", - "nmdc:4687e89ae41c98bc49ca81ded0b4c622", - "nmdc:8cc7e6c8e232891c3ac7d952302905b6", - "nmdc:445ce659140104b37475c5c2e3fb7761", - "nmdc:32e15eb7eab763990dbb0ce947321718" + "nmdc:550b631e1de3e01392154e54493d47ef", + "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "nmdc:b09795fc768257d881e8ce547be0ce68", + "nmdc:064ba18473eb80ff0b484311565d2894", + "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + 
"nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "nmdc:bc8acb862c8942616ef07302667c334f", + "nmdc:b797ed6cb135c993b582cac368b2a93c" ], - "was_informed_by": "gold:Gp0127626", - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "was_informed_by": "gold:Gp0127625", + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga04xnj45", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-12-02T20:54:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127626_Protein FAA", - "description": "Protein FAA for Gp0127626", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_proteins.faa", - "md5_checksum": "26360324fcaed21fd48b54972cce09cb", - "id": "nmdc:26360324fcaed21fd48b54972cce09cb", - "file_size_bytes": 28150597 - }, - { - "name": "Gp0127626_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127626", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_structural_annotation.gff", - "md5_checksum": "d2be135e631726360cf6ac23a3d56629", - "id": "nmdc:d2be135e631726360cf6ac23a3d56629", - "file_size_bytes": 2511 - }, - { - "name": "Gp0127626_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127626", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_functional_annotation.gff", - "md5_checksum": "b2fdf525bc1ddadb30427cba91c63483", - "id": "nmdc:b2fdf525bc1ddadb30427cba91c63483", - "file_size_bytes": 33351979 - }, - { - "name": "Gp0127626_KO TSV file", - "description": "KO TSV file for Gp0127626", - "data_object_type": "Annotation KEGG Orthology", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko.tsv", - "md5_checksum": "75ff61c1b51ace76d6e01930ae41c38c", - "id": "nmdc:75ff61c1b51ace76d6e01930ae41c38c", - "file_size_bytes": 3842650 - }, - { - "name": "Gp0127626_EC TSV file", - "description": "EC TSV file for Gp0127626", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ec.tsv", - "md5_checksum": "4210daa7b1b0b84a6e5b6591e4e93c55", - "id": "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", - "file_size_bytes": 2561980 - }, - { - "name": "Gp0127626_COG GFF file", - "description": "COG GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cog.gff", - "md5_checksum": "cfd7a714b2e18f136d6dc48b9162e1c0", - "id": "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", - "file_size_bytes": 19108716 - }, - { - "name": "Gp0127626_PFAM GFF file", - "description": "PFAM GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_pfam.gff", - "md5_checksum": "80a1ed51631f5fbc43032aa4afbfbf1d", - "id": "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", - "file_size_bytes": 13800768 - }, - { - "name": "Gp0127626_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_tigrfam.gff", - "md5_checksum": "52e64eec8c715affde1612b871e2490e", - "id": "nmdc:52e64eec8c715affde1612b871e2490e", - "file_size_bytes": 1446190 - }, - { - "name": "Gp0127626_SMART GFF file", - "description": "SMART GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_smart.gff", - "md5_checksum": "4687e89ae41c98bc49ca81ded0b4c622", - "id": "nmdc:4687e89ae41c98bc49ca81ded0b4c622", - "file_size_bytes": 4252918 - }, - { - "name": "Gp0127626_SuperFam GFF file", - "description": "SuperFam GFF 
file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_supfam.gff", - "md5_checksum": "8cc7e6c8e232891c3ac7d952302905b6", - "id": "nmdc:8cc7e6c8e232891c3ac7d952302905b6", - "file_size_bytes": 24007157 - }, - { - "name": "Gp0127626_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cath_funfam.gff", - "md5_checksum": "445ce659140104b37475c5c2e3fb7761", - "id": "nmdc:445ce659140104b37475c5c2e3fb7761", - "file_size_bytes": 17990080 - }, - { - "name": "Gp0127626_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko_ec.gff", - "md5_checksum": "32e15eb7eab763990dbb0ce947321718", - "id": "nmdc:32e15eb7eab763990dbb0ce947321718", - "file_size_bytes": 12235401 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:29:50+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34712" + "$oid": "61e718e2833bcf838a6ff0ce" }, "has_input": [ - "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", - "nmdc:ef722a8ecd2b85d9202560df41eca7ed", - "nmdc:b2fdf525bc1ddadb30427cba91c63483" + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" ], - "too_short_contig_num": 102702, "part_of": [ - "nmdc:mga04xnj45" + "nmdc:mga0bfpq58" ], - "binned_contig_num": 230, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:66dea8d60f61c7a150ae4cbc3ce88757", - "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a" + "nmdc:550b631e1de3e01392154e54493d47ef", + 
"nmdc:3f14ff51550d9d78dae3a7ec08514907", + "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "nmdc:b09795fc768257d881e8ce547be0ce68", + "nmdc:064ba18473eb80ff0b484311565d2894", + "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "nmdc:bc8acb862c8942616ef07302667c334f", + "nmdc:b797ed6cb135c993b582cac368b2a93c" ], - "was_informed_by": "gold:Gp0127626", - "input_contig_num": 105397, - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "was_informed_by": "gold:Gp0127625", + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga04xnj45", - "mags_list": [ - { + "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:29:50+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127626_Filtered Reads", + "description": "Filtered Reads for Gp0127626", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filtered.fastq.gz", + "md5_checksum": "07499ad2f2b80f42bd7109732b1eef90", + "id": "nmdc:07499ad2f2b80f42bd7109732b1eef90", + "file_size_bytes": 1944721961 + }, + { + "name": "Gp0127626_Filtered Stats", + "description": "Filtered Stats for Gp0127626", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filterStats.txt", + "md5_checksum": "9089d07fdee5ed03e901c1656206af02", + "id": "nmdc:9089d07fdee5ed03e901c1656206af02", + "file_size_bytes": 287 + }, + { + "name": "Gp0127626_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127626", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", + "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", + "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "file_size_bytes": 2399 + }, + { + "name": "Gp0127626_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", + "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", + "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "file_size_bytes": 743066 + }, + { + "name": "Gp0127626_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127626", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", + "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", + "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "file_size_bytes": 233970 + }, + { + "name": "Gp0127626_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127626", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", + "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", + "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "file_size_bytes": 1673697764 + }, + { + "name": "Gp0127626_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127626", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", + "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", + "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127626_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127626", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", + "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", + "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "file_size_bytes": 2327521 + }, + { + "name": "Gp0127626_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127626", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", + "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", + "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "file_size_bytes": 1343921825 + }, + { + "name": "Gp0127626_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127626", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", + "md5_checksum": "806b27f1fa5a423100b113bb56edc708", + "id": "nmdc:806b27f1fa5a423100b113bb56edc708", + "file_size_bytes": 638478 + }, + { + "name": "Gp0127626_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127626", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", + "md5_checksum": "bb3e6793c4f036b9756f075d41846964", + "id": "nmdc:bb3e6793c4f036b9756f075d41846964", + "file_size_bytes": 3987411 + }, + { + "name": "Gp0127626_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", + "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", + "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "file_size_bytes": 2399 + }, + { + "name": "Gp0127626_Gottcha2 
full TSV report", + "description": "Gottcha2 full TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", + "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", + "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "file_size_bytes": 743066 + }, + { + "name": "Gp0127626_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127626", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", + "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", + "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "file_size_bytes": 233970 + }, + { + "name": "Gp0127626_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127626", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", + "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", + "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "file_size_bytes": 1673697764 + }, + { + "name": "Gp0127626_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127626", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", + "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", + "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127626_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127626", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", + "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", + "id": 
"nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "file_size_bytes": 2327521 + }, + { + "name": "Gp0127626_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127626", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", + "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", + "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "file_size_bytes": 1343921825 + }, + { + "name": "Gp0127626_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127626", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", + "md5_checksum": "806b27f1fa5a423100b113bb56edc708", + "id": "nmdc:806b27f1fa5a423100b113bb56edc708", + "file_size_bytes": 638478 + }, + { + "name": "Gp0127626_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127626", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", + "md5_checksum": "bb3e6793c4f036b9756f075d41846964", + "id": "nmdc:bb3e6793c4f036b9756f075d41846964", + "file_size_bytes": 3987411 + }, + { + "name": "Gp0127626_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127626", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_contigs.fna", + "md5_checksum": "6d72d9fb6a282f8872cd3d5b8ce1a29d", + "id": "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "file_size_bytes": 47315336 + }, + { + "name": "Gp0127626_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127626", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_scaffolds.fna", + 
"md5_checksum": "2d89ade1cc6267bb77b48daa176442f2", + "id": "nmdc:2d89ade1cc6267bb77b48daa176442f2", + "file_size_bytes": 46998743 + }, + { + "name": "Gp0127626_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_covstats.txt", + "md5_checksum": "79588f527e08eace069ddc63171f004c", + "id": "nmdc:79588f527e08eace069ddc63171f004c", + "file_size_bytes": 8270233 + }, + { + "name": "Gp0127626_Assembled AGP file", + "description": "Assembled AGP file for Gp0127626", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_assembly.agp", + "md5_checksum": "cc855d3c15387d078c6919d1b19f8c05", + "id": "nmdc:cc855d3c15387d078c6919d1b19f8c05", + "file_size_bytes": 7690333 + }, + { + "name": "Gp0127626_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127626", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_pairedMapped_sorted.bam", + "md5_checksum": "ef722a8ecd2b85d9202560df41eca7ed", + "id": "nmdc:ef722a8ecd2b85d9202560df41eca7ed", + "file_size_bytes": 2083099081 + }, + { + "name": "Gp0127626_Protein FAA", + "description": "Protein FAA for Gp0127626", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_proteins.faa", + "md5_checksum": "26360324fcaed21fd48b54972cce09cb", + "id": "nmdc:26360324fcaed21fd48b54972cce09cb", + "file_size_bytes": 28150597 + }, + { + "name": "Gp0127626_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127626", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_structural_annotation.gff", + "md5_checksum": 
"d2be135e631726360cf6ac23a3d56629", + "id": "nmdc:d2be135e631726360cf6ac23a3d56629", + "file_size_bytes": 2511 + }, + { + "name": "Gp0127626_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127626", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_functional_annotation.gff", + "md5_checksum": "b2fdf525bc1ddadb30427cba91c63483", + "id": "nmdc:b2fdf525bc1ddadb30427cba91c63483", + "file_size_bytes": 33351979 + }, + { + "name": "Gp0127626_KO TSV file", + "description": "KO TSV file for Gp0127626", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko.tsv", + "md5_checksum": "75ff61c1b51ace76d6e01930ae41c38c", + "id": "nmdc:75ff61c1b51ace76d6e01930ae41c38c", + "file_size_bytes": 3842650 + }, + { + "name": "Gp0127626_EC TSV file", + "description": "EC TSV file for Gp0127626", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ec.tsv", + "md5_checksum": "4210daa7b1b0b84a6e5b6591e4e93c55", + "id": "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", + "file_size_bytes": 2561980 + }, + { + "name": "Gp0127626_COG GFF file", + "description": "COG GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cog.gff", + "md5_checksum": "cfd7a714b2e18f136d6dc48b9162e1c0", + "id": "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", + "file_size_bytes": 19108716 + }, + { + "name": "Gp0127626_PFAM GFF file", + "description": "PFAM GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_pfam.gff", + "md5_checksum": "80a1ed51631f5fbc43032aa4afbfbf1d", + "id": "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", + "file_size_bytes": 13800768 + }, + { + "name": "Gp0127626_TigrFam GFF file", + 
"description": "TigrFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_tigrfam.gff", + "md5_checksum": "52e64eec8c715affde1612b871e2490e", + "id": "nmdc:52e64eec8c715affde1612b871e2490e", + "file_size_bytes": 1446190 + }, + { + "name": "Gp0127626_SMART GFF file", + "description": "SMART GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_smart.gff", + "md5_checksum": "4687e89ae41c98bc49ca81ded0b4c622", + "id": "nmdc:4687e89ae41c98bc49ca81ded0b4c622", + "file_size_bytes": 4252918 + }, + { + "name": "Gp0127626_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_supfam.gff", + "md5_checksum": "8cc7e6c8e232891c3ac7d952302905b6", + "id": "nmdc:8cc7e6c8e232891c3ac7d952302905b6", + "file_size_bytes": 24007157 + }, + { + "name": "Gp0127626_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cath_funfam.gff", + "md5_checksum": "445ce659140104b37475c5c2e3fb7761", + "id": "nmdc:445ce659140104b37475c5c2e3fb7761", + "file_size_bytes": 17990080 + }, + { + "name": "Gp0127626_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko_ec.gff", + "md5_checksum": "32e15eb7eab763990dbb0ce947321718", + "id": "nmdc:32e15eb7eab763990dbb0ce947321718", + "file_size_bytes": 12235401 + }, + { + "name": "Gp0127626_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127626", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_checkm_qa.out", + "md5_checksum": "66dea8d60f61c7a150ae4cbc3ce88757", + "id": 
"nmdc:66dea8d60f61c7a150ae4cbc3ce88757", + "file_size_bytes": 765 + }, + { + "name": "Gp0127626_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127626", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_hqmq_bin.zip", + "md5_checksum": "1b0f148dc6a3a6a007482d1b03fe7e6a", + "id": "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a", + "file_size_bytes": 520239 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34712" + }, + "has_input": [ + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "nmdc:ef722a8ecd2b85d9202560df41eca7ed", + "nmdc:b2fdf525bc1ddadb30427cba91c63483" + ], + "too_short_contig_num": 102702, + "part_of": [ + "nmdc:mga04xnj45" + ], + "binned_contig_num": 230, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:66dea8d60f61c7a150ae4cbc3ce88757", + "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a" + ], + "was_informed_by": "gold:Gp0127626", + "input_contig_num": 105397, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga04xnj45", + "mags_list": [ + { "number_of_contig": 230, "completeness": 81.4, "bin_name": "bins.1", @@ -6845,1256 +6913,1343 @@ "unbinned_contig_num": 2465, "started_at_time": "2021-12-01T21:31:29Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-12-02T20:54:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127626_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127626", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_checkm_qa.out", - "md5_checksum": "66dea8d60f61c7a150ae4cbc3ce88757", - "id": 
"nmdc:66dea8d60f61c7a150ae4cbc3ce88757", - "file_size_bytes": 765 - }, - { - "name": "Gp0127626_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127626", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_hqmq_bin.zip", - "md5_checksum": "1b0f148dc6a3a6a007482d1b03fe7e6a", - "id": "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a", - "file_size_bytes": 520239 - } - ] + "ended_at_time": "2021-12-02T20:54:56+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b3d" - }, - "id": "nmdc:omprc-11-x0es2p18", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-msqbhe76" - ], - "has_output": [ - "jgi:574fde577ded5e3df1ee13fc" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127624" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c85b" + "$oid": "649b005bbf2caae0415ef9b1" }, "has_input": [ - "nmdc:e24b00c4de7a24629f5933940070e06c" + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d" ], "part_of": [ - "nmdc:mga0e8jh10" + "nmdc:mga04xnj45" ], 
"git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8585f6896702bddf64b02191be5921f4", - "nmdc:b9b6464ecc746a4cc39b549696c5fe9c" + "nmdc:26360324fcaed21fd48b54972cce09cb", + "nmdc:d2be135e631726360cf6ac23a3d56629", + "nmdc:b2fdf525bc1ddadb30427cba91c63483", + "nmdc:75ff61c1b51ace76d6e01930ae41c38c", + "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", + "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", + "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", + "nmdc:52e64eec8c715affde1612b871e2490e", + "nmdc:4687e89ae41c98bc49ca81ded0b4c622", + "nmdc:8cc7e6c8e232891c3ac7d952302905b6", + "nmdc:445ce659140104b37475c5c2e3fb7761", + "nmdc:32e15eb7eab763990dbb0ce947321718" ], - "was_informed_by": "gold:Gp0127624", - "input_read_count": 25674112, - "output_read_bases": 3361311014, - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "was_informed_by": "gold:Gp0127626", + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", "execution_resource": "NERSC-Cori", - "input_read_bases": 3876790912, - "name": "Read QC Activity for nmdc:mga0e8jh10", - "output_read_count": 22503352, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:30:59+00:00", - "output_data_objects": [ - { - "name": "Gp0127624_Filtered Reads", - "description": "Filtered Reads for Gp0127624", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filtered.fastq.gz", - "md5_checksum": "8585f6896702bddf64b02191be5921f4", - "id": "nmdc:8585f6896702bddf64b02191be5921f4", - "file_size_bytes": 1795382596 - }, - { - "name": "Gp0127624_Filtered Stats", - "description": "Filtered Stats for Gp0127624", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filterStats.txt", - "md5_checksum": "b9b6464ecc746a4cc39b549696c5fe9c", - "id": "nmdc:b9b6464ecc746a4cc39b549696c5fe9c", - "file_size_bytes": 289 - } - ] - 
}, + "name": "Annotation Activity for nmdc:mga04xnj45", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T20:54:56+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf1c" + "$oid": "649b005f2ca5ee4adb139f9e" }, "has_input": [ - "nmdc:8585f6896702bddf64b02191be5921f4" + "nmdc:07499ad2f2b80f42bd7109732b1eef90" ], + "part_of": [ + "nmdc:mga04xnj45" + ], + "ctg_logsum": 63429, + "scaf_logsum": 63657, + "gap_pct": 0.00092, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "nmdc:6c7fec765f2a225f168ebb1f69961013", - "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "nmdc:77db34862804280185d3b1ce961e5338", - "nmdc:84e3efb84d961d189ece310911ccf475", - "nmdc:b8fd31679921f8b68c80917e14caa260", - "nmdc:715c66c69b621478da7d48481f3cbd1d", - "nmdc:0781e8042688219035efafe7d75858d0", - "nmdc:85547ab860ef9d6877ba7abc8881740a" + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "nmdc:2d89ade1cc6267bb77b48daa176442f2", + "nmdc:79588f527e08eace069ddc63171f004c", + "nmdc:cc855d3c15387d078c6919d1b19f8c05", + "nmdc:ef722a8ecd2b85d9202560df41eca7ed" ], - "was_informed_by": "gold:Gp0127624", - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "asm_score": 7.629, + "was_informed_by": "gold:Gp0127626", + "ctg_powsum": 7359.443, + "scaf_max": 30685, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "scaf_powsum": 7386.413, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:30:59+00:00", - "output_data_objects": [ - { - "name": "Gp0127624_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", - "md5_checksum": 
"fef871a81032dd1f3e57dc1c7d5aa3db", - "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "file_size_bytes": 1500 - }, - { - "name": "Gp0127624_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", - "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", - "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", - "file_size_bytes": 692993 - }, - { - "name": "Gp0127624_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127624", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", - "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", - "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "file_size_bytes": 230779 - }, - { - "name": "Gp0127624_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127624", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", - "md5_checksum": "77db34862804280185d3b1ce961e5338", - "id": "nmdc:77db34862804280185d3b1ce961e5338", - "file_size_bytes": 1645928829 - }, - { - "name": "Gp0127624_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127624", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", - "md5_checksum": "84e3efb84d961d189ece310911ccf475", - "id": "nmdc:84e3efb84d961d189ece310911ccf475", - "file_size_bytes": 254646 - }, - { - "name": "Gp0127624_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127624", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", - "md5_checksum": "b8fd31679921f8b68c80917e14caa260", - "id": "nmdc:b8fd31679921f8b68c80917e14caa260", - "file_size_bytes": 2332082 - }, - { - "name": "Gp0127624_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127624", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", - "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", - "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", - "file_size_bytes": 1316771556 - }, - { - "name": "Gp0127624_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127624", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", - "md5_checksum": "0781e8042688219035efafe7d75858d0", - "id": "nmdc:0781e8042688219035efafe7d75858d0", - "file_size_bytes": 626940 - }, - { - "name": "Gp0127624_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127624", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", - "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", - "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", - "file_size_bytes": 3921891 - } - ] - }, + "contigs": 105397, + "name": "Assembly Activity for nmdc:mga04xnj45", + "ctg_max": 30685, + "gc_std": 0.09232, + "gc_avg": 0.60819, + "contig_bp": 43390261, + "started_at_time": "2021-12-01T21:31:29Z", + "scaf_bp": 43390661, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 105366, + "ended_at_time": "2021-12-02T20:54:56+00:00", + "ctg_l50": 368, + "ctg_l90": 284, + "ctg_n50": 34766, + "ctg_n90": 91597, + "scaf_l50": 368, + "scaf_l90": 284, + "scaf_n50": 34749, + 
"scaf_n90": 91567 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e7191b833bcf838a6ff905" + "$oid": "649b009773e8249959349b3c" }, + "id": "nmdc:omprc-11-s6wqag22", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:8585f6896702bddf64b02191be5921f4" - ], - "part_of": [ - "nmdc:mga0e8jh10" + "nmdc:bsm-11-mxdygh62" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "nmdc:6c7fec765f2a225f168ebb1f69961013", - "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "nmdc:77db34862804280185d3b1ce961e5338", - "nmdc:84e3efb84d961d189ece310911ccf475", - "nmdc:b8fd31679921f8b68c80917e14caa260", - "nmdc:715c66c69b621478da7d48481f3cbd1d", - "nmdc:0781e8042688219035efafe7d75858d0", - "nmdc:85547ab860ef9d6877ba7abc8881740a" + "jgi:574fde7b7ded5e3df1ee1418" ], - "was_informed_by": "gold:Gp0127624", - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:30:59+00:00", - "output_data_objects": [ - { - "name": "Gp0127624_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", - "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", - "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "file_size_bytes": 1500 - }, - { - "name": 
"Gp0127624_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", - "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", - "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", - "file_size_bytes": 692993 - }, - { - "name": "Gp0127624_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127624", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", - "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", - "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "file_size_bytes": 230779 - }, - { - "name": "Gp0127624_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127624", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", - "md5_checksum": "77db34862804280185d3b1ce961e5338", - "id": "nmdc:77db34862804280185d3b1ce961e5338", - "file_size_bytes": 1645928829 - }, - { - "name": "Gp0127624_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127624", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", - "md5_checksum": "84e3efb84d961d189ece310911ccf475", - "id": "nmdc:84e3efb84d961d189ece310911ccf475", - "file_size_bytes": 254646 - }, - { - "name": "Gp0127624_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127624", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", - "md5_checksum": "b8fd31679921f8b68c80917e14caa260", - 
"id": "nmdc:b8fd31679921f8b68c80917e14caa260", - "file_size_bytes": 2332082 - }, - { - "name": "Gp0127624_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127624", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", - "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", - "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", - "file_size_bytes": 1316771556 - }, - { - "name": "Gp0127624_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127624", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", - "md5_checksum": "0781e8042688219035efafe7d75858d0", - "id": "nmdc:0781e8042688219035efafe7d75858d0", - "file_size_bytes": 626940 - }, - { - "name": "Gp0127624_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127624", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", - "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", - "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", - "file_size_bytes": 3921891 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127626" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f8d" + "$oid": 
"649b009d6bdd4fd20273c86c" }, "has_input": [ - "nmdc:8585f6896702bddf64b02191be5921f4" + "nmdc:8bee270fc5b3a39f7e7609b60e191766" ], "part_of": [ - "nmdc:mga0e8jh10" + "nmdc:mga04xnj45" ], - "ctg_logsum": 174168, - "scaf_logsum": 174680, - "gap_pct": 0.0009, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", - "nmdc:0a50f88775f36e9238152f3319252853", - "nmdc:f0dc2f598fa06efbe99843bddaf54f60", - "nmdc:a4405d49e8efe2ee124d25e2414de56c", - "nmdc:8c37ab0b3594cc975348041e4841f6ac" + "nmdc:07499ad2f2b80f42bd7109732b1eef90", + "nmdc:9089d07fdee5ed03e901c1656206af02" ], - "asm_score": 5.95, - "was_informed_by": "gold:Gp0127624", - "ctg_powsum": 19404, - "scaf_max": 33408, - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", - "scaf_powsum": 19462, + "was_informed_by": "gold:Gp0127626", + "input_read_count": 24223170, + "output_read_bases": 3405205631, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", "execution_resource": "NERSC-Cori", - "contigs": 191010, - "name": "Assembly Activity for nmdc:mga0e8jh10", - "ctg_max": 33408, - "gc_std": 0.09154, - "contig_bp": 88102698, - "gc_avg": 0.62452, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 88103488, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 190940, - "ended_at_time": "2021-10-11T03:30:59+00:00", - "ctg_l50": 434, - "ctg_l90": 288, - "ctg_n50": 56361, - "ctg_n90": 162547, - "scaf_l50": 434, - "scaf_l90": 288, - "scaf_n50": 56334, - "scaf_n90": 162481, - "output_data_objects": [ - { - "name": "Gp0127624_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127624", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", - "md5_checksum": "464a9db7a94e7e0646b1ff8b501d82f3", - "id": "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", - "file_size_bytes": 95468011 - }, - { - "name": "Gp0127624_Assembled scaffold fasta", - "description": 
"Assembled scaffold fasta for Gp0127624", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", - "md5_checksum": "0a50f88775f36e9238152f3319252853", - "id": "nmdc:0a50f88775f36e9238152f3319252853", - "file_size_bytes": 94893921 - }, - { - "name": "Gp0127624_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", - "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", - "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", - "file_size_bytes": 15112642 - }, - { - "name": "Gp0127624_Assembled AGP file", - "description": "Assembled AGP file for Gp0127624", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_assembly.agp", - "md5_checksum": "a4405d49e8efe2ee124d25e2414de56c", - "id": "nmdc:a4405d49e8efe2ee124d25e2414de56c", - "file_size_bytes": 14126849 - }, - { - "name": "Gp0127624_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127624", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_pairedMapped_sorted.bam", - "md5_checksum": "8c37ab0b3594cc975348041e4841f6ac", - "id": "nmdc:8c37ab0b3594cc975348041e4841f6ac", - "file_size_bytes": 1976821836 - } - ] - }, + "input_read_bases": 3657698670, + "name": "Read QC Activity for nmdc:mga04xnj45", + "output_read_count": 22768968, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T20:54:56+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9a2" + "$oid": "649b009bff710ae353f8cf30" }, "has_input": [ - "nmdc:464a9db7a94e7e0646b1ff8b501d82f3" - ], - "part_of": [ - "nmdc:mga0e8jh10" + 
"nmdc:07499ad2f2b80f42bd7109732b1eef90" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:40d15cb24063dbb6097fd1626f62db95", - "nmdc:f70325438abce4c6f56e6c82619dd44a", - "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", - "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", - "nmdc:303a5e88a0eae8942082e9e13f9f6eba", - "nmdc:d919f65e54a8351324e332a5daa6a831", - "nmdc:764c7c2b5554fc6b860b036cab22e0ef", - "nmdc:d0a86560767836f901bdd2625bea46e3", - "nmdc:2f64111072a2b19a726aed9c9f54bba7", - "nmdc:51a011777869ff58b977991f5c90fc47", - "nmdc:53f57253df5119d338b9813aa81c7c9b", - "nmdc:c4aa03608fa7442a05cd23fdcc29bc21" + "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "nmdc:806b27f1fa5a423100b113bb56edc708", + "nmdc:bb3e6793c4f036b9756f075d41846964" ], - "was_informed_by": "gold:Gp0127624", - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "was_informed_by": "gold:Gp0127626", + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0e8jh10", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:30:59+00:00", - "output_data_objects": [ - { - "name": "Gp0127624_Protein FAA", - "description": "Protein FAA for Gp0127624", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_proteins.faa", - "md5_checksum": "40d15cb24063dbb6097fd1626f62db95", - "id": "nmdc:40d15cb24063dbb6097fd1626f62db95", - "file_size_bytes": 55458746 - }, - { - "name": "Gp0127624_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127624", - "data_object_type": 
"Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_structural_annotation.gff", - "md5_checksum": "f70325438abce4c6f56e6c82619dd44a", - "id": "nmdc:f70325438abce4c6f56e6c82619dd44a", - "file_size_bytes": 2518 - }, - { - "name": "Gp0127624_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127624", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_functional_annotation.gff", - "md5_checksum": "c5cf33c1f2f68a7c63fef6dd623a97c0", - "id": "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", - "file_size_bytes": 63778960 - }, - { - "name": "Gp0127624_KO TSV file", - "description": "KO TSV file for Gp0127624", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko.tsv", - "md5_checksum": "4aca66fe81c8c056fa5617c7aa77bc7d", - "id": "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", - "file_size_bytes": 7252005 - }, - { - "name": "Gp0127624_EC TSV file", - "description": "EC TSV file for Gp0127624", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ec.tsv", - "md5_checksum": "303a5e88a0eae8942082e9e13f9f6eba", - "id": "nmdc:303a5e88a0eae8942082e9e13f9f6eba", - "file_size_bytes": 4835920 - }, - { - "name": "Gp0127624_COG GFF file", - "description": "COG GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cog.gff", - "md5_checksum": "d919f65e54a8351324e332a5daa6a831", - "id": "nmdc:d919f65e54a8351324e332a5daa6a831", - "file_size_bytes": 37494199 - }, - { - "name": "Gp0127624_PFAM GFF file", - "description": "PFAM GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_pfam.gff", - "md5_checksum": 
"764c7c2b5554fc6b860b036cab22e0ef", - "id": "nmdc:764c7c2b5554fc6b860b036cab22e0ef", - "file_size_bytes": 27739105 - }, - { - "name": "Gp0127624_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_tigrfam.gff", - "md5_checksum": "d0a86560767836f901bdd2625bea46e3", - "id": "nmdc:d0a86560767836f901bdd2625bea46e3", - "file_size_bytes": 3077428 - }, - { - "name": "Gp0127624_SMART GFF file", - "description": "SMART GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_smart.gff", - "md5_checksum": "2f64111072a2b19a726aed9c9f54bba7", - "id": "nmdc:2f64111072a2b19a726aed9c9f54bba7", - "file_size_bytes": 8547849 - }, - { - "name": "Gp0127624_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_supfam.gff", - "md5_checksum": "51a011777869ff58b977991f5c90fc47", - "id": "nmdc:51a011777869ff58b977991f5c90fc47", - "file_size_bytes": 46844460 - }, - { - "name": "Gp0127624_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cath_funfam.gff", - "md5_checksum": "53f57253df5119d338b9813aa81c7c9b", - "id": "nmdc:53f57253df5119d338b9813aa81c7c9b", - "file_size_bytes": 35558659 - }, - { - "name": "Gp0127624_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko_ec.gff", - "md5_checksum": "c4aa03608fa7442a05cd23fdcc29bc21", - "id": "nmdc:c4aa03608fa7442a05cd23fdcc29bc21", - "file_size_bytes": 23055213 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": 
"2021-12-02T20:54:56+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34701" + "$oid": "61e7195e833bcf838a700602" }, "has_input": [ - "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", - "nmdc:8c37ab0b3594cc975348041e4841f6ac", - "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0" + "nmdc:07499ad2f2b80f42bd7109732b1eef90" ], - "too_short_contig_num": 182057, "part_of": [ - "nmdc:mga0e8jh10" + "nmdc:mga04xnj45" ], - "binned_contig_num": 364, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", - "nmdc:822be4fbeadb0c8c24f4a680d646b62f", - "nmdc:6b39bdb404c651428634ad28f8f15e2a", - "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", - "nmdc:2d174febedeca0ce515939dd53d6ccb9" + "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "nmdc:806b27f1fa5a423100b113bb56edc708", + "nmdc:bb3e6793c4f036b9756f075d41846964" ], - "was_informed_by": "gold:Gp0127624", - "input_contig_num": 191010, - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "was_informed_by": "gold:Gp0127626", + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0e8jh10", - "mags_list": [ - { - "number_of_contig": 69, - "completeness": 11.21, - "bin_name": "bins.1", - "gene_count": 328, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - 
"num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 8 - }, - { - "number_of_contig": 194, - "completeness": 75.24, - "bin_name": "bins.2", - "gene_count": 2023, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 1.78, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 35 - }, - { - "number_of_contig": 101, - "completeness": 19.54, - "bin_name": "bins.3", - "gene_count": 585, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 10 - } - ], - "unbinned_contig_num": 8589, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:30:59+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127624_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.tooShort.fa", - "md5_checksum": "73aca2cc587d8a632a730dcc6ff53d3b", - "id": "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", - "file_size_bytes": 79198373 - }, - { - "name": "Gp0127624_unbinned fasta file from 
metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.unbinned.fa", - "md5_checksum": "822be4fbeadb0c8c24f4a680d646b62f", - "id": "nmdc:822be4fbeadb0c8c24f4a680d646b62f", - "file_size_bytes": 13854717 - }, - { - "name": "Gp0127624_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127624", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_checkm_qa.out", - "md5_checksum": "6b39bdb404c651428634ad28f8f15e2a", - "id": "nmdc:6b39bdb404c651428634ad28f8f15e2a", - "file_size_bytes": 1106 - }, - { - "name": "Gp0127624_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127624", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_hqmq_bin.zip", - "md5_checksum": "0bd9d9e5f15087ccd35c38956bb3a210", - "id": "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", - "file_size_bytes": 507790 - }, - { - "name": "Gp0127624_metabat2 bins", - "description": "metabat2 bins for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_metabat_bin.zip", - "md5_checksum": "2d174febedeca0ce515939dd53d6ccb9", - "id": "nmdc:2d174febedeca0ce515939dd53d6ccb9", - "file_size_bytes": 230699 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:54:56+00:00" } ] }, { - "_id": { - "$oid": "649b009773e8249959349b3e" - }, - "id": "nmdc:omprc-11-1nvcer55", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", - "description": "Riverbed sediment samples were collected from an area with a lot 
of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-3sfanv57" - ], - "has_output": [ - "jgi:574fde587ded5e3df1ee13fd" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127629" - ], - "downstream_workflow_activity_records": [ + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127624_Filtered Reads", + "description": "Filtered Reads for Gp0127624", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filtered.fastq.gz", + "md5_checksum": "8585f6896702bddf64b02191be5921f4", + "id": "nmdc:8585f6896702bddf64b02191be5921f4", + "file_size_bytes": 1795382596 + }, { - "_id": { - "$oid": "649b009d6bdd4fd20273c869" - }, - "has_input": [ - "nmdc:22f8150866c51b35726066d2ec13c5ca" - ], - "part_of": [ - "nmdc:mga071r920" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0db98173ae3395106e24d250b2655f06", - "nmdc:bc0874c01bbd31c644cd598e2fdad3c4" - ], - "was_informed_by": "gold:Gp0127629", - "input_read_count": 23886420, - "output_read_bases": 3395256515, - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3606849420, - "name": "Read QC Activity for nmdc:mga071r920", - "output_read_count": 22738452, - 
"started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:33:33+00:00", - "output_data_objects": [ - { - "name": "Gp0127629_Filtered Reads", - "description": "Filtered Reads for Gp0127629", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filtered.fastq.gz", - "md5_checksum": "0db98173ae3395106e24d250b2655f06", - "id": "nmdc:0db98173ae3395106e24d250b2655f06", - "file_size_bytes": 1807840952 - }, - { - "name": "Gp0127629_Filtered Stats", - "description": "Filtered Stats for Gp0127629", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filterStats.txt", - "md5_checksum": "bc0874c01bbd31c644cd598e2fdad3c4", - "id": "nmdc:bc0874c01bbd31c644cd598e2fdad3c4", - "file_size_bytes": 284 - } - ] + "name": "Gp0127624_Filtered Stats", + "description": "Filtered Stats for Gp0127624", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filterStats.txt", + "md5_checksum": "b9b6464ecc746a4cc39b549696c5fe9c", + "id": "nmdc:b9b6464ecc746a4cc39b549696c5fe9c", + "file_size_bytes": 289 + }, + { + "name": "Gp0127624_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", + "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", + "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "file_size_bytes": 1500 + }, + { + "name": "Gp0127624_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", + "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", + "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", + "file_size_bytes": 692993 + }, + { + 
"name": "Gp0127624_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127624", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", + "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", + "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "file_size_bytes": 230779 + }, + { + "name": "Gp0127624_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127624", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", + "md5_checksum": "77db34862804280185d3b1ce961e5338", + "id": "nmdc:77db34862804280185d3b1ce961e5338", + "file_size_bytes": 1645928829 + }, + { + "name": "Gp0127624_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127624", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", + "md5_checksum": "84e3efb84d961d189ece310911ccf475", + "id": "nmdc:84e3efb84d961d189ece310911ccf475", + "file_size_bytes": 254646 + }, + { + "name": "Gp0127624_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127624", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", + "md5_checksum": "b8fd31679921f8b68c80917e14caa260", + "id": "nmdc:b8fd31679921f8b68c80917e14caa260", + "file_size_bytes": 2332082 + }, + { + "name": "Gp0127624_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127624", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", + "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", + "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", + "file_size_bytes": 1316771556 + }, + { + "name": "Gp0127624_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127624", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", + "md5_checksum": "0781e8042688219035efafe7d75858d0", + "id": "nmdc:0781e8042688219035efafe7d75858d0", + "file_size_bytes": 626940 + }, + { + "name": "Gp0127624_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127624", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", + "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", + "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", + "file_size_bytes": 3921891 }, + { + "name": "Gp0127624_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", + "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", + "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "file_size_bytes": 1500 + }, + { + "name": "Gp0127624_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", + "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", + "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", + "file_size_bytes": 692993 + }, + { + "name": "Gp0127624_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127624", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", + "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", + "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "file_size_bytes": 230779 + }, + { + "name": "Gp0127624_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127624", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", + "md5_checksum": "77db34862804280185d3b1ce961e5338", + "id": "nmdc:77db34862804280185d3b1ce961e5338", + "file_size_bytes": 1645928829 + }, + { + "name": "Gp0127624_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127624", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", + "md5_checksum": "84e3efb84d961d189ece310911ccf475", + "id": "nmdc:84e3efb84d961d189ece310911ccf475", + "file_size_bytes": 254646 + }, + { + "name": "Gp0127624_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127624", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", + "md5_checksum": "b8fd31679921f8b68c80917e14caa260", + "id": "nmdc:b8fd31679921f8b68c80917e14caa260", + "file_size_bytes": 2332082 + }, + { + "name": "Gp0127624_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127624", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", + "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", + "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", + "file_size_bytes": 
1316771556 + }, + { + "name": "Gp0127624_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127624", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", + "md5_checksum": "0781e8042688219035efafe7d75858d0", + "id": "nmdc:0781e8042688219035efafe7d75858d0", + "file_size_bytes": 626940 + }, + { + "name": "Gp0127624_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127624", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", + "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", + "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", + "file_size_bytes": 3921891 + }, + { + "name": "Gp0127624_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127624", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", + "md5_checksum": "464a9db7a94e7e0646b1ff8b501d82f3", + "id": "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "file_size_bytes": 95468011 + }, + { + "name": "Gp0127624_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127624", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", + "md5_checksum": "0a50f88775f36e9238152f3319252853", + "id": "nmdc:0a50f88775f36e9238152f3319252853", + "file_size_bytes": 94893921 + }, + { + "name": "Gp0127624_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", + "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", + "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", + "file_size_bytes": 15112642 + }, + { + 
"name": "Gp0127624_Assembled AGP file", + "description": "Assembled AGP file for Gp0127624", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_assembly.agp", + "md5_checksum": "a4405d49e8efe2ee124d25e2414de56c", + "id": "nmdc:a4405d49e8efe2ee124d25e2414de56c", + "file_size_bytes": 14126849 + }, + { + "name": "Gp0127624_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127624", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_pairedMapped_sorted.bam", + "md5_checksum": "8c37ab0b3594cc975348041e4841f6ac", + "id": "nmdc:8c37ab0b3594cc975348041e4841f6ac", + "file_size_bytes": 1976821836 + }, + { + "name": "Gp0127624_Protein FAA", + "description": "Protein FAA for Gp0127624", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_proteins.faa", + "md5_checksum": "40d15cb24063dbb6097fd1626f62db95", + "id": "nmdc:40d15cb24063dbb6097fd1626f62db95", + "file_size_bytes": 55458746 + }, + { + "name": "Gp0127624_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127624", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_structural_annotation.gff", + "md5_checksum": "f70325438abce4c6f56e6c82619dd44a", + "id": "nmdc:f70325438abce4c6f56e6c82619dd44a", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127624_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127624", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_functional_annotation.gff", + "md5_checksum": "c5cf33c1f2f68a7c63fef6dd623a97c0", + "id": "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", + 
"file_size_bytes": 63778960 + }, + { + "name": "Gp0127624_KO TSV file", + "description": "KO TSV file for Gp0127624", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko.tsv", + "md5_checksum": "4aca66fe81c8c056fa5617c7aa77bc7d", + "id": "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", + "file_size_bytes": 7252005 + }, + { + "name": "Gp0127624_EC TSV file", + "description": "EC TSV file for Gp0127624", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ec.tsv", + "md5_checksum": "303a5e88a0eae8942082e9e13f9f6eba", + "id": "nmdc:303a5e88a0eae8942082e9e13f9f6eba", + "file_size_bytes": 4835920 + }, + { + "name": "Gp0127624_COG GFF file", + "description": "COG GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cog.gff", + "md5_checksum": "d919f65e54a8351324e332a5daa6a831", + "id": "nmdc:d919f65e54a8351324e332a5daa6a831", + "file_size_bytes": 37494199 + }, + { + "name": "Gp0127624_PFAM GFF file", + "description": "PFAM GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_pfam.gff", + "md5_checksum": "764c7c2b5554fc6b860b036cab22e0ef", + "id": "nmdc:764c7c2b5554fc6b860b036cab22e0ef", + "file_size_bytes": 27739105 + }, + { + "name": "Gp0127624_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_tigrfam.gff", + "md5_checksum": "d0a86560767836f901bdd2625bea46e3", + "id": "nmdc:d0a86560767836f901bdd2625bea46e3", + "file_size_bytes": 3077428 + }, + { + "name": "Gp0127624_SMART GFF file", + "description": "SMART GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_smart.gff", + "md5_checksum": 
"2f64111072a2b19a726aed9c9f54bba7", + "id": "nmdc:2f64111072a2b19a726aed9c9f54bba7", + "file_size_bytes": 8547849 + }, + { + "name": "Gp0127624_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_supfam.gff", + "md5_checksum": "51a011777869ff58b977991f5c90fc47", + "id": "nmdc:51a011777869ff58b977991f5c90fc47", + "file_size_bytes": 46844460 + }, + { + "name": "Gp0127624_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cath_funfam.gff", + "md5_checksum": "53f57253df5119d338b9813aa81c7c9b", + "id": "nmdc:53f57253df5119d338b9813aa81c7c9b", + "file_size_bytes": 35558659 + }, + { + "name": "Gp0127624_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko_ec.gff", + "md5_checksum": "c4aa03608fa7442a05cd23fdcc29bc21", + "id": "nmdc:c4aa03608fa7442a05cd23fdcc29bc21", + "file_size_bytes": 23055213 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127624_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.tooShort.fa", + "md5_checksum": "73aca2cc587d8a632a730dcc6ff53d3b", + "id": "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", + "file_size_bytes": 79198373 + }, + { + 
"name": "Gp0127624_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.unbinned.fa", + "md5_checksum": "822be4fbeadb0c8c24f4a680d646b62f", + "id": "nmdc:822be4fbeadb0c8c24f4a680d646b62f", + "file_size_bytes": 13854717 + }, + { + "name": "Gp0127624_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127624", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_checkm_qa.out", + "md5_checksum": "6b39bdb404c651428634ad28f8f15e2a", + "id": "nmdc:6b39bdb404c651428634ad28f8f15e2a", + "file_size_bytes": 1106 + }, + { + "name": "Gp0127624_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127624", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_hqmq_bin.zip", + "md5_checksum": "0bd9d9e5f15087ccd35c38956bb3a210", + "id": "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", + "file_size_bytes": 507790 + }, + { + "name": "Gp0127624_metabat2 bins", + "description": "metabat2 bins for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_metabat_bin.zip", + "md5_checksum": "2d174febedeca0ce515939dd53d6ccb9", + "id": "nmdc:2d174febedeca0ce515939dd53d6ccb9", + "file_size_bytes": 230699 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf2b" + "$oid": "649b0052ec087f6bbab34701" }, "has_input": [ - "nmdc:0db98173ae3395106e24d250b2655f06" + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "nmdc:8c37ab0b3594cc975348041e4841f6ac", + "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0" + ], + "too_short_contig_num": 182057, + "part_of": [ + 
"nmdc:mga0e8jh10" ], + "binned_contig_num": 364, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:f4f810491708ff25956cddd005cc9944", - "nmdc:67e3c200d3765733af33d1db1f4bf968", - "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "nmdc:6a216ec913587e26ddc036b703126d76", - "nmdc:ebed7286f886596764a66a0d1dac3e43", - "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "nmdc:81108175d5ef2ca158f516bfc75d3cd9" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", + "nmdc:822be4fbeadb0c8c24f4a680d646b62f", + "nmdc:6b39bdb404c651428634ad28f8f15e2a", + "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", + "nmdc:2d174febedeca0ce515939dd53d6ccb9" ], - "was_informed_by": "gold:Gp0127629", - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "was_informed_by": "gold:Gp0127624", + "input_contig_num": 191010, + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga071r920", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:33+00:00", - "output_data_objects": [ - { - "name": "Gp0127629_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", - "md5_checksum": "f4f810491708ff25956cddd005cc9944", - "id": "nmdc:f4f810491708ff25956cddd005cc9944", - "file_size_bytes": 1206 - }, - { - "name": "Gp0127629_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", - "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", - "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", - "file_size_bytes": 662074 - }, - { - "name": "Gp0127629_Gottcha2 
Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127629", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", - "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", - "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "file_size_bytes": 229307 - }, - { - "name": "Gp0127629_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127629", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", - "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", - "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "file_size_bytes": 1667543500 - }, - { - "name": "Gp0127629_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127629", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", - "md5_checksum": "6a216ec913587e26ddc036b703126d76", - "id": "nmdc:6a216ec913587e26ddc036b703126d76", - "file_size_bytes": 253079 - }, - { - "name": "Gp0127629_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127629", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", - "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", - "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", - "file_size_bytes": 2326900 - }, + "name": "MAGs Analysis Activity for nmdc:mga0e8jh10", + "mags_list": [ { - "name": "Gp0127629_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127629", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", - "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", - "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "file_size_bytes": 1328025421 + "number_of_contig": 69, + "completeness": 11.21, + "bin_name": "bins.1", + "gene_count": 328, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 }, { - "name": "Gp0127629_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127629", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", - "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", - "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "file_size_bytes": 628969 + "number_of_contig": 194, + "completeness": 75.24, + "bin_name": "bins.2", + "gene_count": 2023, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.78, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 35 }, { - "name": "Gp0127629_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127629", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", - "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", - "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", - "file_size_bytes": 3933712 + "number_of_contig": 101, + "completeness": 19.54, + "bin_name": "bins.3", + "gene_count": 585, + "bin_quality": "LQ", + 
"gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 } - ] - }, + ], + "unbinned_contig_num": 8589, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "61e7195d833bcf838a70058b" + "$oid": "649b005bbf2caae0415ef9a2" }, "has_input": [ - "nmdc:0db98173ae3395106e24d250b2655f06" + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3" ], "part_of": [ - "nmdc:mga071r920" + "nmdc:mga0e8jh10" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:f4f810491708ff25956cddd005cc9944", - "nmdc:67e3c200d3765733af33d1db1f4bf968", - "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "nmdc:6a216ec913587e26ddc036b703126d76", - "nmdc:ebed7286f886596764a66a0d1dac3e43", - "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "nmdc:81108175d5ef2ca158f516bfc75d3cd9" - ], - "was_informed_by": "gold:Gp0127629", - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga071r920", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:33+00:00", - "output_data_objects": [ - { - "name": "Gp0127629_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", - "md5_checksum": "f4f810491708ff25956cddd005cc9944", - "id": "nmdc:f4f810491708ff25956cddd005cc9944", - "file_size_bytes": 
1206 - }, - { - "name": "Gp0127629_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", - "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", - "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", - "file_size_bytes": 662074 - }, - { - "name": "Gp0127629_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127629", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", - "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", - "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "file_size_bytes": 229307 - }, - { - "name": "Gp0127629_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127629", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", - "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", - "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "file_size_bytes": 1667543500 - }, - { - "name": "Gp0127629_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127629", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", - "md5_checksum": "6a216ec913587e26ddc036b703126d76", - "id": "nmdc:6a216ec913587e26ddc036b703126d76", - "file_size_bytes": 253079 - }, - { - "name": "Gp0127629_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127629", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", - "md5_checksum": 
"ebed7286f886596764a66a0d1dac3e43", - "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", - "file_size_bytes": 2326900 - }, - { - "name": "Gp0127629_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127629", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", - "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", - "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "file_size_bytes": 1328025421 - }, - { - "name": "Gp0127629_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127629", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", - "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", - "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "file_size_bytes": 628969 - }, - { - "name": "Gp0127629_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127629", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", - "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", - "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", - "file_size_bytes": 3933712 - } - ] - }, + "nmdc:40d15cb24063dbb6097fd1626f62db95", + "nmdc:f70325438abce4c6f56e6c82619dd44a", + "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", + "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", + "nmdc:303a5e88a0eae8942082e9e13f9f6eba", + "nmdc:d919f65e54a8351324e332a5daa6a831", + "nmdc:764c7c2b5554fc6b860b036cab22e0ef", + "nmdc:d0a86560767836f901bdd2625bea46e3", + "nmdc:2f64111072a2b19a726aed9c9f54bba7", + "nmdc:51a011777869ff58b977991f5c90fc47", + "nmdc:53f57253df5119d338b9813aa81c7c9b", + "nmdc:c4aa03608fa7442a05cd23fdcc29bc21" + ], + "was_informed_by": "gold:Gp0127624", + "id": 
"nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0e8jh10", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f9a" + "$oid": "649b005f2ca5ee4adb139f8d" }, "has_input": [ - "nmdc:0db98173ae3395106e24d250b2655f06" + "nmdc:8585f6896702bddf64b02191be5921f4" ], "part_of": [ - "nmdc:mga071r920" + "nmdc:mga0e8jh10" ], - "ctg_logsum": 212258, - "scaf_logsum": 212917, - "gap_pct": 0.00151, + "ctg_logsum": 174168, + "scaf_logsum": 174680, + "gap_pct": 0.0009, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7badcefc26b24213b514cd4c3c9a87d7", - "nmdc:89dd3c10791083ae5a5b30c2154deabd", - "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", - "nmdc:0a1f96cd74ec9f1a6668924745689014", - "nmdc:1608f12840c36ac1d882cc6ef4f4627f" + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "nmdc:0a50f88775f36e9238152f3319252853", + "nmdc:f0dc2f598fa06efbe99843bddaf54f60", + "nmdc:a4405d49e8efe2ee124d25e2414de56c", + "nmdc:8c37ab0b3594cc975348041e4841f6ac" ], - "asm_score": 3.305, - "was_informed_by": "gold:Gp0127629", - "ctg_powsum": 22751, - "scaf_max": 23996, - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "scaf_powsum": 22826, + "asm_score": 5.95, + "was_informed_by": "gold:Gp0127624", + "ctg_powsum": 19404, + "scaf_max": 33408, + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "scaf_powsum": 19462, "execution_resource": "NERSC-Cori", - "contigs": 208553, - "name": "Assembly Activity for nmdc:mga071r920", - "ctg_max": 23996, - "gc_std": 0.1053, - "contig_bp": 101011771, - "gc_avg": 0.62056, - "started_at_time": "2021-10-11T02:23:35Z", - "scaf_bp": 101013301, + "contigs": 191010, + "name": "Assembly Activity for nmdc:mga0e8jh10", + "ctg_max": 33408, + "gc_std": 0.09154, + "contig_bp": 88102698, + "gc_avg": 0.62452, + 
"started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 88103488, "type": "nmdc:MetagenomeAssembly", - "scaffolds": 208427, - "ended_at_time": "2021-10-11T03:33:33+00:00", - "ctg_l50": 478, - "ctg_l90": 290, - "ctg_n50": 59884, - "ctg_n90": 174522, - "scaf_l50": 478, - "scaf_l90": 290, - "scaf_n50": 59864, - "scaf_n90": 174416, - "output_data_objects": [ - { - "name": "Gp0127629_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127629", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_contigs.fna", - "md5_checksum": "7badcefc26b24213b514cd4c3c9a87d7", - "id": "nmdc:7badcefc26b24213b514cd4c3c9a87d7", - "file_size_bytes": 109144090 - }, - { - "name": "Gp0127629_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127629", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_scaffolds.fna", - "md5_checksum": "89dd3c10791083ae5a5b30c2154deabd", - "id": "nmdc:89dd3c10791083ae5a5b30c2154deabd", - "file_size_bytes": 108517023 - }, - { - "name": "Gp0127629_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_covstats.txt", - "md5_checksum": "5e503e3abe6eb9e94c34a55da5bbafdc", - "id": "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", - "file_size_bytes": 16536925 - }, - { - "name": "Gp0127629_Assembled AGP file", - "description": "Assembled AGP file for Gp0127629", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_assembly.agp", - "md5_checksum": "0a1f96cd74ec9f1a6668924745689014", - "id": "nmdc:0a1f96cd74ec9f1a6668924745689014", - "file_size_bytes": 15454045 - }, - { - "name": "Gp0127629_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file 
for Gp0127629", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_pairedMapped_sorted.bam", - "md5_checksum": "1608f12840c36ac1d882cc6ef4f4627f", - "id": "nmdc:1608f12840c36ac1d882cc6ef4f4627f", - "file_size_bytes": 2001264626 - } + "scaffolds": 190940, + "ended_at_time": "2021-10-11T03:30:59+00:00", + "ctg_l50": 434, + "ctg_l90": 288, + "ctg_n50": 56361, + "ctg_n90": 162547, + "scaf_l50": 434, + "scaf_l90": 288, + "scaf_n50": 56334, + "scaf_n90": 162481 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b3d" + }, + "id": "nmdc:omprc-11-x0es2p18", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-msqbhe76" + ], + "has_output": [ + "jgi:574fde577ded5e3df1ee13fc" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127624" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9af" + "$oid": "649b009d6bdd4fd20273c85b" }, "has_input": [ - "nmdc:7badcefc26b24213b514cd4c3c9a87d7" + 
"nmdc:e24b00c4de7a24629f5933940070e06c" ], "part_of": [ - "nmdc:mga071r920" + "nmdc:mga0e8jh10" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ba15f54043fad473edec771b60f5b040", - "nmdc:f6d684abab1c60b2b95ade84644e6a38", - "nmdc:496e0fa5ac1c04849338c972189ee3f6", - "nmdc:311ffbbfc80f28908615a1f18492ae5e", - "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", - "nmdc:1116328ed7ba951246f0eec1d3f065b4", - "nmdc:325e47bc009aeba79fc767e3b6daeee2", - "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", - "nmdc:96ab6fa258a08490082b4f99269f3e8d", - "nmdc:a2b630c408bd557d693b147f95627fdc", - "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", - "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3" + "nmdc:8585f6896702bddf64b02191be5921f4", + "nmdc:b9b6464ecc746a4cc39b549696c5fe9c" ], - "was_informed_by": "gold:Gp0127629", - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "was_informed_by": "gold:Gp0127624", + "input_read_count": 25674112, + "output_read_bases": 3361311014, + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga071r920", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:33:33+00:00", - "output_data_objects": [ - { - "name": "Gp0127629_Protein FAA", - "description": "Protein FAA for Gp0127629", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_proteins.faa", - "md5_checksum": "ba15f54043fad473edec771b60f5b040", - "id": "nmdc:ba15f54043fad473edec771b60f5b040", - "file_size_bytes": 62222526 - }, - { - "name": "Gp0127629_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127629", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_structural_annotation.gff", - "md5_checksum": 
"f6d684abab1c60b2b95ade84644e6a38", - "id": "nmdc:f6d684abab1c60b2b95ade84644e6a38", - "file_size_bytes": 2521 - }, - { - "name": "Gp0127629_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127629", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_functional_annotation.gff", - "md5_checksum": "496e0fa5ac1c04849338c972189ee3f6", - "id": "nmdc:496e0fa5ac1c04849338c972189ee3f6", - "file_size_bytes": 70803412 - }, - { - "name": "Gp0127629_KO TSV file", - "description": "KO TSV file for Gp0127629", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko.tsv", - "md5_checksum": "311ffbbfc80f28908615a1f18492ae5e", - "id": "nmdc:311ffbbfc80f28908615a1f18492ae5e", - "file_size_bytes": 8203743 - }, - { - "name": "Gp0127629_EC TSV file", - "description": "EC TSV file for Gp0127629", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ec.tsv", - "md5_checksum": "7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", - "id": "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", - "file_size_bytes": 5508974 - }, - { - "name": "Gp0127629_COG GFF file", - "description": "COG GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cog.gff", - "md5_checksum": "1116328ed7ba951246f0eec1d3f065b4", - "id": "nmdc:1116328ed7ba951246f0eec1d3f065b4", - "file_size_bytes": 42250648 - }, - { - "name": "Gp0127629_PFAM GFF file", - "description": "PFAM GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_pfam.gff", - "md5_checksum": "325e47bc009aeba79fc767e3b6daeee2", - "id": "nmdc:325e47bc009aeba79fc767e3b6daeee2", - "file_size_bytes": 31677996 - }, - { - "name": "Gp0127629_TigrFam GFF file", - 
"description": "TigrFam GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_tigrfam.gff", - "md5_checksum": "f820db8ce6a1ae7c3e8af40729f5b62b", - "id": "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", - "file_size_bytes": 3472661 - }, - { - "name": "Gp0127629_SMART GFF file", - "description": "SMART GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_smart.gff", - "md5_checksum": "96ab6fa258a08490082b4f99269f3e8d", - "id": "nmdc:96ab6fa258a08490082b4f99269f3e8d", - "file_size_bytes": 9149681 - }, - { - "name": "Gp0127629_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_supfam.gff", - "md5_checksum": "a2b630c408bd557d693b147f95627fdc", - "id": "nmdc:a2b630c408bd557d693b147f95627fdc", - "file_size_bytes": 52308332 - }, - { - "name": "Gp0127629_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cath_funfam.gff", - "md5_checksum": "ba87cd24242288e0b6d8f32a2bcbbb80", - "id": "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", - "file_size_bytes": 39926818 - }, - { - "name": "Gp0127629_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko_ec.gff", - "md5_checksum": "9c97bd7a5e4978e31ed1e5386c3619f3", - "id": "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3", - "file_size_bytes": 26101397 - } - ] - }, + "input_read_bases": 3876790912, + "name": "Read QC Activity for nmdc:mga0e8jh10", + "output_read_count": 22503352, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": 
"649b0052ec087f6bbab3470c" + "$oid": "649b009bff710ae353f8cf1c" }, "has_input": [ - "nmdc:7badcefc26b24213b514cd4c3c9a87d7", - "nmdc:1608f12840c36ac1d882cc6ef4f4627f", - "nmdc:496e0fa5ac1c04849338c972189ee3f6" + "nmdc:8585f6896702bddf64b02191be5921f4" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "nmdc:6c7fec765f2a225f168ebb1f69961013", + "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "nmdc:77db34862804280185d3b1ce961e5338", + "nmdc:84e3efb84d961d189ece310911ccf475", + "nmdc:b8fd31679921f8b68c80917e14caa260", + "nmdc:715c66c69b621478da7d48481f3cbd1d", + "nmdc:0781e8042688219035efafe7d75858d0", + "nmdc:85547ab860ef9d6877ba7abc8881740a" + ], + "was_informed_by": "gold:Gp0127624", + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e7191b833bcf838a6ff905" + }, + "has_input": [ + "nmdc:8585f6896702bddf64b02191be5921f4" ], - "too_short_contig_num": 195955, "part_of": [ - "nmdc:mga071r920" + "nmdc:mga0e8jh10" ], - "binned_contig_num": 271, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:2bbd475ff6a15058b38244e71456024a", - "nmdc:70901a70c06fdcfc71efa2d004e210fd", - "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", - "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", - "nmdc:8f4f5294de942734837fba3d68ffc6b4" + "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "nmdc:6c7fec765f2a225f168ebb1f69961013", + "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + 
"nmdc:77db34862804280185d3b1ce961e5338", + "nmdc:84e3efb84d961d189ece310911ccf475", + "nmdc:b8fd31679921f8b68c80917e14caa260", + "nmdc:715c66c69b621478da7d48481f3cbd1d", + "nmdc:0781e8042688219035efafe7d75858d0", + "nmdc:85547ab860ef9d6877ba7abc8881740a" ], - "was_informed_by": "gold:Gp0127629", - "input_contig_num": 208551, + "was_informed_by": "gold:Gp0127624", + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127629_Filtered Reads", + "description": "Filtered Reads for Gp0127629", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filtered.fastq.gz", + "md5_checksum": "0db98173ae3395106e24d250b2655f06", + "id": "nmdc:0db98173ae3395106e24d250b2655f06", + "file_size_bytes": 1807840952 + }, + { + "name": "Gp0127629_Filtered Stats", + "description": "Filtered Stats for Gp0127629", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filterStats.txt", + "md5_checksum": "bc0874c01bbd31c644cd598e2fdad3c4", + "id": "nmdc:bc0874c01bbd31c644cd598e2fdad3c4", + "file_size_bytes": 284 + }, + { + "name": "Gp0127629_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", + "md5_checksum": "f4f810491708ff25956cddd005cc9944", + "id": "nmdc:f4f810491708ff25956cddd005cc9944", + "file_size_bytes": 1206 + }, + { + "name": "Gp0127629_Gottcha2 full TSV 
report", + "description": "Gottcha2 full TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", + "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", + "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", + "file_size_bytes": 662074 + }, + { + "name": "Gp0127629_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127629", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", + "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", + "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "file_size_bytes": 229307 + }, + { + "name": "Gp0127629_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127629", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", + "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", + "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "file_size_bytes": 1667543500 + }, + { + "name": "Gp0127629_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127629", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", + "md5_checksum": "6a216ec913587e26ddc036b703126d76", + "id": "nmdc:6a216ec913587e26ddc036b703126d76", + "file_size_bytes": 253079 + }, + { + "name": "Gp0127629_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127629", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", + "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", + "id": 
"nmdc:ebed7286f886596764a66a0d1dac3e43", + "file_size_bytes": 2326900 + }, + { + "name": "Gp0127629_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127629", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", + "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", + "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "file_size_bytes": 1328025421 + }, + { + "name": "Gp0127629_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127629", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", + "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", + "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "file_size_bytes": 628969 + }, + { + "name": "Gp0127629_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127629", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", + "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", + "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", + "file_size_bytes": 3933712 + }, + { + "name": "Gp0127629_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", + "md5_checksum": "f4f810491708ff25956cddd005cc9944", + "id": "nmdc:f4f810491708ff25956cddd005cc9944", + "file_size_bytes": 1206 + }, + { + "name": "Gp0127629_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", + "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", + "id": 
"nmdc:67e3c200d3765733af33d1db1f4bf968", + "file_size_bytes": 662074 + }, + { + "name": "Gp0127629_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127629", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", + "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", + "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "file_size_bytes": 229307 + }, + { + "name": "Gp0127629_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127629", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", + "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", + "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "file_size_bytes": 1667543500 + }, + { + "name": "Gp0127629_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127629", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", + "md5_checksum": "6a216ec913587e26ddc036b703126d76", + "id": "nmdc:6a216ec913587e26ddc036b703126d76", + "file_size_bytes": 253079 + }, + { + "name": "Gp0127629_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127629", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", + "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", + "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", + "file_size_bytes": 2326900 + }, + { + "name": "Gp0127629_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127629", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", + "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", + "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "file_size_bytes": 1328025421 + }, + { + "name": "Gp0127629_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127629", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", + "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", + "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "file_size_bytes": 628969 + }, + { + "name": "Gp0127629_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127629", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", + "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", + "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", + "file_size_bytes": 3933712 + }, + { + "name": "Gp0127629_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127629", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_contigs.fna", + "md5_checksum": "7badcefc26b24213b514cd4c3c9a87d7", + "id": "nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "file_size_bytes": 109144090 + }, + { + "name": "Gp0127629_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127629", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_scaffolds.fna", + "md5_checksum": "89dd3c10791083ae5a5b30c2154deabd", + "id": "nmdc:89dd3c10791083ae5a5b30c2154deabd", + "file_size_bytes": 108517023 + }, + { + "name": "Gp0127629_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127629", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_covstats.txt", + "md5_checksum": "5e503e3abe6eb9e94c34a55da5bbafdc", + "id": "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", + "file_size_bytes": 16536925 + }, + { + "name": "Gp0127629_Assembled AGP file", + "description": "Assembled AGP file for Gp0127629", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_assembly.agp", + "md5_checksum": "0a1f96cd74ec9f1a6668924745689014", + "id": "nmdc:0a1f96cd74ec9f1a6668924745689014", + "file_size_bytes": 15454045 + }, + { + "name": "Gp0127629_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127629", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_pairedMapped_sorted.bam", + "md5_checksum": "1608f12840c36ac1d882cc6ef4f4627f", + "id": "nmdc:1608f12840c36ac1d882cc6ef4f4627f", + "file_size_bytes": 2001264626 + }, + { + "name": "Gp0127629_Protein FAA", + "description": "Protein FAA for Gp0127629", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_proteins.faa", + "md5_checksum": "ba15f54043fad473edec771b60f5b040", + "id": "nmdc:ba15f54043fad473edec771b60f5b040", + "file_size_bytes": 62222526 + }, + { + "name": "Gp0127629_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127629", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_structural_annotation.gff", + "md5_checksum": "f6d684abab1c60b2b95ade84644e6a38", + "id": "nmdc:f6d684abab1c60b2b95ade84644e6a38", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127629_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127629", + "data_object_type": 
"Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_functional_annotation.gff", + "md5_checksum": "496e0fa5ac1c04849338c972189ee3f6", + "id": "nmdc:496e0fa5ac1c04849338c972189ee3f6", + "file_size_bytes": 70803412 + }, + { + "name": "Gp0127629_KO TSV file", + "description": "KO TSV file for Gp0127629", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko.tsv", + "md5_checksum": "311ffbbfc80f28908615a1f18492ae5e", + "id": "nmdc:311ffbbfc80f28908615a1f18492ae5e", + "file_size_bytes": 8203743 + }, + { + "name": "Gp0127629_EC TSV file", + "description": "EC TSV file for Gp0127629", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ec.tsv", + "md5_checksum": "7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "id": "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "file_size_bytes": 5508974 + }, + { + "name": "Gp0127629_COG GFF file", + "description": "COG GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cog.gff", + "md5_checksum": "1116328ed7ba951246f0eec1d3f065b4", + "id": "nmdc:1116328ed7ba951246f0eec1d3f065b4", + "file_size_bytes": 42250648 + }, + { + "name": "Gp0127629_PFAM GFF file", + "description": "PFAM GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_pfam.gff", + "md5_checksum": "325e47bc009aeba79fc767e3b6daeee2", + "id": "nmdc:325e47bc009aeba79fc767e3b6daeee2", + "file_size_bytes": 31677996 + }, + { + "name": "Gp0127629_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_tigrfam.gff", + "md5_checksum": "f820db8ce6a1ae7c3e8af40729f5b62b", + "id": "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", + 
"file_size_bytes": 3472661 + }, + { + "name": "Gp0127629_SMART GFF file", + "description": "SMART GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_smart.gff", + "md5_checksum": "96ab6fa258a08490082b4f99269f3e8d", + "id": "nmdc:96ab6fa258a08490082b4f99269f3e8d", + "file_size_bytes": 9149681 + }, + { + "name": "Gp0127629_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_supfam.gff", + "md5_checksum": "a2b630c408bd557d693b147f95627fdc", + "id": "nmdc:a2b630c408bd557d693b147f95627fdc", + "file_size_bytes": 52308332 + }, + { + "name": "Gp0127629_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cath_funfam.gff", + "md5_checksum": "ba87cd24242288e0b6d8f32a2bcbbb80", + "id": "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", + "file_size_bytes": 39926818 + }, + { + "name": "Gp0127629_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko_ec.gff", + "md5_checksum": "9c97bd7a5e4978e31ed1e5386c3619f3", + "id": "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3", + "file_size_bytes": 26101397 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127629_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127629", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.tooShort.fa", + "md5_checksum": "2bbd475ff6a15058b38244e71456024a", + "id": "nmdc:2bbd475ff6a15058b38244e71456024a", + "file_size_bytes": 88674437 + }, + { + "name": "Gp0127629_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.unbinned.fa", + "md5_checksum": "70901a70c06fdcfc71efa2d004e210fd", + "id": "nmdc:70901a70c06fdcfc71efa2d004e210fd", + "file_size_bytes": 19226945 + }, + { + "name": "Gp0127629_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127629", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_checkm_qa.out", + "md5_checksum": "d52b4ae6b61161082fee7d42ecf5ee87", + "id": "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", + "file_size_bytes": 978 + }, + { + "name": "Gp0127629_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127629", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_hqmq_bin.zip", + "md5_checksum": "58d9cd30ca53424cd0f1ce27d0a8a885", + "id": "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", + "file_size_bytes": 182 + }, + { + "name": "Gp0127629_metabat2 bins", + "description": "metabat2 bins for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_metabat_bin.zip", + "md5_checksum": "8f4f5294de942734837fba3d68ffc6b4", + "id": "nmdc:8f4f5294de942734837fba3d68ffc6b4", + "file_size_bytes": 377953 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3470c" + }, + "has_input": [ + 
"nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "nmdc:1608f12840c36ac1d882cc6ef4f4627f", + "nmdc:496e0fa5ac1c04849338c972189ee3f6" + ], + "too_short_contig_num": 195955, + "part_of": [ + "nmdc:mga071r920" + ], + "binned_contig_num": 271, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2bbd475ff6a15058b38244e71456024a", + "nmdc:70901a70c06fdcfc71efa2d004e210fd", + "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", + "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", + "nmdc:8f4f5294de942734837fba3d68ffc6b4" + ], + "was_informed_by": "gold:Gp0127629", + "input_contig_num": 208551, "id": "nmdc:b82754c2c692809f9e59ff9824278c32", "execution_resource": "NERSC-Cori", "name": "MAGs Analysis Activity for nmdc:mga071r920", @@ -8141,1934 +8296,1314 @@ "unbinned_contig_num": 12325, "started_at_time": "2021-10-11T02:23:35Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:33:33+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127629_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.tooShort.fa", - "md5_checksum": "2bbd475ff6a15058b38244e71456024a", - "id": "nmdc:2bbd475ff6a15058b38244e71456024a", - "file_size_bytes": 88674437 - }, - { - "name": "Gp0127629_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127629", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.unbinned.fa", - "md5_checksum": "70901a70c06fdcfc71efa2d004e210fd", - "id": "nmdc:70901a70c06fdcfc71efa2d004e210fd", - "file_size_bytes": 19226945 - }, - { - "name": "Gp0127629_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127629", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_checkm_qa.out", - "md5_checksum": "d52b4ae6b61161082fee7d42ecf5ee87", - "id": "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", - "file_size_bytes": 978 - }, - { - "name": "Gp0127629_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127629", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_hqmq_bin.zip", - "md5_checksum": "58d9cd30ca53424cd0f1ce27d0a8a885", - "id": "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", - "file_size_bytes": 182 - }, - { - "name": "Gp0127629_metabat2 bins", - "description": "metabat2 bins for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_metabat_bin.zip", - "md5_checksum": "8f4f5294de942734837fba3d68ffc6b4", - "id": "nmdc:8f4f5294de942734837fba3d68ffc6b4", - "file_size_bytes": 377953 - } - ] + "ended_at_time": "2021-10-11T03:33:33+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b3f" - }, - "id": "nmdc:omprc-11-b051xn44", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-jdsasr43" - ], - "has_output": [ - "jgi:574fe09a7ded5e3df1ee1485" - ], - "part_of": [ - 
"nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127628" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c863" + "$oid": "649b005bbf2caae0415ef9af" }, "has_input": [ - "nmdc:efca984ecf94cc8de2aeabf94e0b87cc" + "nmdc:7badcefc26b24213b514cd4c3c9a87d7" ], "part_of": [ - "nmdc:mga0x5c381" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f6f1760721d73fc57919b2115a1d47ec", - "nmdc:2225f9d41343590d818186fa2d66852d" - ], - "was_informed_by": "gold:Gp0127628", - "input_read_count": 31715882, - "output_read_bases": 4516265181, - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4789098182, - "name": "Read QC Activity for nmdc:mga0x5c381", - "output_read_count": 30212248, - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:45:59+00:00", - "output_data_objects": [ - { - "name": "Gp0127628_Filtered Reads", - "description": "Filtered Reads for Gp0127628", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filtered.fastq.gz", - "md5_checksum": "f6f1760721d73fc57919b2115a1d47ec", - "id": "nmdc:f6f1760721d73fc57919b2115a1d47ec", - "file_size_bytes": 2548975208 - }, - { - "name": "Gp0127628_Filtered 
Stats", - "description": "Filtered Stats for Gp0127628", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filterStats.txt", - "md5_checksum": "2225f9d41343590d818186fa2d66852d", - "id": "nmdc:2225f9d41343590d818186fa2d66852d", - "file_size_bytes": 291 - } - ] - }, - { - "_id": { - "$oid": "649b009bff710ae353f8cf24" - }, - "has_input": [ - "nmdc:f6f1760721d73fc57919b2115a1d47ec" + "nmdc:mga071r920" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "nmdc:335dbf6f1055de0950988a002f432c0b", - "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "nmdc:224085164a389c6f207967ed03b3e6af", - "nmdc:39ba17263c144761a8bdcc1645c034f5", - "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "nmdc:aae9e961d8ed716457616c8a8841037b", - "nmdc:ba83d6ab837403f4bcbc9400a0460457" + "nmdc:ba15f54043fad473edec771b60f5b040", + "nmdc:f6d684abab1c60b2b95ade84644e6a38", + "nmdc:496e0fa5ac1c04849338c972189ee3f6", + "nmdc:311ffbbfc80f28908615a1f18492ae5e", + "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "nmdc:1116328ed7ba951246f0eec1d3f065b4", + "nmdc:325e47bc009aeba79fc767e3b6daeee2", + "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", + "nmdc:96ab6fa258a08490082b4f99269f3e8d", + "nmdc:a2b630c408bd557d693b147f95627fdc", + "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", + "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3" ], - "was_informed_by": "gold:Gp0127628", - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "was_informed_by": "gold:Gp0127629", + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:59+00:00", - "output_data_objects": [ - { - "name": "Gp0127628_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127628", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", - "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", - "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "file_size_bytes": 3472 - }, - { - "name": "Gp0127628_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", - "md5_checksum": "335dbf6f1055de0950988a002f432c0b", - "id": "nmdc:335dbf6f1055de0950988a002f432c0b", - "file_size_bytes": 863867 - }, - { - "name": "Gp0127628_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127628", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", - "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", - "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "file_size_bytes": 234974 - }, - { - "name": "Gp0127628_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127628", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", - "md5_checksum": "224085164a389c6f207967ed03b3e6af", - "id": "nmdc:224085164a389c6f207967ed03b3e6af", - "file_size_bytes": 2220789142 - }, - { - "name": "Gp0127628_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127628", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", - "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", - "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", - "file_size_bytes": 257030 - }, - { - "name": "Gp0127628_Centrifuge Krona HTML report", - "description": 
"Centrifuge Krona HTML report for Gp0127628", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", - "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", - "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "file_size_bytes": 2337568 - }, - { - "name": "Gp0127628_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127628", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", - "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", - "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "file_size_bytes": 1776487262 - }, - { - "name": "Gp0127628_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127628", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", - "md5_checksum": "aae9e961d8ed716457616c8a8841037b", - "id": "nmdc:aae9e961d8ed716457616c8a8841037b", - "file_size_bytes": 664011 - }, - { - "name": "Gp0127628_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127628", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", - "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", - "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", - "file_size_bytes": 4035375 - } - ] - }, + "name": "Annotation Activity for nmdc:mga071r920", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:33:33+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "61e7193b833bcf838a6fff9c" + "$oid": "649b005f2ca5ee4adb139f9a" }, "has_input": [ - 
"nmdc:f6f1760721d73fc57919b2115a1d47ec" + "nmdc:0db98173ae3395106e24d250b2655f06" ], "part_of": [ - "nmdc:mga0x5c381" + "nmdc:mga071r920" ], + "ctg_logsum": 212258, + "scaf_logsum": 212917, + "gap_pct": 0.00151, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "nmdc:335dbf6f1055de0950988a002f432c0b", - "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "nmdc:224085164a389c6f207967ed03b3e6af", - "nmdc:39ba17263c144761a8bdcc1645c034f5", - "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "nmdc:aae9e961d8ed716457616c8a8841037b", - "nmdc:ba83d6ab837403f4bcbc9400a0460457" + "nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "nmdc:89dd3c10791083ae5a5b30c2154deabd", + "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", + "nmdc:0a1f96cd74ec9f1a6668924745689014", + "nmdc:1608f12840c36ac1d882cc6ef4f4627f" ], - "was_informed_by": "gold:Gp0127628", - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:59+00:00", - "output_data_objects": [ - { - "name": "Gp0127628_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", - "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", - "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "file_size_bytes": 3472 - }, - { - "name": "Gp0127628_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", - "md5_checksum": "335dbf6f1055de0950988a002f432c0b", - "id": "nmdc:335dbf6f1055de0950988a002f432c0b", - "file_size_bytes": 863867 - }, - { - "name": 
"Gp0127628_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127628", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", - "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", - "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "file_size_bytes": 234974 - }, - { - "name": "Gp0127628_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127628", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", - "md5_checksum": "224085164a389c6f207967ed03b3e6af", - "id": "nmdc:224085164a389c6f207967ed03b3e6af", - "file_size_bytes": 2220789142 - }, - { - "name": "Gp0127628_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127628", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", - "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", - "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", - "file_size_bytes": 257030 - }, - { - "name": "Gp0127628_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127628", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", - "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", - "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "file_size_bytes": 2337568 - }, - { - "name": "Gp0127628_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127628", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", - "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", - "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "file_size_bytes": 1776487262 - }, - { - "name": "Gp0127628_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127628", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", - "md5_checksum": "aae9e961d8ed716457616c8a8841037b", - "id": "nmdc:aae9e961d8ed716457616c8a8841037b", - "file_size_bytes": 664011 - }, - { - "name": "Gp0127628_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127628", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", - "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", - "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", - "file_size_bytes": 4035375 - } - ] - }, + "asm_score": 3.305, + "was_informed_by": "gold:Gp0127629", + "ctg_powsum": 22751, + "scaf_max": 23996, + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "scaf_powsum": 22826, + "execution_resource": "NERSC-Cori", + "contigs": 208553, + "name": "Assembly Activity for nmdc:mga071r920", + "ctg_max": 23996, + "gc_std": 0.1053, + "contig_bp": 101011771, + "gc_avg": 0.62056, + "started_at_time": "2021-10-11T02:23:35Z", + "scaf_bp": 101013301, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 208427, + "ended_at_time": "2021-10-11T03:33:33+00:00", + "ctg_l50": 478, + "ctg_l90": 290, + "ctg_n50": 59884, + "ctg_n90": 174522, + "scaf_l50": 478, + "scaf_l90": 290, + "scaf_n50": 59864, + "scaf_n90": 174416 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { 
"_id": { - "$oid": "649b005f2ca5ee4adb139fae" + "$oid": "649b009773e8249959349b3e" }, + "id": "nmdc:omprc-11-1nvcer55", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:f6f1760721d73fc57919b2115a1d47ec" - ], - "part_of": [ - "nmdc:mga0x5c381" + "nmdc:bsm-11-3sfanv57" ], - "ctg_logsum": 110768, - "scaf_logsum": 111226, - "gap_pct": 0.00124, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9e550afb3bcd8d66807f861ecfed815b", - "nmdc:5e79fce62ffa8c4479be5159143797e0", - "nmdc:682fd042d6adcd93f75c3eae2cf32241", - "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", - "nmdc:9163caaba1f60d1af9a551559069ca08" + "jgi:574fde587ded5e3df1ee13fd" ], - "asm_score": 4.319, - "was_informed_by": "gold:Gp0127628", - "ctg_powsum": 11962, - "scaf_max": 45540, - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "scaf_powsum": 12026, - "execution_resource": "NERSC-Cori", - "contigs": 157859, - "name": "Assembly Activity for nmdc:mga0x5c381", - "ctg_max": 40273, - "gc_std": 0.10673, - "contig_bp": 68288279, - "gc_avg": 0.61453, - "started_at_time": "2021-10-11T02:25:13Z", - "scaf_bp": 68289129, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 157774, - "ended_at_time": "2021-10-11T04:45:59+00:00", - "ctg_l50": 400, - "ctg_l90": 285, - "ctg_n50": 49248, - "ctg_n90": 135173, - "scaf_l50": 400, - "scaf_l90": 285, - "scaf_n50": 49230, - "scaf_n90": 135095, - "output_data_objects": [ - { - "name": "Gp0127628_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127628", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", - "md5_checksum": 
"9e550afb3bcd8d66807f861ecfed815b", - "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", - "file_size_bytes": 74277737 - }, - { - "name": "Gp0127628_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127628", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", - "md5_checksum": "5e79fce62ffa8c4479be5159143797e0", - "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", - "file_size_bytes": 73802989 - }, - { - "name": "Gp0127628_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_covstats.txt", - "md5_checksum": "682fd042d6adcd93f75c3eae2cf32241", - "id": "nmdc:682fd042d6adcd93f75c3eae2cf32241", - "file_size_bytes": 12462125 - }, - { - "name": "Gp0127628_Assembled AGP file", - "description": "Assembled AGP file for Gp0127628", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_assembly.agp", - "md5_checksum": "9d607ebd92ad5bcbaaa405884d4a83a3", - "id": "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", - "file_size_bytes": 11636352 - }, - { - "name": "Gp0127628_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127628", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_pairedMapped_sorted.bam", - "md5_checksum": "9163caaba1f60d1af9a551559069ca08", - "id": "nmdc:9163caaba1f60d1af9a551559069ca08", - "file_size_bytes": 2743529039 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + 
"principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127629" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9aa" + "$oid": "649b009d6bdd4fd20273c869" }, "has_input": [ - "nmdc:9e550afb3bcd8d66807f861ecfed815b" + "nmdc:22f8150866c51b35726066d2ec13c5ca" ], "part_of": [ - "nmdc:mga0x5c381" + "nmdc:mga071r920" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9c21fbee23b4098d69ac618d32fe44c3", - "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", - "nmdc:cf08b19ebb993d895845588d073c02fe", - "nmdc:e110cecd0dcfbefbde06b88e89047c94", - "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", - "nmdc:c8834a004633752f76b91883416c34b8", - "nmdc:adc813c11b8b32e205aa65ab971d4159", - "nmdc:eecb4098ed258acb0820c17e9e308a9d", - "nmdc:cd2cbf38f357d4c7ec5080072e994861", - "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", - "nmdc:29e9378a37cc56837c1343de85993789", - "nmdc:5faeccd78a03acd094263a777faa5fe2" + "nmdc:0db98173ae3395106e24d250b2655f06", + "nmdc:bc0874c01bbd31c644cd598e2fdad3c4" ], - "was_informed_by": "gold:Gp0127628", - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "was_informed_by": "gold:Gp0127629", + "input_read_count": 23886420, + "output_read_bases": 3395256515, + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0x5c381", - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:45:59+00:00", - "output_data_objects": [ - { - "name": "Gp0127628_Protein FAA", - "description": "Protein FAA for Gp0127628", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_proteins.faa", - "md5_checksum": 
"9c21fbee23b4098d69ac618d32fe44c3", - "id": "nmdc:9c21fbee23b4098d69ac618d32fe44c3", - "file_size_bytes": 43551850 - }, - { - "name": "Gp0127628_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127628", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_structural_annotation.gff", - "md5_checksum": "c668eaf35e0ebbb7a304271a03dfd3cd", - "id": "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", - "file_size_bytes": 2518 - }, - { - "name": "Gp0127628_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127628", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_functional_annotation.gff", - "md5_checksum": "cf08b19ebb993d895845588d073c02fe", - "id": "nmdc:cf08b19ebb993d895845588d073c02fe", - "file_size_bytes": 50830515 - }, - { - "name": "Gp0127628_KO TSV file", - "description": "KO TSV file for Gp0127628", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko.tsv", - "md5_checksum": "e110cecd0dcfbefbde06b88e89047c94", - "id": "nmdc:e110cecd0dcfbefbde06b88e89047c94", - "file_size_bytes": 5904167 - }, - { - "name": "Gp0127628_EC TSV file", - "description": "EC TSV file for Gp0127628", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ec.tsv", - "md5_checksum": "5f393bad4aacf75d348d7e7d5fe00a06", - "id": "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", - "file_size_bytes": 3917008 - }, - { - "name": "Gp0127628_COG GFF file", - "description": "COG GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cog.gff", - "md5_checksum": "c8834a004633752f76b91883416c34b8", - "id": 
"nmdc:c8834a004633752f76b91883416c34b8", - "file_size_bytes": 29634134 - }, - { - "name": "Gp0127628_PFAM GFF file", - "description": "PFAM GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_pfam.gff", - "md5_checksum": "adc813c11b8b32e205aa65ab971d4159", - "id": "nmdc:adc813c11b8b32e205aa65ab971d4159", - "file_size_bytes": 21661208 - }, - { - "name": "Gp0127628_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_tigrfam.gff", - "md5_checksum": "eecb4098ed258acb0820c17e9e308a9d", - "id": "nmdc:eecb4098ed258acb0820c17e9e308a9d", - "file_size_bytes": 2198767 - }, - { - "name": "Gp0127628_SMART GFF file", - "description": "SMART GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_smart.gff", - "md5_checksum": "cd2cbf38f357d4c7ec5080072e994861", - "id": "nmdc:cd2cbf38f357d4c7ec5080072e994861", - "file_size_bytes": 6281175 - }, - { - "name": "Gp0127628_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_supfam.gff", - "md5_checksum": "1e7aefe1539f0dbe510f805a8d0a6930", - "id": "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", - "file_size_bytes": 36891824 - }, - { - "name": "Gp0127628_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cath_funfam.gff", - "md5_checksum": "29e9378a37cc56837c1343de85993789", - "id": "nmdc:29e9378a37cc56837c1343de85993789", - "file_size_bytes": 27671574 - }, - { - "name": "Gp0127628_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko_ec.gff", - "md5_checksum": 
"5faeccd78a03acd094263a777faa5fe2", - "id": "nmdc:5faeccd78a03acd094263a777faa5fe2", - "file_size_bytes": 18790529 - } - ] - }, + "input_read_bases": 3606849420, + "name": "Read QC Activity for nmdc:mga071r920", + "output_read_count": 22738452, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:33+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34705" + "$oid": "649b009bff710ae353f8cf2b" }, "has_input": [ - "nmdc:9e550afb3bcd8d66807f861ecfed815b", - "nmdc:9163caaba1f60d1af9a551559069ca08", - "nmdc:cf08b19ebb993d895845588d073c02fe" - ], - "too_short_contig_num": 151485, - "part_of": [ - "nmdc:mga0x5c381" + "nmdc:0db98173ae3395106e24d250b2655f06" ], - "binned_contig_num": 238, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:13137fa415f537d2874808d8c75c1b3d", - "nmdc:196d2699f8fdab4e38c8a638f92093b1", - "nmdc:b67b26f8f76faa347575352000021faf", - "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", - "nmdc:5ef5ad24cfe3990c0256d420f51f9010" + "nmdc:f4f810491708ff25956cddd005cc9944", + "nmdc:67e3c200d3765733af33d1db1f4bf968", + "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "nmdc:6a216ec913587e26ddc036b703126d76", + "nmdc:ebed7286f886596764a66a0d1dac3e43", + "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "nmdc:81108175d5ef2ca158f516bfc75d3cd9" ], - "was_informed_by": "gold:Gp0127628", - "input_contig_num": 157858, - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "was_informed_by": "gold:Gp0127629", + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0x5c381", - "mags_list": [ - { - "number_of_contig": 238, - "completeness": 30.86, - "bin_name": "bins.1", - "gene_count": 1126, - "bin_quality": "LQ", - 
"gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - } - ], - "unbinned_contig_num": 6135, - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:45:59+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127628_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.tooShort.fa", - "md5_checksum": "13137fa415f537d2874808d8c75c1b3d", - "id": "nmdc:13137fa415f537d2874808d8c75c1b3d", - "file_size_bytes": 63661919 - }, - { - "name": "Gp0127628_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.unbinned.fa", - "md5_checksum": "196d2699f8fdab4e38c8a638f92093b1", - "id": "nmdc:196d2699f8fdab4e38c8a638f92093b1", - "file_size_bytes": 9649261 - }, - { - "name": "Gp0127628_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127628", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_checkm_qa.out", - "md5_checksum": 
"b67b26f8f76faa347575352000021faf", - "id": "nmdc:b67b26f8f76faa347575352000021faf", - "file_size_bytes": 785 - }, - { - "name": "Gp0127628_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127628", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_hqmq_bin.zip", - "md5_checksum": "166c8a0ad2f4d57e9b16cdc699d56c09", - "id": "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", - "file_size_bytes": 182 - }, - { - "name": "Gp0127628_metabat2 bins", - "description": "metabat2 bins for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_metabat_bin.zip", - "md5_checksum": "5ef5ad24cfe3990c0256d420f51f9010", - "id": "nmdc:5ef5ad24cfe3990c0256d420f51f9010", - "file_size_bytes": 279359 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga071r920", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:33:33+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b40" - }, - "id": "nmdc:omprc-11-k8kt2j31", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-4vqhvw07" - ], - "has_output": [ - "jgi:574fde5b7ded5e3df1ee13ff" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": 
"nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127631" - ], - "downstream_workflow_activity_records": [ + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c862" + "$oid": "61e7195d833bcf838a70058b" }, "has_input": [ - "nmdc:9c97e4b734b9cac731fe30fb07a32bb7" + "nmdc:0db98173ae3395106e24d250b2655f06" ], "part_of": [ - "nmdc:mga0jx8k09" + "nmdc:mga071r920" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", - "nmdc:b280141d234edf10cde8794539700654" - ], - "was_informed_by": "gold:Gp0127631", - "input_read_count": 26419652, - "output_read_bases": 3798930297, - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3989367452, - "name": "Read QC Activity for nmdc:mga0jx8k09", - "output_read_count": 25434840, - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:40:31+00:00", - "output_data_objects": [ - { - "name": "Gp0127631_Filtered Reads", - "description": "Filtered Reads for Gp0127631", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filtered.fastq.gz", - "md5_checksum": "6969fd7f4b1a5a34fb30d31b92cd6bf8", - "id": "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", - "file_size_bytes": 2030538721 - }, - { - "name": "Gp0127631_Filtered Stats", - "description": "Filtered Stats for Gp0127631", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filterStats.txt", - "md5_checksum": "b280141d234edf10cde8794539700654", - "id": "nmdc:b280141d234edf10cde8794539700654", - "file_size_bytes": 284 - } - ] - 
}, - { - "_id": { - "$oid": "649b009bff710ae353f8cf22" - }, - "has_input": [ - "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "nmdc:0d1729a83798b752f33eeb8d97afe972", - "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "nmdc:ea27c005b1788434c2198ad60939d4bc", - "nmdc:6a46583da876b9d6287302308df0b9fd", - "nmdc:af619dc5a0423509a4beaca26aa61000", - "nmdc:50093825ec73dcabe66aa353de766beb" + "nmdc:f4f810491708ff25956cddd005cc9944", + "nmdc:67e3c200d3765733af33d1db1f4bf968", + "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "nmdc:6a216ec913587e26ddc036b703126d76", + "nmdc:ebed7286f886596764a66a0d1dac3e43", + "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "nmdc:81108175d5ef2ca158f516bfc75d3cd9" ], - "was_informed_by": "gold:Gp0127631", - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "was_informed_by": "gold:Gp0127629", + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", - "started_at_time": "2021-10-11T02:26:22Z", + "name": "ReadBased Analysis Activity for nmdc:mga071r920", + "started_at_time": "2021-10-11T02:23:35Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:40:31+00:00", - "output_data_objects": [ - { - "name": "Gp0127631_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", - "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", - "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "file_size_bytes": 1227 - }, - { - "name": "Gp0127631_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127631", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", - "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", - "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "file_size_bytes": 647196 - }, - { - "name": "Gp0127631_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127631", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", - "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", - "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "file_size_bytes": 229312 - }, - { - "name": "Gp0127631_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127631", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", - "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", - "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", - "file_size_bytes": 1861431092 - }, - { - "name": "Gp0127631_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127631", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", - "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", - "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "file_size_bytes": 254665 - }, - { - "name": "Gp0127631_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127631", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", - "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", - "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", - "file_size_bytes": 2334578 - }, - { - "name": 
"Gp0127631_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127631", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", - "md5_checksum": "6a46583da876b9d6287302308df0b9fd", - "id": "nmdc:6a46583da876b9d6287302308df0b9fd", - "file_size_bytes": 1483354621 - }, - { - "name": "Gp0127631_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127631", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", - "md5_checksum": "af619dc5a0423509a4beaca26aa61000", - "id": "nmdc:af619dc5a0423509a4beaca26aa61000", - "file_size_bytes": 640329 - }, - { - "name": "Gp0127631_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127631", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", - "md5_checksum": "50093825ec73dcabe66aa353de766beb", - "id": "nmdc:50093825ec73dcabe66aa353de766beb", - "file_size_bytes": 3993246 - } - ] + "ended_at_time": "2021-10-11T03:33:33+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127628_Filtered Reads", + "description": "Filtered Reads for Gp0127628", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filtered.fastq.gz", + "md5_checksum": "f6f1760721d73fc57919b2115a1d47ec", + "id": "nmdc:f6f1760721d73fc57919b2115a1d47ec", + "file_size_bytes": 2548975208 + }, + { + "name": "Gp0127628_Filtered Stats", + "description": "Filtered Stats for Gp0127628", + 
"data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filterStats.txt", + "md5_checksum": "2225f9d41343590d818186fa2d66852d", + "id": "nmdc:2225f9d41343590d818186fa2d66852d", + "file_size_bytes": 291 + }, + { + "name": "Gp0127628_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", + "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", + "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "file_size_bytes": 3472 + }, + { + "name": "Gp0127628_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", + "md5_checksum": "335dbf6f1055de0950988a002f432c0b", + "id": "nmdc:335dbf6f1055de0950988a002f432c0b", + "file_size_bytes": 863867 + }, + { + "name": "Gp0127628_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127628", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", + "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", + "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "file_size_bytes": 234974 + }, + { + "name": "Gp0127628_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127628", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", + "md5_checksum": "224085164a389c6f207967ed03b3e6af", + "id": "nmdc:224085164a389c6f207967ed03b3e6af", + "file_size_bytes": 2220789142 + }, + { + "name": "Gp0127628_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127628", + 
"data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", + "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", + "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", + "file_size_bytes": 257030 + }, + { + "name": "Gp0127628_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127628", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", + "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", + "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "file_size_bytes": 2337568 + }, + { + "name": "Gp0127628_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127628", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", + "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", + "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "file_size_bytes": 1776487262 + }, + { + "name": "Gp0127628_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127628", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", + "md5_checksum": "aae9e961d8ed716457616c8a8841037b", + "id": "nmdc:aae9e961d8ed716457616c8a8841037b", + "file_size_bytes": 664011 + }, + { + "name": "Gp0127628_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127628", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", + "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", + "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", + "file_size_bytes": 
4035375 + }, + { + "name": "Gp0127628_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", + "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", + "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "file_size_bytes": 3472 + }, + { + "name": "Gp0127628_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", + "md5_checksum": "335dbf6f1055de0950988a002f432c0b", + "id": "nmdc:335dbf6f1055de0950988a002f432c0b", + "file_size_bytes": 863867 + }, + { + "name": "Gp0127628_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127628", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", + "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", + "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "file_size_bytes": 234974 + }, + { + "name": "Gp0127628_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127628", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", + "md5_checksum": "224085164a389c6f207967ed03b3e6af", + "id": "nmdc:224085164a389c6f207967ed03b3e6af", + "file_size_bytes": 2220789142 + }, + { + "name": "Gp0127628_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127628", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", + "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", + "id": 
"nmdc:39ba17263c144761a8bdcc1645c034f5", + "file_size_bytes": 257030 + }, + { + "name": "Gp0127628_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127628", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", + "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", + "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "file_size_bytes": 2337568 + }, + { + "name": "Gp0127628_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127628", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", + "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", + "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "file_size_bytes": 1776487262 + }, + { + "name": "Gp0127628_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127628", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", + "md5_checksum": "aae9e961d8ed716457616c8a8841037b", + "id": "nmdc:aae9e961d8ed716457616c8a8841037b", + "file_size_bytes": 664011 + }, + { + "name": "Gp0127628_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127628", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", + "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", + "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", + "file_size_bytes": 4035375 + }, + { + "name": "Gp0127628_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127628", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", + "md5_checksum": "9e550afb3bcd8d66807f861ecfed815b", + "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "file_size_bytes": 74277737 + }, + { + "name": "Gp0127628_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127628", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", + "md5_checksum": "5e79fce62ffa8c4479be5159143797e0", + "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", + "file_size_bytes": 73802989 + }, + { + "name": "Gp0127628_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_covstats.txt", + "md5_checksum": "682fd042d6adcd93f75c3eae2cf32241", + "id": "nmdc:682fd042d6adcd93f75c3eae2cf32241", + "file_size_bytes": 12462125 + }, + { + "name": "Gp0127628_Assembled AGP file", + "description": "Assembled AGP file for Gp0127628", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_assembly.agp", + "md5_checksum": "9d607ebd92ad5bcbaaa405884d4a83a3", + "id": "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", + "file_size_bytes": 11636352 + }, + { + "name": "Gp0127628_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127628", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_pairedMapped_sorted.bam", + "md5_checksum": "9163caaba1f60d1af9a551559069ca08", + "id": "nmdc:9163caaba1f60d1af9a551559069ca08", + "file_size_bytes": 2743529039 + }, + { + "name": "Gp0127628_Protein FAA", + "description": "Protein FAA for Gp0127628", + "data_object_type": "Annotation Amino Acid FASTA", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_proteins.faa", + "md5_checksum": "9c21fbee23b4098d69ac618d32fe44c3", + "id": "nmdc:9c21fbee23b4098d69ac618d32fe44c3", + "file_size_bytes": 43551850 + }, + { + "name": "Gp0127628_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127628", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_structural_annotation.gff", + "md5_checksum": "c668eaf35e0ebbb7a304271a03dfd3cd", + "id": "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127628_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127628", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_functional_annotation.gff", + "md5_checksum": "cf08b19ebb993d895845588d073c02fe", + "id": "nmdc:cf08b19ebb993d895845588d073c02fe", + "file_size_bytes": 50830515 }, + { + "name": "Gp0127628_KO TSV file", + "description": "KO TSV file for Gp0127628", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko.tsv", + "md5_checksum": "e110cecd0dcfbefbde06b88e89047c94", + "id": "nmdc:e110cecd0dcfbefbde06b88e89047c94", + "file_size_bytes": 5904167 + }, + { + "name": "Gp0127628_EC TSV file", + "description": "EC TSV file for Gp0127628", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ec.tsv", + "md5_checksum": "5f393bad4aacf75d348d7e7d5fe00a06", + "id": "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", + "file_size_bytes": 3917008 + }, + { + "name": "Gp0127628_COG GFF file", + "description": "COG GFF file for Gp0127628", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cog.gff", + "md5_checksum": "c8834a004633752f76b91883416c34b8", + "id": "nmdc:c8834a004633752f76b91883416c34b8", + "file_size_bytes": 29634134 + }, + { + "name": "Gp0127628_PFAM GFF file", + "description": "PFAM GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_pfam.gff", + "md5_checksum": "adc813c11b8b32e205aa65ab971d4159", + "id": "nmdc:adc813c11b8b32e205aa65ab971d4159", + "file_size_bytes": 21661208 + }, + { + "name": "Gp0127628_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_tigrfam.gff", + "md5_checksum": "eecb4098ed258acb0820c17e9e308a9d", + "id": "nmdc:eecb4098ed258acb0820c17e9e308a9d", + "file_size_bytes": 2198767 + }, + { + "name": "Gp0127628_SMART GFF file", + "description": "SMART GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_smart.gff", + "md5_checksum": "cd2cbf38f357d4c7ec5080072e994861", + "id": "nmdc:cd2cbf38f357d4c7ec5080072e994861", + "file_size_bytes": 6281175 + }, + { + "name": "Gp0127628_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_supfam.gff", + "md5_checksum": "1e7aefe1539f0dbe510f805a8d0a6930", + "id": "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", + "file_size_bytes": 36891824 + }, + { + "name": "Gp0127628_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cath_funfam.gff", + "md5_checksum": "29e9378a37cc56837c1343de85993789", + "id": "nmdc:29e9378a37cc56837c1343de85993789", + "file_size_bytes": 27671574 + }, + { + "name": "Gp0127628_KO_EC GFF file", + "description": "KO_EC GFF file for 
Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko_ec.gff", + "md5_checksum": "5faeccd78a03acd094263a777faa5fe2", + "id": "nmdc:5faeccd78a03acd094263a777faa5fe2", + "file_size_bytes": 18790529 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127628_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.tooShort.fa", + "md5_checksum": "13137fa415f537d2874808d8c75c1b3d", + "id": "nmdc:13137fa415f537d2874808d8c75c1b3d", + "file_size_bytes": 63661919 + }, + { + "name": "Gp0127628_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.unbinned.fa", + "md5_checksum": "196d2699f8fdab4e38c8a638f92093b1", + "id": "nmdc:196d2699f8fdab4e38c8a638f92093b1", + "file_size_bytes": 9649261 + }, + { + "name": "Gp0127628_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127628", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_checkm_qa.out", + "md5_checksum": "b67b26f8f76faa347575352000021faf", + "id": "nmdc:b67b26f8f76faa347575352000021faf", + "file_size_bytes": 785 + }, + { + "name": "Gp0127628_high-quality and medium-quality bins", + "description": "high-quality 
and medium-quality bins for Gp0127628", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_hqmq_bin.zip", + "md5_checksum": "166c8a0ad2f4d57e9b16cdc699d56c09", + "id": "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", + "file_size_bytes": 182 + }, + { + "name": "Gp0127628_metabat2 bins", + "description": "metabat2 bins for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_metabat_bin.zip", + "md5_checksum": "5ef5ad24cfe3990c0256d420f51f9010", + "id": "nmdc:5ef5ad24cfe3990c0256d420f51f9010", + "file_size_bytes": 279359 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "61e71938833bcf838a6ffe7a" + "$oid": "649b0052ec087f6bbab34705" }, "has_input": [ - "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" + "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "nmdc:9163caaba1f60d1af9a551559069ca08", + "nmdc:cf08b19ebb993d895845588d073c02fe" ], + "too_short_contig_num": 151485, "part_of": [ - "nmdc:mga0jx8k09" + "nmdc:mga0x5c381" ], + "binned_contig_num": 238, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "nmdc:0d1729a83798b752f33eeb8d97afe972", - "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "nmdc:ea27c005b1788434c2198ad60939d4bc", - "nmdc:6a46583da876b9d6287302308df0b9fd", - "nmdc:af619dc5a0423509a4beaca26aa61000", - "nmdc:50093825ec73dcabe66aa353de766beb" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:13137fa415f537d2874808d8c75c1b3d", + "nmdc:196d2699f8fdab4e38c8a638f92093b1", + "nmdc:b67b26f8f76faa347575352000021faf", + "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", + "nmdc:5ef5ad24cfe3990c0256d420f51f9010" ], - "was_informed_by": "gold:Gp0127631", - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + 
"was_informed_by": "gold:Gp0127628", + "input_contig_num": 157858, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:40:31+00:00", - "output_data_objects": [ - { - "name": "Gp0127631_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", - "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", - "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "file_size_bytes": 1227 - }, - { - "name": "Gp0127631_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", - "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", - "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "file_size_bytes": 647196 - }, - { - "name": "Gp0127631_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127631", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", - "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", - "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "file_size_bytes": 229312 - }, - { - "name": "Gp0127631_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127631", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", - "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", - "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", - "file_size_bytes": 1861431092 - }, - { - "name": 
"Gp0127631_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127631", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", - "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", - "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "file_size_bytes": 254665 - }, - { - "name": "Gp0127631_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127631", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", - "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", - "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", - "file_size_bytes": 2334578 - }, - { - "name": "Gp0127631_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127631", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", - "md5_checksum": "6a46583da876b9d6287302308df0b9fd", - "id": "nmdc:6a46583da876b9d6287302308df0b9fd", - "file_size_bytes": 1483354621 - }, - { - "name": "Gp0127631_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127631", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", - "md5_checksum": "af619dc5a0423509a4beaca26aa61000", - "id": "nmdc:af619dc5a0423509a4beaca26aa61000", - "file_size_bytes": 640329 - }, + "name": "MAGs Analysis Activity for nmdc:mga0x5c381", + "mags_list": [ { - "name": "Gp0127631_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127631", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", - "md5_checksum": "50093825ec73dcabe66aa353de766beb", - "id": "nmdc:50093825ec73dcabe66aa353de766beb", - "file_size_bytes": 3993246 + "number_of_contig": 238, + "completeness": 30.86, + "bin_name": "bins.1", + "gene_count": 1126, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 } - ] - }, + ], + "unbinned_contig_num": 6135, + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f94" + "$oid": "649b005bbf2caae0415ef9aa" }, "has_input": [ - "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" + "nmdc:9e550afb3bcd8d66807f861ecfed815b" ], "part_of": [ - "nmdc:mga0jx8k09" + "nmdc:mga0x5c381" ], - "ctg_logsum": 306128, - "scaf_logsum": 307525, - "gap_pct": 0.00196, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:1eb44ff780f2aad1053ca336b53d7b98", - "nmdc:992fb303b5ced60489fea0ce6dae71f9", - "nmdc:a0f466071ed249babf1a5653e1c20a02", - "nmdc:5eddebfbfabfd9c0e71c2699bee73870", - "nmdc:0ecd5e99ec93ba17c7b02483560bafdf" - ], - "asm_score": 3.117, - "was_informed_by": "gold:Gp0127631", - "ctg_powsum": 32898, - "scaf_max": 14244, - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "scaf_powsum": 33057, + "nmdc:9c21fbee23b4098d69ac618d32fe44c3", + "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", + "nmdc:cf08b19ebb993d895845588d073c02fe", + "nmdc:e110cecd0dcfbefbde06b88e89047c94", + "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", + 
"nmdc:c8834a004633752f76b91883416c34b8", + "nmdc:adc813c11b8b32e205aa65ab971d4159", + "nmdc:eecb4098ed258acb0820c17e9e308a9d", + "nmdc:cd2cbf38f357d4c7ec5080072e994861", + "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", + "nmdc:29e9378a37cc56837c1343de85993789", + "nmdc:5faeccd78a03acd094263a777faa5fe2" + ], + "was_informed_by": "gold:Gp0127628", + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", "execution_resource": "NERSC-Cori", - "contigs": 237399, - "name": "Assembly Activity for nmdc:mga0jx8k09", - "ctg_max": 14244, - "gc_std": 0.09594, - "contig_bp": 119367623, - "gc_avg": 0.62364, - "started_at_time": "2021-10-11T02:26:22Z", - "scaf_bp": 119369963, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 237183, - "ended_at_time": "2021-10-11T04:40:31+00:00", - "ctg_l50": 499, - "ctg_l90": 292, - "ctg_n50": 64310, - "ctg_n90": 195626, - "scaf_l50": 500, - "scaf_l90": 292, - "scaf_n50": 64017, - "scaf_n90": 195424, - "output_data_objects": [ - { - "name": "Gp0127631_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127631", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_contigs.fna", - "md5_checksum": "1eb44ff780f2aad1053ca336b53d7b98", - "id": "nmdc:1eb44ff780f2aad1053ca336b53d7b98", - "file_size_bytes": 128714098 - }, - { - "name": "Gp0127631_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127631", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_scaffolds.fna", - "md5_checksum": "992fb303b5ced60489fea0ce6dae71f9", - "id": "nmdc:992fb303b5ced60489fea0ce6dae71f9", - "file_size_bytes": 127998496 - }, - { - "name": "Gp0127631_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_covstats.txt", - "md5_checksum": 
"a0f466071ed249babf1a5653e1c20a02", - "id": "nmdc:a0f466071ed249babf1a5653e1c20a02", - "file_size_bytes": 18831462 - }, - { - "name": "Gp0127631_Assembled AGP file", - "description": "Assembled AGP file for Gp0127631", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_assembly.agp", - "md5_checksum": "5eddebfbfabfd9c0e71c2699bee73870", - "id": "nmdc:5eddebfbfabfd9c0e71c2699bee73870", - "file_size_bytes": 17634272 - }, - { - "name": "Gp0127631_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127631", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_pairedMapped_sorted.bam", - "md5_checksum": "0ecd5e99ec93ba17c7b02483560bafdf", - "id": "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", - "file_size_bytes": 2245356551 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0x5c381", + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9a8" + "$oid": "649b005f2ca5ee4adb139fae" }, "has_input": [ - "nmdc:1eb44ff780f2aad1053ca336b53d7b98" + "nmdc:f6f1760721d73fc57919b2115a1d47ec" ], "part_of": [ - "nmdc:mga0jx8k09" + "nmdc:mga0x5c381" ], + "ctg_logsum": 110768, + "scaf_logsum": 111226, + "gap_pct": 0.00124, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8c26f97b6a3196ed09dc4f54857d4972", - "nmdc:c7112633e322d7bc609bd479f7ddddb9", - "nmdc:2f6baf7176d2d904c02ae71875a8d326", - "nmdc:1abb9d211201bef0cb545e70a65de8cf", - "nmdc:985a23612611fb258d2dbaee1e4458f5", - "nmdc:44c3fa82e71af5647b7619b0dd8a0728", - "nmdc:fb70c00e07d0b93b12cacbded87dcea6", - "nmdc:98e1311ba5e96a176baccdb9a95439f9", - "nmdc:0685da4455dde2dec9f221b9356f008c", - "nmdc:14b7f064a3a2fad830fad893ff3257bc", - 
"nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", - "nmdc:01769b6920ba82884f19ac3f88428db1" + "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "nmdc:5e79fce62ffa8c4479be5159143797e0", + "nmdc:682fd042d6adcd93f75c3eae2cf32241", + "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", + "nmdc:9163caaba1f60d1af9a551559069ca08" ], - "was_informed_by": "gold:Gp0127631", - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "asm_score": 4.319, + "was_informed_by": "gold:Gp0127628", + "ctg_powsum": 11962, + "scaf_max": 45540, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "scaf_powsum": 12026, "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0jx8k09", - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:40:31+00:00", - "output_data_objects": [ - { - "name": "Gp0127631_Protein FAA", - "description": "Protein FAA for Gp0127631", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_proteins.faa", - "md5_checksum": "8c26f97b6a3196ed09dc4f54857d4972", - "id": "nmdc:8c26f97b6a3196ed09dc4f54857d4972", - "file_size_bytes": 72966123 - }, - { - "name": "Gp0127631_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127631", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_structural_annotation.gff", - "md5_checksum": "c7112633e322d7bc609bd479f7ddddb9", - "id": "nmdc:c7112633e322d7bc609bd479f7ddddb9", - "file_size_bytes": 2524 - }, - { - "name": "Gp0127631_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127631", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_functional_annotation.gff", - "md5_checksum": "2f6baf7176d2d904c02ae71875a8d326", - "id": 
"nmdc:2f6baf7176d2d904c02ae71875a8d326", - "file_size_bytes": 81929295 - }, - { - "name": "Gp0127631_KO TSV file", - "description": "KO TSV file for Gp0127631", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko.tsv", - "md5_checksum": "1abb9d211201bef0cb545e70a65de8cf", - "id": "nmdc:1abb9d211201bef0cb545e70a65de8cf", - "file_size_bytes": 8979915 - }, - { - "name": "Gp0127631_EC TSV file", - "description": "EC TSV file for Gp0127631", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ec.tsv", - "md5_checksum": "985a23612611fb258d2dbaee1e4458f5", - "id": "nmdc:985a23612611fb258d2dbaee1e4458f5", - "file_size_bytes": 5914861 - }, - { - "name": "Gp0127631_COG GFF file", - "description": "COG GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cog.gff", - "md5_checksum": "44c3fa82e71af5647b7619b0dd8a0728", - "id": "nmdc:44c3fa82e71af5647b7619b0dd8a0728", - "file_size_bytes": 47190255 - }, - { - "name": "Gp0127631_PFAM GFF file", - "description": "PFAM GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_pfam.gff", - "md5_checksum": "fb70c00e07d0b93b12cacbded87dcea6", - "id": "nmdc:fb70c00e07d0b93b12cacbded87dcea6", - "file_size_bytes": 35794646 - }, - { - "name": "Gp0127631_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_tigrfam.gff", - "md5_checksum": "98e1311ba5e96a176baccdb9a95439f9", - "id": "nmdc:98e1311ba5e96a176baccdb9a95439f9", - "file_size_bytes": 3856365 - }, - { - "name": "Gp0127631_SMART GFF file", - "description": "SMART GFF file for Gp0127631", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_smart.gff", - "md5_checksum": "0685da4455dde2dec9f221b9356f008c", - "id": "nmdc:0685da4455dde2dec9f221b9356f008c", - "file_size_bytes": 10561278 - }, - { - "name": "Gp0127631_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_supfam.gff", - "md5_checksum": "14b7f064a3a2fad830fad893ff3257bc", - "id": "nmdc:14b7f064a3a2fad830fad893ff3257bc", - "file_size_bytes": 59641133 - }, - { - "name": "Gp0127631_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cath_funfam.gff", - "md5_checksum": "1b8b64c254f88dd9a8e3cd42bde7b7ba", - "id": "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", - "file_size_bytes": 45160077 - }, - { - "name": "Gp0127631_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko_ec.gff", - "md5_checksum": "01769b6920ba82884f19ac3f88428db1", - "id": "nmdc:01769b6920ba82884f19ac3f88428db1", - "file_size_bytes": 28510384 - } - ] - }, + "contigs": 157859, + "name": "Assembly Activity for nmdc:mga0x5c381", + "ctg_max": 40273, + "gc_std": 0.10673, + "contig_bp": 68288279, + "gc_avg": 0.61453, + "started_at_time": "2021-10-11T02:25:13Z", + "scaf_bp": 68289129, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 157774, + "ended_at_time": "2021-10-11T04:45:59+00:00", + "ctg_l50": 400, + "ctg_l90": 285, + "ctg_n50": 49248, + "ctg_n90": 135173, + "scaf_l50": 400, + "scaf_l90": 285, + "scaf_n50": 49230, + "scaf_n90": 135095 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34706" 
+ "$oid": "649b009773e8249959349b3f" }, + "id": "nmdc:omprc-11-b051xn44", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:1eb44ff780f2aad1053ca336b53d7b98", - "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", - "nmdc:2f6baf7176d2d904c02ae71875a8d326" - ], - "too_short_contig_num": 219869, - "part_of": [ - "nmdc:mga0jx8k09" + "nmdc:bsm-11-jdsasr43" ], - "binned_contig_num": 506, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:53faea62cf1183292bc6fca374f75ed1", - "nmdc:7a6616d3262630c2aea2923e3c2683d0", - "nmdc:e16dde65e7229d69949c9e2dee7e2413", - "nmdc:58acda197bd8136a80d5047342008cdf", - "nmdc:8d5e2b8a8dede83c2f74182f506f9176" + "jgi:574fe09a7ded5e3df1ee1485" ], - "was_informed_by": "gold:Gp0127631", - "input_contig_num": 237399, - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0jx8k09", - "mags_list": [ - { - "number_of_contig": 151, - "completeness": 11.4, - "bin_name": "bins.1", - "gene_count": 748, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 268, - "completeness": 7.47, - "bin_name": "bins.2", - "gene_count": 1304, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - 
"gtdbtk_genus": "", - "num_t_rna": 12 - }, - { - "number_of_contig": 87, - "completeness": 13.32, - "bin_name": "bins.3", - "gene_count": 412, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" ], - "unbinned_contig_num": 17024, - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:40:31+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127631_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.tooShort.fa", - "md5_checksum": "53faea62cf1183292bc6fca374f75ed1", - "id": "nmdc:53faea62cf1183292bc6fca374f75ed1", - "file_size_bytes": 99316833 - }, - { - "name": "Gp0127631_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.unbinned.fa", - "md5_checksum": "7a6616d3262630c2aea2923e3c2683d0", - "id": "nmdc:7a6616d3262630c2aea2923e3c2683d0", - "file_size_bytes": 27381739 - }, - { - "name": "Gp0127631_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality 
assessment result for Gp0127631", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_checkm_qa.out", - "md5_checksum": "e16dde65e7229d69949c9e2dee7e2413", - "id": "nmdc:e16dde65e7229d69949c9e2dee7e2413", - "file_size_bytes": 1085 - }, - { - "name": "Gp0127631_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127631", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_hqmq_bin.zip", - "md5_checksum": "58acda197bd8136a80d5047342008cdf", - "id": "nmdc:58acda197bd8136a80d5047342008cdf", - "file_size_bytes": 182 - }, - { - "name": "Gp0127631_metabat2 bins", - "description": "metabat2 bins for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_metabat_bin.zip", - "md5_checksum": "8d5e2b8a8dede83c2f74182f506f9176", - "id": "nmdc:8d5e2b8a8dede83c2f74182f506f9176", - "file_size_bytes": 596616 - } + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127628" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b41" - }, - "id": "nmdc:omprc-11-9pbab972", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-3yjh4z33" - ], - "has_output": [ - 
"jgi:574fde7c7ded5e3df1ee1419" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127630" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c865" + "$oid": "649b009d6bdd4fd20273c863" }, "has_input": [ - "nmdc:0e737a8e36535f70bff074004ee1f9c0" + "nmdc:efca984ecf94cc8de2aeabf94e0b87cc" ], "part_of": [ - "nmdc:mga09n3g47" + "nmdc:mga0x5c381" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", - "nmdc:088fd18cb9169097e739289d2e5ebb13" + "nmdc:f6f1760721d73fc57919b2115a1d47ec", + "nmdc:2225f9d41343590d818186fa2d66852d" ], - "was_informed_by": "gold:Gp0127630", - "input_read_count": 28569382, - "output_read_bases": 4016672570, - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "was_informed_by": "gold:Gp0127628", + "input_read_count": 31715882, + "output_read_bases": 4516265181, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", "execution_resource": "NERSC-Cori", - "input_read_bases": 4313976682, - "name": "Read QC Activity for nmdc:mga09n3g47", - "output_read_count": 26868700, - "started_at_time": "2021-10-11T02:26:53Z", + "input_read_bases": 4789098182, + "name": "Read QC Activity for nmdc:mga0x5c381", + "output_read_count": 30212248, + "started_at_time": "2021-10-11T02:25:13Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:54:22+00:00", - "output_data_objects": [ - { - "name": "Gp0127630_Filtered Reads", - 
"description": "Filtered Reads for Gp0127630", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filtered.fastq.gz", - "md5_checksum": "eaffb16b5247d85c08f8af73bcb8b65e", - "id": "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", - "file_size_bytes": 2294158265 - }, - { - "name": "Gp0127630_Filtered Stats", - "description": "Filtered Stats for Gp0127630", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filterStats.txt", - "md5_checksum": "088fd18cb9169097e739289d2e5ebb13", - "id": "nmdc:088fd18cb9169097e739289d2e5ebb13", - "file_size_bytes": 288 - } - ] - }, + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf27" + "$oid": "649b009bff710ae353f8cf24" }, "has_input": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" + "nmdc:f6f1760721d73fc57919b2115a1d47ec" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "nmdc:2f21fd19f055d1931ab82016ed781a12", - "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" + "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "nmdc:335dbf6f1055de0950988a002f432c0b", + "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "nmdc:224085164a389c6f207967ed03b3e6af", + "nmdc:39ba17263c144761a8bdcc1645c034f5", + "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "nmdc:aae9e961d8ed716457616c8a8841037b", + "nmdc:ba83d6ab837403f4bcbc9400a0460457" ], - "was_informed_by": "gold:Gp0127630", - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "was_informed_by": "gold:Gp0127628", + 
"id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", - "started_at_time": "2021-10-11T02:26:53Z", + "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", + "started_at_time": "2021-10-11T02:25:13Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:54:22+00:00", - "output_data_objects": [ - { - "name": "Gp0127630_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", - "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", - "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "file_size_bytes": 3373 - }, - { - "name": "Gp0127630_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", - "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", - "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "file_size_bytes": 791488 - }, - { - "name": "Gp0127630_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127630", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", - "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", - "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "file_size_bytes": 235803 - }, - { - "name": "Gp0127630_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127630", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", - "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", - "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", - "file_size_bytes": 
1974171566 - }, - { - "name": "Gp0127630_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127630", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", - "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", - "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "file_size_bytes": 255012 - }, - { - "name": "Gp0127630_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127630", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", - "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", - "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "file_size_bytes": 2330430 - }, - { - "name": "Gp0127630_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127630", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", - "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", - "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "file_size_bytes": 1584744477 - }, - { - "name": "Gp0127630_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127630", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", - "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", - "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "file_size_bytes": 650172 - }, - { - "name": "Gp0127630_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127630", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", - 
"md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", - "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", - "file_size_bytes": 3962195 - } - ] - }, + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "61e71939833bcf838a6fff09" + "$oid": "61e7193b833bcf838a6fff9c" }, "has_input": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" + "nmdc:f6f1760721d73fc57919b2115a1d47ec" ], "part_of": [ - "nmdc:mga09n3g47" + "nmdc:mga0x5c381" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "nmdc:2f21fd19f055d1931ab82016ed781a12", - "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" + "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "nmdc:335dbf6f1055de0950988a002f432c0b", + "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "nmdc:224085164a389c6f207967ed03b3e6af", + "nmdc:39ba17263c144761a8bdcc1645c034f5", + "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "nmdc:aae9e961d8ed716457616c8a8841037b", + "nmdc:ba83d6ab837403f4bcbc9400a0460457" ], - "was_informed_by": "gold:Gp0127630", - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "was_informed_by": "gold:Gp0127628", + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", - "started_at_time": "2021-10-11T02:26:53Z", + "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", + "started_at_time": "2021-10-11T02:25:13Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": 
"2021-10-11T04:54:22+00:00", - "output_data_objects": [ - { - "name": "Gp0127630_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", - "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", - "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "file_size_bytes": 3373 - }, - { - "name": "Gp0127630_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", - "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", - "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "file_size_bytes": 791488 - }, - { - "name": "Gp0127630_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127630", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", - "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", - "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "file_size_bytes": 235803 - }, - { - "name": "Gp0127630_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127630", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", - "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", - "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", - "file_size_bytes": 1974171566 - }, - { - "name": "Gp0127630_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127630", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", - "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", 
- "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "file_size_bytes": 255012 - }, - { - "name": "Gp0127630_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127630", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", - "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", - "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "file_size_bytes": 2330430 - }, - { - "name": "Gp0127630_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127630", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", - "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", - "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "file_size_bytes": 1584744477 - }, - { - "name": "Gp0127630_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127630", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", - "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", - "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "file_size_bytes": 650172 - }, - { - "name": "Gp0127630_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127630", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", - "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", - "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", - "file_size_bytes": 3962195 - } - ] + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + 
"biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127631_Filtered Reads", + "description": "Filtered Reads for Gp0127631", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filtered.fastq.gz", + "md5_checksum": "6969fd7f4b1a5a34fb30d31b92cd6bf8", + "id": "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", + "file_size_bytes": 2030538721 + }, + { + "name": "Gp0127631_Filtered Stats", + "description": "Filtered Stats for Gp0127631", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filterStats.txt", + "md5_checksum": "b280141d234edf10cde8794539700654", + "id": "nmdc:b280141d234edf10cde8794539700654", + "file_size_bytes": 284 + }, + { + "name": "Gp0127631_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", + "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", + "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", + "file_size_bytes": 1227 + }, + { + "name": "Gp0127631_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", + "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", + "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "file_size_bytes": 647196 + }, + { + "name": "Gp0127631_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127631", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", + "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", + "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "file_size_bytes": 229312 + }, + { + "name": "Gp0127631_Centrifuge classification TSV report", + 
"description": "Centrifuge classification TSV report for Gp0127631", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", + "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", + "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", + "file_size_bytes": 1861431092 + }, + { + "name": "Gp0127631_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127631", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", + "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", + "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "file_size_bytes": 254665 + }, + { + "name": "Gp0127631_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127631", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", + "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", + "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", + "file_size_bytes": 2334578 + }, + { + "name": "Gp0127631_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127631", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", + "md5_checksum": "6a46583da876b9d6287302308df0b9fd", + "id": "nmdc:6a46583da876b9d6287302308df0b9fd", + "file_size_bytes": 1483354621 + }, + { + "name": "Gp0127631_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127631", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", + "md5_checksum": 
"af619dc5a0423509a4beaca26aa61000", + "id": "nmdc:af619dc5a0423509a4beaca26aa61000", + "file_size_bytes": 640329 + }, + { + "name": "Gp0127631_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127631", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", + "md5_checksum": "50093825ec73dcabe66aa353de766beb", + "id": "nmdc:50093825ec73dcabe66aa353de766beb", + "file_size_bytes": 3993246 + }, + { + "name": "Gp0127631_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", + "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", + "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", + "file_size_bytes": 1227 + }, + { + "name": "Gp0127631_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", + "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", + "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "file_size_bytes": 647196 + }, + { + "name": "Gp0127631_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127631", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", + "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", + "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "file_size_bytes": 229312 + }, + { + "name": "Gp0127631_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127631", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", + 
"md5_checksum": "0d1729a83798b752f33eeb8d97afe972", + "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", + "file_size_bytes": 1861431092 + }, + { + "name": "Gp0127631_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127631", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", + "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", + "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "file_size_bytes": 254665 + }, + { + "name": "Gp0127631_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127631", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", + "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", + "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", + "file_size_bytes": 2334578 + }, + { + "name": "Gp0127631_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127631", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", + "md5_checksum": "6a46583da876b9d6287302308df0b9fd", + "id": "nmdc:6a46583da876b9d6287302308df0b9fd", + "file_size_bytes": 1483354621 + }, + { + "name": "Gp0127631_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127631", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", + "md5_checksum": "af619dc5a0423509a4beaca26aa61000", + "id": "nmdc:af619dc5a0423509a4beaca26aa61000", + "file_size_bytes": 640329 + }, + { + "name": "Gp0127631_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127631", + "data_object_type": "Kraken2 Krona Plot", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", + "md5_checksum": "50093825ec73dcabe66aa353de766beb", + "id": "nmdc:50093825ec73dcabe66aa353de766beb", + "file_size_bytes": 3993246 + }, + { + "name": "Gp0127631_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127631", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_contigs.fna", + "md5_checksum": "1eb44ff780f2aad1053ca336b53d7b98", + "id": "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "file_size_bytes": 128714098 + }, + { + "name": "Gp0127631_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127631", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_scaffolds.fna", + "md5_checksum": "992fb303b5ced60489fea0ce6dae71f9", + "id": "nmdc:992fb303b5ced60489fea0ce6dae71f9", + "file_size_bytes": 127998496 + }, + { + "name": "Gp0127631_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_covstats.txt", + "md5_checksum": "a0f466071ed249babf1a5653e1c20a02", + "id": "nmdc:a0f466071ed249babf1a5653e1c20a02", + "file_size_bytes": 18831462 + }, + { + "name": "Gp0127631_Assembled AGP file", + "description": "Assembled AGP file for Gp0127631", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_assembly.agp", + "md5_checksum": "5eddebfbfabfd9c0e71c2699bee73870", + "id": "nmdc:5eddebfbfabfd9c0e71c2699bee73870", + "file_size_bytes": 17634272 + }, + { + "name": "Gp0127631_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127631", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_pairedMapped_sorted.bam", + "md5_checksum": "0ecd5e99ec93ba17c7b02483560bafdf", + "id": "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", + "file_size_bytes": 2245356551 + }, + { + "name": "Gp0127631_Protein FAA", + "description": "Protein FAA for Gp0127631", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_proteins.faa", + "md5_checksum": "8c26f97b6a3196ed09dc4f54857d4972", + "id": "nmdc:8c26f97b6a3196ed09dc4f54857d4972", + "file_size_bytes": 72966123 + }, + { + "name": "Gp0127631_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127631", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_structural_annotation.gff", + "md5_checksum": "c7112633e322d7bc609bd479f7ddddb9", + "id": "nmdc:c7112633e322d7bc609bd479f7ddddb9", + "file_size_bytes": 2524 + }, + { + "name": "Gp0127631_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127631", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_functional_annotation.gff", + "md5_checksum": "2f6baf7176d2d904c02ae71875a8d326", + "id": "nmdc:2f6baf7176d2d904c02ae71875a8d326", + "file_size_bytes": 81929295 + }, + { + "name": "Gp0127631_KO TSV file", + "description": "KO TSV file for Gp0127631", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko.tsv", + "md5_checksum": "1abb9d211201bef0cb545e70a65de8cf", + "id": "nmdc:1abb9d211201bef0cb545e70a65de8cf", + "file_size_bytes": 8979915 + }, + { + "name": "Gp0127631_EC TSV file", + "description": "EC TSV file for Gp0127631", + "data_object_type": "Annotation Enzyme Commission", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ec.tsv", + "md5_checksum": "985a23612611fb258d2dbaee1e4458f5", + "id": "nmdc:985a23612611fb258d2dbaee1e4458f5", + "file_size_bytes": 5914861 + }, + { + "name": "Gp0127631_COG GFF file", + "description": "COG GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cog.gff", + "md5_checksum": "44c3fa82e71af5647b7619b0dd8a0728", + "id": "nmdc:44c3fa82e71af5647b7619b0dd8a0728", + "file_size_bytes": 47190255 + }, + { + "name": "Gp0127631_PFAM GFF file", + "description": "PFAM GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_pfam.gff", + "md5_checksum": "fb70c00e07d0b93b12cacbded87dcea6", + "id": "nmdc:fb70c00e07d0b93b12cacbded87dcea6", + "file_size_bytes": 35794646 + }, + { + "name": "Gp0127631_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_tigrfam.gff", + "md5_checksum": "98e1311ba5e96a176baccdb9a95439f9", + "id": "nmdc:98e1311ba5e96a176baccdb9a95439f9", + "file_size_bytes": 3856365 + }, + { + "name": "Gp0127631_SMART GFF file", + "description": "SMART GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_smart.gff", + "md5_checksum": "0685da4455dde2dec9f221b9356f008c", + "id": "nmdc:0685da4455dde2dec9f221b9356f008c", + "file_size_bytes": 10561278 + }, + { + "name": "Gp0127631_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_supfam.gff", + "md5_checksum": "14b7f064a3a2fad830fad893ff3257bc", + "id": "nmdc:14b7f064a3a2fad830fad893ff3257bc", + "file_size_bytes": 59641133 + }, + { + "name": "Gp0127631_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127631", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cath_funfam.gff", + "md5_checksum": "1b8b64c254f88dd9a8e3cd42bde7b7ba", + "id": "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", + "file_size_bytes": 45160077 + }, + { + "name": "Gp0127631_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko_ec.gff", + "md5_checksum": "01769b6920ba82884f19ac3f88428db1", + "id": "nmdc:01769b6920ba82884f19ac3f88428db1", + "file_size_bytes": 28510384 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127631_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.tooShort.fa", + "md5_checksum": "53faea62cf1183292bc6fca374f75ed1", + "id": "nmdc:53faea62cf1183292bc6fca374f75ed1", + "file_size_bytes": 99316833 + }, + { + "name": "Gp0127631_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.unbinned.fa", + "md5_checksum": "7a6616d3262630c2aea2923e3c2683d0", + "id": "nmdc:7a6616d3262630c2aea2923e3c2683d0", + "file_size_bytes": 27381739 + }, + { + "name": "Gp0127631_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127631", + "data_object_type": "CheckM Statistics", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_checkm_qa.out", + "md5_checksum": "e16dde65e7229d69949c9e2dee7e2413", + "id": "nmdc:e16dde65e7229d69949c9e2dee7e2413", + "file_size_bytes": 1085 + }, + { + "name": "Gp0127631_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127631", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_hqmq_bin.zip", + "md5_checksum": "58acda197bd8136a80d5047342008cdf", + "id": "nmdc:58acda197bd8136a80d5047342008cdf", + "file_size_bytes": 182 }, + { + "name": "Gp0127631_metabat2 bins", + "description": "metabat2 bins for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_metabat_bin.zip", + "md5_checksum": "8d5e2b8a8dede83c2f74182f506f9176", + "id": "nmdc:8d5e2b8a8dede83c2f74182f506f9176", + "file_size_bytes": 596616 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f95" + "$oid": "649b0052ec087f6bbab34706" }, "has_input": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" + "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", + "nmdc:2f6baf7176d2d904c02ae71875a8d326" ], + "too_short_contig_num": 219869, "part_of": [ - "nmdc:mga09n3g47" + "nmdc:mga0jx8k09" ], - "ctg_logsum": 77070, - "scaf_logsum": 77428, - "gap_pct": 0.00093, + "binned_contig_num": 506, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7b35237c97a75f17ba74be0fe96416c9", - "nmdc:118dd6190bdaf127d3c105cc73012cc3", - "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", - "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", - "nmdc:873f16e03e0f94c9ec28573fb10ad6d8" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:53faea62cf1183292bc6fca374f75ed1", + "nmdc:7a6616d3262630c2aea2923e3c2683d0", + 
"nmdc:e16dde65e7229d69949c9e2dee7e2413", + "nmdc:58acda197bd8136a80d5047342008cdf", + "nmdc:8d5e2b8a8dede83c2f74182f506f9176" ], - "asm_score": 6.312, - "was_informed_by": "gold:Gp0127630", - "ctg_powsum": 8755.579, - "scaf_max": 31136, - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "scaf_powsum": 8795.268, + "was_informed_by": "gold:Gp0127631", + "input_contig_num": 237399, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", "execution_resource": "NERSC-Cori", - "contigs": 127321, - "name": "Assembly Activity for nmdc:mga09n3g47", - "ctg_max": 31136, - "gc_std": 0.09346, - "contig_bp": 52740992, - "gc_avg": 0.61288, - "started_at_time": "2021-10-11T02:26:53Z", - "scaf_bp": 52741482, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 127272, - "ended_at_time": "2021-10-11T04:54:22+00:00", - "ctg_l50": 372, - "ctg_l90": 284, - "ctg_n50": 41888, - "ctg_n90": 110882, - "scaf_l50": 372, - "scaf_l90": 284, - "scaf_n50": 41856, - "scaf_n90": 110834, - "output_data_objects": [ - { - "name": "Gp0127630_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127630", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", - "md5_checksum": "7b35237c97a75f17ba74be0fe96416c9", - "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", - "file_size_bytes": 57511432 - }, - { - "name": "Gp0127630_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127630", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", - "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", - "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", - "file_size_bytes": 57128690 - }, - { - "name": "Gp0127630_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127630", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", - "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", - "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", - "file_size_bytes": 10020081 - }, - { - "name": "Gp0127630_Assembled AGP file", - "description": "Assembled AGP file for Gp0127630", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_assembly.agp", - "md5_checksum": "33d86c437a046031ea2b4bed5a2d2d6b", - "id": "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", - "file_size_bytes": 9337675 - }, - { - "name": "Gp0127630_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127630", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_pairedMapped_sorted.bam", - "md5_checksum": "873f16e03e0f94c9ec28573fb10ad6d8", - "id": "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", - "file_size_bytes": 2461822274 - } - ] - }, - { - "_id": { - "$oid": "649b005bbf2caae0415ef9ad" - }, - "has_input": [ - "nmdc:7b35237c97a75f17ba74be0fe96416c9" - ], - "part_of": [ - "nmdc:mga09n3g47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f7735eb161908954feda34285993f1b9", - "nmdc:c6053080461e8cc0bbadd13e0775e108", - "nmdc:4878e3d5a95e67c0bb81da53e03400be", - "nmdc:dbc4d4e179a86aa95211de3e62219191", - "nmdc:5bdd96be3fbc888969d92c2ed6392846", - "nmdc:78026e2afc7644463828fbbfa4d8d727", - "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", - "nmdc:f949efd8a6b6affb4707a4314980e86e", - "nmdc:2f9f0b8164c35117da1e121e63ad772f", - "nmdc:1e3d433d3cb308d086dec26916b6b1bf", - "nmdc:d467bd6407a5a41798aa84df69a4a31d", - "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1" - ], - "was_informed_by": "gold:Gp0127630", - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga09n3g47", - 
"started_at_time": "2021-10-11T02:26:53Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:54:22+00:00", - "output_data_objects": [ - { - "name": "Gp0127630_Protein FAA", - "description": "Protein FAA for Gp0127630", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_proteins.faa", - "md5_checksum": "f7735eb161908954feda34285993f1b9", - "id": "nmdc:f7735eb161908954feda34285993f1b9", - "file_size_bytes": 34246728 - }, - { - "name": "Gp0127630_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127630", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_structural_annotation.gff", - "md5_checksum": "c6053080461e8cc0bbadd13e0775e108", - "id": "nmdc:c6053080461e8cc0bbadd13e0775e108", - "file_size_bytes": 2515 - }, - { - "name": "Gp0127630_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127630", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_functional_annotation.gff", - "md5_checksum": "4878e3d5a95e67c0bb81da53e03400be", - "id": "nmdc:4878e3d5a95e67c0bb81da53e03400be", - "file_size_bytes": 40345940 - }, - { - "name": "Gp0127630_KO TSV file", - "description": "KO TSV file for Gp0127630", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko.tsv", - "md5_checksum": "dbc4d4e179a86aa95211de3e62219191", - "id": "nmdc:dbc4d4e179a86aa95211de3e62219191", - "file_size_bytes": 4543233 - }, - { - "name": "Gp0127630_EC TSV file", - "description": "EC TSV file for Gp0127630", - "data_object_type": "Annotation Enzyme Commission", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ec.tsv", - "md5_checksum": "5bdd96be3fbc888969d92c2ed6392846", - "id": "nmdc:5bdd96be3fbc888969d92c2ed6392846", - "file_size_bytes": 3027431 - }, - { - "name": "Gp0127630_COG GFF file", - "description": "COG GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cog.gff", - "md5_checksum": "78026e2afc7644463828fbbfa4d8d727", - "id": "nmdc:78026e2afc7644463828fbbfa4d8d727", - "file_size_bytes": 23085097 - }, - { - "name": "Gp0127630_PFAM GFF file", - "description": "PFAM GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_pfam.gff", - "md5_checksum": "ef99a9afe80e1acc086694ca8ab4cca7", - "id": "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", - "file_size_bytes": 16769237 - }, - { - "name": "Gp0127630_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_tigrfam.gff", - "md5_checksum": "f949efd8a6b6affb4707a4314980e86e", - "id": "nmdc:f949efd8a6b6affb4707a4314980e86e", - "file_size_bytes": 1710760 - }, - { - "name": "Gp0127630_SMART GFF file", - "description": "SMART GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_smart.gff", - "md5_checksum": "2f9f0b8164c35117da1e121e63ad772f", - "id": "nmdc:2f9f0b8164c35117da1e121e63ad772f", - "file_size_bytes": 5166448 - }, - { - "name": "Gp0127630_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_supfam.gff", - "md5_checksum": "1e3d433d3cb308d086dec26916b6b1bf", - "id": "nmdc:1e3d433d3cb308d086dec26916b6b1bf", - "file_size_bytes": 29155547 - }, - { - "name": "Gp0127630_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127630", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cath_funfam.gff", - "md5_checksum": "d467bd6407a5a41798aa84df69a4a31d", - "id": "nmdc:d467bd6407a5a41798aa84df69a4a31d", - "file_size_bytes": 21679406 - }, - { - "name": "Gp0127630_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko_ec.gff", - "md5_checksum": "4cb3db8f0ff98bf805f4750af65eb9d1", - "id": "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1", - "file_size_bytes": 14461252 - } - ] - }, - { - "_id": { - "$oid": "649b0052ec087f6bbab34707" - }, - "has_input": [ - "nmdc:7b35237c97a75f17ba74be0fe96416c9", - "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", - "nmdc:4878e3d5a95e67c0bb81da53e03400be" - ], - "too_short_contig_num": 123771, - "part_of": [ - "nmdc:mga09n3g47" - ], - "binned_contig_num": 313, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", - "nmdc:acd651395108c71dd20eeebf9b177d06", - "nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", - "nmdc:287529453d35eab4acb72032a59994d0", - "nmdc:4ad58f05545a75edc1b933a0b0286d16" - ], - "was_informed_by": "gold:Gp0127630", - "input_contig_num": 127321, - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga09n3g47", + "name": "MAGs Analysis Activity for nmdc:mga0jx8k09", "mags_list": [ { - "number_of_contig": 86, - "completeness": 19.9, + "number_of_contig": 151, + "completeness": 11.4, "bin_name": "bins.1", - "gene_count": 422, + "gene_count": 748, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -10081,5815 +9616,5429 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 7 + "num_t_rna": 6 }, { - "number_of_contig": 227, - "completeness": 70.23, + "number_of_contig": 268, + "completeness": 7.47, "bin_name": "bins.2", - 
"gene_count": 1932, - "bin_quality": "MQ", + "gene_count": 1304, + "bin_quality": "LQ", "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", + "gtdbtk_order": "", "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 1.94, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 35 - } - ], - "unbinned_contig_num": 3237, - "started_at_time": "2021-10-11T02:26:53Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:54:22+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127630_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.tooShort.fa", - "md5_checksum": "ce09d99bdfdf0379b09a3ae75c65d830", - "id": "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", - "file_size_bytes": 50450286 - }, - { - "name": "Gp0127630_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.unbinned.fa", - "md5_checksum": "acd651395108c71dd20eeebf9b177d06", - "id": "nmdc:acd651395108c71dd20eeebf9b177d06", - "file_size_bytes": 5114463 - }, - { - "name": 
"Gp0127630_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127630", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_checkm_qa.out", - "md5_checksum": "850a6fbbd2993f4dfeb5a40485e67f8e", - "id": "nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", - "file_size_bytes": 948 - }, - { - "name": "Gp0127630_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127630", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_hqmq_bin.zip", - "md5_checksum": "287529453d35eab4acb72032a59994d0", - "id": "nmdc:287529453d35eab4acb72032a59994d0", - "file_size_bytes": 484667 + "num_t_rna": 12 }, { - "name": "Gp0127630_metabat2 bins", - "description": "metabat2 bins for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_metabat_bin.zip", - "md5_checksum": "4ad58f05545a75edc1b933a0b0286d16", - "id": "nmdc:4ad58f05545a75edc1b933a0b0286d16", - "file_size_bytes": 110526 + "number_of_contig": 87, + "completeness": 13.32, + "bin_name": "bins.3", + "gene_count": 412, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 } - ] + ], + "unbinned_contig_num": 17024, + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b42" - }, - "id": "nmdc:omprc-11-0g415160", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", - "description": "Riverbed sediment samples were 
collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-nbgp1x53" - ], - "has_output": [ - "jgi:574fde5e7ded5e3df1ee1401" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127633" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c867" + "$oid": "649b005bbf2caae0415ef9a8" }, "has_input": [ - "nmdc:c0b8d6516c48cfe5a0b110abe67ee983" + "nmdc:1eb44ff780f2aad1053ca336b53d7b98" ], "part_of": [ - "nmdc:mga05zvf81" + "nmdc:mga0jx8k09" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", - "nmdc:eccf0501d08f920a88b6598d573a8e3e" + "nmdc:8c26f97b6a3196ed09dc4f54857d4972", + "nmdc:c7112633e322d7bc609bd479f7ddddb9", + "nmdc:2f6baf7176d2d904c02ae71875a8d326", + "nmdc:1abb9d211201bef0cb545e70a65de8cf", + "nmdc:985a23612611fb258d2dbaee1e4458f5", + "nmdc:44c3fa82e71af5647b7619b0dd8a0728", + "nmdc:fb70c00e07d0b93b12cacbded87dcea6", + "nmdc:98e1311ba5e96a176baccdb9a95439f9", + "nmdc:0685da4455dde2dec9f221b9356f008c", + "nmdc:14b7f064a3a2fad830fad893ff3257bc", + "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", + "nmdc:01769b6920ba82884f19ac3f88428db1" ], - "was_informed_by": "gold:Gp0127633", - "input_read_count": 23291434, - "output_read_bases": 3367024367, - 
"id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "was_informed_by": "gold:Gp0127631", + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", "execution_resource": "NERSC-Cori", - "input_read_bases": 3517006534, - "name": "Read QC Activity for nmdc:mga05zvf81", - "output_read_count": 22556158, - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:40:06+00:00", - "output_data_objects": [ - { - "name": "Gp0127633_Filtered Reads", - "description": "Filtered Reads for Gp0127633", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filtered.fastq.gz", - "md5_checksum": "7cbd497624d8b60ab2a5e7fdbe4730f2", - "id": "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", - "file_size_bytes": 1727224362 - }, - { - "name": "Gp0127633_Filtered Stats", - "description": "Filtered Stats for Gp0127633", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filterStats.txt", - "md5_checksum": "eccf0501d08f920a88b6598d573a8e3e", - "id": "nmdc:eccf0501d08f920a88b6598d573a8e3e", - "file_size_bytes": 280 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0jx8k09", + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf29" + "$oid": "649b005f2ca5ee4adb139f94" }, "has_input": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" ], + "part_of": [ + "nmdc:mga0jx8k09" + ], + "ctg_logsum": 306128, + "scaf_logsum": 307525, + "gap_pct": 0.00196, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8bd9eb762acabbac5d079c379c28e381", - "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "nmdc:f445af1a7774572d156f55a898d26f09", - "nmdc:e11fcbf66318878c05984fa3d893e3b7", - 
"nmdc:28beb8baabdaf346f2066b40f375a152", - "nmdc:1f74a43724c4afed5563499d05601e22", - "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "nmdc:275268a6b5aca33c427d11877bcfa674", - "nmdc:89e810af4915f0e117eaa60550587453" + "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "nmdc:992fb303b5ced60489fea0ce6dae71f9", + "nmdc:a0f466071ed249babf1a5653e1c20a02", + "nmdc:5eddebfbfabfd9c0e71c2699bee73870", + "nmdc:0ecd5e99ec93ba17c7b02483560bafdf" ], - "was_informed_by": "gold:Gp0127633", - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "asm_score": 3.117, + "was_informed_by": "gold:Gp0127631", + "ctg_powsum": 32898, + "scaf_max": 14244, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "scaf_powsum": 33057, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:40:06+00:00", - "output_data_objects": [ - { - "name": "Gp0127633_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", - "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", - "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", - "file_size_bytes": 875 - }, - { - "name": "Gp0127633_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", - "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", - "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "file_size_bytes": 578856 - }, - { - "name": "Gp0127633_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127633", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", - "md5_checksum": 
"f445af1a7774572d156f55a898d26f09", - "id": "nmdc:f445af1a7774572d156f55a898d26f09", - "file_size_bytes": 228067 - }, - { - "name": "Gp0127633_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127633", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", - "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", - "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", - "file_size_bytes": 1646942155 - }, - { - "name": "Gp0127633_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127633", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", - "md5_checksum": "28beb8baabdaf346f2066b40f375a152", - "id": "nmdc:28beb8baabdaf346f2066b40f375a152", - "file_size_bytes": 252735 - }, - { - "name": "Gp0127633_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127633", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", - "md5_checksum": "1f74a43724c4afed5563499d05601e22", - "id": "nmdc:1f74a43724c4afed5563499d05601e22", - "file_size_bytes": 2329168 - }, - { - "name": "Gp0127633_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127633", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", - "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", - "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "file_size_bytes": 1310443491 - }, - { - "name": "Gp0127633_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127633", - "data_object_type": 
"Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", - "md5_checksum": "275268a6b5aca33c427d11877bcfa674", - "id": "nmdc:275268a6b5aca33c427d11877bcfa674", - "file_size_bytes": 621441 - }, - { - "name": "Gp0127633_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127633", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", - "md5_checksum": "89e810af4915f0e117eaa60550587453", - "id": "nmdc:89e810af4915f0e117eaa60550587453", - "file_size_bytes": 3891844 - } - ] - }, + "contigs": 237399, + "name": "Assembly Activity for nmdc:mga0jx8k09", + "ctg_max": 14244, + "gc_std": 0.09594, + "contig_bp": 119367623, + "gc_avg": 0.62364, + "started_at_time": "2021-10-11T02:26:22Z", + "scaf_bp": 119369963, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 237183, + "ended_at_time": "2021-10-11T04:40:31+00:00", + "ctg_l50": 499, + "ctg_l90": 292, + "ctg_n50": 64310, + "ctg_n90": 195626, + "scaf_l50": 500, + "scaf_l90": 292, + "scaf_n50": 64017, + "scaf_n90": 195424 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e7193c833bcf838a70001a" + "$oid": "649b009773e8249959349b40" }, + "id": "nmdc:omprc-11-k8kt2j31", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" - ], - "part_of": [ - "nmdc:mga05zvf81" + "nmdc:bsm-11-4vqhvw07" ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8bd9eb762acabbac5d079c379c28e381", - "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "nmdc:f445af1a7774572d156f55a898d26f09", - "nmdc:e11fcbf66318878c05984fa3d893e3b7", - "nmdc:28beb8baabdaf346f2066b40f375a152", - "nmdc:1f74a43724c4afed5563499d05601e22", - "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "nmdc:275268a6b5aca33c427d11877bcfa674", - "nmdc:89e810af4915f0e117eaa60550587453" + "jgi:574fde5b7ded5e3df1ee13ff" ], - "was_informed_by": "gold:Gp0127633", - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:40:06+00:00", - "output_data_objects": [ - { - "name": "Gp0127633_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", - "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", - "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", - "file_size_bytes": 875 - }, - { - "name": "Gp0127633_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", - "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", - "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "file_size_bytes": 578856 - }, - { - "name": "Gp0127633_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127633", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", - "md5_checksum": "f445af1a7774572d156f55a898d26f09", - "id": "nmdc:f445af1a7774572d156f55a898d26f09", - "file_size_bytes": 228067 - }, - { - "name": 
"Gp0127633_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127633", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", - "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", - "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", - "file_size_bytes": 1646942155 - }, - { - "name": "Gp0127633_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127633", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", - "md5_checksum": "28beb8baabdaf346f2066b40f375a152", - "id": "nmdc:28beb8baabdaf346f2066b40f375a152", - "file_size_bytes": 252735 - }, - { - "name": "Gp0127633_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127633", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", - "md5_checksum": "1f74a43724c4afed5563499d05601e22", - "id": "nmdc:1f74a43724c4afed5563499d05601e22", - "file_size_bytes": 2329168 - }, - { - "name": "Gp0127633_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127633", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", - "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", - "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "file_size_bytes": 1310443491 - }, - { - "name": "Gp0127633_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127633", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", - "md5_checksum": "275268a6b5aca33c427d11877bcfa674", - "id": "nmdc:275268a6b5aca33c427d11877bcfa674", - "file_size_bytes": 621441 - }, - { - "name": "Gp0127633_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127633", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", - "md5_checksum": "89e810af4915f0e117eaa60550587453", - "id": "nmdc:89e810af4915f0e117eaa60550587453", - "file_size_bytes": 3891844 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127631" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f97" + "$oid": "649b009d6bdd4fd20273c862" }, "has_input": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" + "nmdc:9c97e4b734b9cac731fe30fb07a32bb7" ], "part_of": [ - "nmdc:mga05zvf81" + "nmdc:mga0jx8k09" ], - "ctg_logsum": 378958, - "scaf_logsum": 380592, - "gap_pct": 0.00189, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ea5ca9478871b3e2600e1df0d748cbef", - "nmdc:327e130872e4c5faac2f1c9f8dea2316", - "nmdc:f61f1e62791a38beae95bd95833a6784", - "nmdc:416254a3bfc685dd16c11d65a222305f", - "nmdc:bc054294600fa310924f104484effd3e" + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", + "nmdc:b280141d234edf10cde8794539700654" ], - "asm_score": 4.48, - "was_informed_by": 
"gold:Gp0127633", - "ctg_powsum": 41464, - "scaf_max": 30530, - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "scaf_powsum": 41655, + "was_informed_by": "gold:Gp0127631", + "input_read_count": 26419652, + "output_read_bases": 3798930297, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", "execution_resource": "NERSC-Cori", - "contigs": 272879, - "name": "Assembly Activity for nmdc:mga05zvf81", - "ctg_max": 30530, - "gc_std": 0.08353, - "contig_bp": 141974737, - "gc_avg": 0.63381, - "started_at_time": "2021-10-11T02:24:58Z", - "scaf_bp": 141977427, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 272628, - "ended_at_time": "2021-10-11T03:40:06+00:00", - "ctg_l50": 526, - "ctg_l90": 298, - "ctg_n50": 72824, - "ctg_n90": 224178, - "scaf_l50": 527, - "scaf_l90": 298, - "scaf_n50": 72571, - "scaf_n90": 223970, - "output_data_objects": [ - { - "name": "Gp0127633_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127633", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_contigs.fna", - "md5_checksum": "ea5ca9478871b3e2600e1df0d748cbef", - "id": "nmdc:ea5ca9478871b3e2600e1df0d748cbef", - "file_size_bytes": 152814586 - }, - { - "name": "Gp0127633_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127633", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_scaffolds.fna", - "md5_checksum": "327e130872e4c5faac2f1c9f8dea2316", - "id": "nmdc:327e130872e4c5faac2f1c9f8dea2316", - "file_size_bytes": 151993436 - }, - { - "name": "Gp0127633_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_covstats.txt", - "md5_checksum": "f61f1e62791a38beae95bd95833a6784", - "id": "nmdc:f61f1e62791a38beae95bd95833a6784", - "file_size_bytes": 
21678212 - }, - { - "name": "Gp0127633_Assembled AGP file", - "description": "Assembled AGP file for Gp0127633", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_assembly.agp", - "md5_checksum": "416254a3bfc685dd16c11d65a222305f", - "id": "nmdc:416254a3bfc685dd16c11d65a222305f", - "file_size_bytes": 20304047 - }, - { - "name": "Gp0127633_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127633", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_pairedMapped_sorted.bam", - "md5_checksum": "bc054294600fa310924f104484effd3e", - "id": "nmdc:bc054294600fa310924f104484effd3e", - "file_size_bytes": 1959649749 - } - ] - }, + "input_read_bases": 3989367452, + "name": "Read QC Activity for nmdc:mga0jx8k09", + "output_read_count": 25434840, + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9b0" + "$oid": "649b009bff710ae353f8cf22" }, "has_input": [ - "nmdc:ea5ca9478871b3e2600e1df0d748cbef" - ], - "part_of": [ - "nmdc:mga05zvf81" + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8defcf55f08cd56d8b2560e27f490ca5", - "nmdc:a6031c0a101419dd413a0804937425ca", - "nmdc:43069b1146c84c064b7ff334dc9ff100", - "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", - "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", - "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", - "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", - "nmdc:8f7429420cbefb9e27bcdbe6252e5288", - "nmdc:6d69127dc30609e4861a7b2443b99164", - "nmdc:00243bcaf50313d937a7685380a876bb", - "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", - "nmdc:907439e314b4f4623244e2cec8532098" + 
"nmdc:b78e8246144185beb95c0caf65ef1f1a", + "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "nmdc:0d1729a83798b752f33eeb8d97afe972", + "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "nmdc:ea27c005b1788434c2198ad60939d4bc", + "nmdc:6a46583da876b9d6287302308df0b9fd", + "nmdc:af619dc5a0423509a4beaca26aa61000", + "nmdc:50093825ec73dcabe66aa353de766beb" ], - "was_informed_by": "gold:Gp0127633", - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "was_informed_by": "gold:Gp0127631", + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga05zvf81", - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:40:06+00:00", - "output_data_objects": [ - { - "name": "Gp0127633_Protein FAA", - "description": "Protein FAA for Gp0127633", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_proteins.faa", - "md5_checksum": "8defcf55f08cd56d8b2560e27f490ca5", - "id": "nmdc:8defcf55f08cd56d8b2560e27f490ca5", - "file_size_bytes": 85918779 - }, - { - "name": "Gp0127633_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127633", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_structural_annotation.gff", - "md5_checksum": "a6031c0a101419dd413a0804937425ca", - "id": "nmdc:a6031c0a101419dd413a0804937425ca", - "file_size_bytes": 2527 - }, - { - "name": "Gp0127633_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127633", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_functional_annotation.gff", - "md5_checksum": "43069b1146c84c064b7ff334dc9ff100", - "id": 
"nmdc:43069b1146c84c064b7ff334dc9ff100", - "file_size_bytes": 95647963 - }, - { - "name": "Gp0127633_KO TSV file", - "description": "KO TSV file for Gp0127633", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko.tsv", - "md5_checksum": "acc5a2c445dc6e00668c9a5d50aecdb8", - "id": "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", - "file_size_bytes": 10638485 - }, - { - "name": "Gp0127633_EC TSV file", - "description": "EC TSV file for Gp0127633", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ec.tsv", - "md5_checksum": "ec91d5d7a8af4fb845e22cbe7ab82bde", - "id": "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", - "file_size_bytes": 6991172 - }, - { - "name": "Gp0127633_COG GFF file", - "description": "COG GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cog.gff", - "md5_checksum": "3cd238ff1bb176b7a159aeb34a7c4683", - "id": "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", - "file_size_bytes": 56525933 - }, - { - "name": "Gp0127633_PFAM GFF file", - "description": "PFAM GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_pfam.gff", - "md5_checksum": "5103ea2a481ea3b82f1aa98ab7a36998", - "id": "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", - "file_size_bytes": 43189711 - }, - { - "name": "Gp0127633_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_tigrfam.gff", - "md5_checksum": "8f7429420cbefb9e27bcdbe6252e5288", - "id": "nmdc:8f7429420cbefb9e27bcdbe6252e5288", - "file_size_bytes": 4806086 - }, - { - "name": "Gp0127633_SMART GFF file", - "description": "SMART GFF file for Gp0127633", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_smart.gff", - "md5_checksum": "6d69127dc30609e4861a7b2443b99164", - "id": "nmdc:6d69127dc30609e4861a7b2443b99164", - "file_size_bytes": 12776467 - }, - { - "name": "Gp0127633_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_supfam.gff", - "md5_checksum": "00243bcaf50313d937a7685380a876bb", - "id": "nmdc:00243bcaf50313d937a7685380a876bb", - "file_size_bytes": 70607320 - }, - { - "name": "Gp0127633_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cath_funfam.gff", - "md5_checksum": "ec6ffd40772dee9d48dbec0beb6b3321", - "id": "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", - "file_size_bytes": 53950895 - }, - { - "name": "Gp0127633_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko_ec.gff", - "md5_checksum": "907439e314b4f4623244e2cec8532098", - "id": "nmdc:907439e314b4f4623244e2cec8532098", - "file_size_bytes": 33781965 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:40:31+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab3470e" + "$oid": "61e71938833bcf838a6ffe7a" }, "has_input": [ - "nmdc:ea5ca9478871b3e2600e1df0d748cbef", - "nmdc:bc054294600fa310924f104484effd3e", - "nmdc:43069b1146c84c064b7ff334dc9ff100" + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" ], - "too_short_contig_num": 252383, "part_of": [ - 
"nmdc:mga05zvf81" + "nmdc:mga0jx8k09" ], - "binned_contig_num": 738, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:00415cf72f9a77f907e3467a08b123c5", - "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", - "nmdc:3f435d6da551400a4ba4400fa3608e7f", - "nmdc:c66f93153962f8b80c8f3d6978b6d802", - "nmdc:ce2a364ec51a1d6311a319509751266e" + "nmdc:b78e8246144185beb95c0caf65ef1f1a", + "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "nmdc:0d1729a83798b752f33eeb8d97afe972", + "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "nmdc:ea27c005b1788434c2198ad60939d4bc", + "nmdc:6a46583da876b9d6287302308df0b9fd", + "nmdc:af619dc5a0423509a4beaca26aa61000", + "nmdc:50093825ec73dcabe66aa353de766beb" ], - "was_informed_by": "gold:Gp0127633", - "input_contig_num": 272872, - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "was_informed_by": "gold:Gp0127631", + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga05zvf81", - "mags_list": [ - { - "number_of_contig": 83, - "completeness": 0.0, - "bin_name": "bins.1", - "gene_count": 600, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - }, + "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:40:31+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127630_Filtered Reads", + "description": "Filtered Reads for Gp0127630", + "data_object_type": 
"Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filtered.fastq.gz", + "md5_checksum": "eaffb16b5247d85c08f8af73bcb8b65e", + "id": "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", + "file_size_bytes": 2294158265 + }, + { + "name": "Gp0127630_Filtered Stats", + "description": "Filtered Stats for Gp0127630", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filterStats.txt", + "md5_checksum": "088fd18cb9169097e739289d2e5ebb13", + "id": "nmdc:088fd18cb9169097e739289d2e5ebb13", + "file_size_bytes": 288 + }, + { + "name": "Gp0127630_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", + "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", + "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "file_size_bytes": 3373 + }, + { + "name": "Gp0127630_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", + "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", + "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "file_size_bytes": 791488 + }, + { + "name": "Gp0127630_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127630", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", + "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", + "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "file_size_bytes": 235803 + }, + { + "name": "Gp0127630_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127630", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", + "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", + "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", + "file_size_bytes": 1974171566 + }, + { + "name": "Gp0127630_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127630", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", + "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", + "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "file_size_bytes": 255012 + }, + { + "name": "Gp0127630_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127630", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", + "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", + "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "file_size_bytes": 2330430 + }, + { + "name": "Gp0127630_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127630", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", + "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", + "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "file_size_bytes": 1584744477 + }, + { + "name": "Gp0127630_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127630", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", + "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", + "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "file_size_bytes": 650172 + }, + { + "name": "Gp0127630_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127630", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", + "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", + "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", + "file_size_bytes": 3962195 + }, + { + "name": "Gp0127630_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", + "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", + "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "file_size_bytes": 3373 + }, + { + "name": "Gp0127630_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", + "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", + "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "file_size_bytes": 791488 + }, + { + "name": "Gp0127630_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127630", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", + "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", + "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "file_size_bytes": 235803 + }, + { + "name": "Gp0127630_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127630", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", + "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", + "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", + "file_size_bytes": 1974171566 + }, + { + "name": 
"Gp0127630_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127630", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", + "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", + "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "file_size_bytes": 255012 + }, + { + "name": "Gp0127630_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127630", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", + "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", + "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "file_size_bytes": 2330430 + }, + { + "name": "Gp0127630_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127630", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", + "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", + "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "file_size_bytes": 1584744477 + }, + { + "name": "Gp0127630_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127630", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", + "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", + "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "file_size_bytes": 650172 + }, + { + "name": "Gp0127630_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127630", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", + "md5_checksum": 
"fc8a855916eb1ba0f7d278b7c1f1786f", + "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", + "file_size_bytes": 3962195 + }, + { + "name": "Gp0127630_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127630", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", + "md5_checksum": "7b35237c97a75f17ba74be0fe96416c9", + "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "file_size_bytes": 57511432 + }, + { + "name": "Gp0127630_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127630", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", + "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", + "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", + "file_size_bytes": 57128690 + }, + { + "name": "Gp0127630_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", + "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", + "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", + "file_size_bytes": 10020081 + }, + { + "name": "Gp0127630_Assembled AGP file", + "description": "Assembled AGP file for Gp0127630", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_assembly.agp", + "md5_checksum": "33d86c437a046031ea2b4bed5a2d2d6b", + "id": "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", + "file_size_bytes": 9337675 + }, + { + "name": "Gp0127630_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127630", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_pairedMapped_sorted.bam", + "md5_checksum": "873f16e03e0f94c9ec28573fb10ad6d8", + "id": 
"nmdc:873f16e03e0f94c9ec28573fb10ad6d8", + "file_size_bytes": 2461822274 + }, + { + "name": "Gp0127630_Protein FAA", + "description": "Protein FAA for Gp0127630", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_proteins.faa", + "md5_checksum": "f7735eb161908954feda34285993f1b9", + "id": "nmdc:f7735eb161908954feda34285993f1b9", + "file_size_bytes": 34246728 + }, + { + "name": "Gp0127630_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127630", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_structural_annotation.gff", + "md5_checksum": "c6053080461e8cc0bbadd13e0775e108", + "id": "nmdc:c6053080461e8cc0bbadd13e0775e108", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127630_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127630", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_functional_annotation.gff", + "md5_checksum": "4878e3d5a95e67c0bb81da53e03400be", + "id": "nmdc:4878e3d5a95e67c0bb81da53e03400be", + "file_size_bytes": 40345940 + }, + { + "name": "Gp0127630_KO TSV file", + "description": "KO TSV file for Gp0127630", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko.tsv", + "md5_checksum": "dbc4d4e179a86aa95211de3e62219191", + "id": "nmdc:dbc4d4e179a86aa95211de3e62219191", + "file_size_bytes": 4543233 + }, + { + "name": "Gp0127630_EC TSV file", + "description": "EC TSV file for Gp0127630", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ec.tsv", + "md5_checksum": "5bdd96be3fbc888969d92c2ed6392846", + "id": 
"nmdc:5bdd96be3fbc888969d92c2ed6392846", + "file_size_bytes": 3027431 + }, + { + "name": "Gp0127630_COG GFF file", + "description": "COG GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cog.gff", + "md5_checksum": "78026e2afc7644463828fbbfa4d8d727", + "id": "nmdc:78026e2afc7644463828fbbfa4d8d727", + "file_size_bytes": 23085097 + }, + { + "name": "Gp0127630_PFAM GFF file", + "description": "PFAM GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_pfam.gff", + "md5_checksum": "ef99a9afe80e1acc086694ca8ab4cca7", + "id": "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", + "file_size_bytes": 16769237 + }, + { + "name": "Gp0127630_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_tigrfam.gff", + "md5_checksum": "f949efd8a6b6affb4707a4314980e86e", + "id": "nmdc:f949efd8a6b6affb4707a4314980e86e", + "file_size_bytes": 1710760 + }, + { + "name": "Gp0127630_SMART GFF file", + "description": "SMART GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_smart.gff", + "md5_checksum": "2f9f0b8164c35117da1e121e63ad772f", + "id": "nmdc:2f9f0b8164c35117da1e121e63ad772f", + "file_size_bytes": 5166448 + }, + { + "name": "Gp0127630_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_supfam.gff", + "md5_checksum": "1e3d433d3cb308d086dec26916b6b1bf", + "id": "nmdc:1e3d433d3cb308d086dec26916b6b1bf", + "file_size_bytes": 29155547 + }, + { + "name": "Gp0127630_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cath_funfam.gff", + "md5_checksum": "d467bd6407a5a41798aa84df69a4a31d", + 
"id": "nmdc:d467bd6407a5a41798aa84df69a4a31d", + "file_size_bytes": 21679406 + }, + { + "name": "Gp0127630_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko_ec.gff", + "md5_checksum": "4cb3db8f0ff98bf805f4750af65eb9d1", + "id": "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1", + "file_size_bytes": 14461252 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127630_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.tooShort.fa", + "md5_checksum": "ce09d99bdfdf0379b09a3ae75c65d830", + "id": "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", + "file_size_bytes": 50450286 + }, + { + "name": "Gp0127630_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.unbinned.fa", + "md5_checksum": "acd651395108c71dd20eeebf9b177d06", + "id": "nmdc:acd651395108c71dd20eeebf9b177d06", + "file_size_bytes": 5114463 + }, + { + "name": "Gp0127630_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127630", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_checkm_qa.out", + "md5_checksum": "850a6fbbd2993f4dfeb5a40485e67f8e", + "id": 
"nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", + "file_size_bytes": 948 + }, + { + "name": "Gp0127630_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127630", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_hqmq_bin.zip", + "md5_checksum": "287529453d35eab4acb72032a59994d0", + "id": "nmdc:287529453d35eab4acb72032a59994d0", + "file_size_bytes": 484667 + }, + { + "name": "Gp0127630_metabat2 bins", + "description": "metabat2 bins for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_metabat_bin.zip", + "md5_checksum": "4ad58f05545a75edc1b933a0b0286d16", + "id": "nmdc:4ad58f05545a75edc1b933a0b0286d16", + "file_size_bytes": 110526 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34707" + }, + "has_input": [ + "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", + "nmdc:4878e3d5a95e67c0bb81da53e03400be" + ], + "too_short_contig_num": 123771, + "part_of": [ + "nmdc:mga09n3g47" + ], + "binned_contig_num": 313, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", + "nmdc:acd651395108c71dd20eeebf9b177d06", + "nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", + "nmdc:287529453d35eab4acb72032a59994d0", + "nmdc:4ad58f05545a75edc1b933a0b0286d16" + ], + "was_informed_by": "gold:Gp0127630", + "input_contig_num": 127321, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga09n3g47", + "mags_list": [ { - "number_of_contig": 142, - "completeness": 43.03, - "bin_name": "bins.2", - "gene_count": 746, + "number_of_contig": 86, + "completeness": 19.9, + "bin_name": "bins.1", + 
"gene_count": 422, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 1.72, + "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 6 + "num_t_rna": 7 }, { - "number_of_contig": 194, - "completeness": 73.62, - "bin_name": "bins.3", - "gene_count": 1844, + "number_of_contig": 227, + "completeness": 70.23, + "bin_name": "bins.2", + "gene_count": 1932, "bin_quality": "MQ", "gtdbtk_species": "", "gtdbtk_order": "Nitrososphaerales", "num_16s": 0, "gtdbtk_family": "Nitrososphaeraceae", "gtdbtk_domain": "Archaea", - "contamination": 2.43, + "contamination": 1.94, "gtdbtk_class": "Nitrososphaeria", "gtdbtk_phylum": "Crenarchaeota", "num_5s": 1, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 31 - }, - { - "number_of_contig": 91, - "completeness": 10.82, - "bin_name": "bins.4", - "gene_count": 442, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 2 - }, - { - "number_of_contig": 82, - "completeness": 10.97, - "bin_name": "bins.5", - "gene_count": 385, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 3 - }, - { - "number_of_contig": 146, - "completeness": 31.6, - "bin_name": "bins.6", - "gene_count": 800, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.6, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 20 + "num_t_rna": 35 
} ], - "unbinned_contig_num": 19751, - "started_at_time": "2021-10-11T02:24:58Z", + "unbinned_contig_num": 3237, + "started_at_time": "2021-10-11T02:26:53Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:40:06+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127633_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.tooShort.fa", - "md5_checksum": "00415cf72f9a77f907e3467a08b123c5", - "id": "nmdc:00415cf72f9a77f907e3467a08b123c5", - "file_size_bytes": 116930318 - }, - { - "name": "Gp0127633_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.unbinned.fa", - "md5_checksum": "83064ec7bfc35a79a1ca76fdd8ad75fd", - "id": "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", - "file_size_bytes": 31883888 - }, - { - "name": "Gp0127633_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127633", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_checkm_qa.out", - "md5_checksum": "3f435d6da551400a4ba4400fa3608e7f", - "id": "nmdc:3f435d6da551400a4ba4400fa3608e7f", - "file_size_bytes": 1590 - }, - { - "name": "Gp0127633_high-quality and medium-quality bins", - 
"description": "high-quality and medium-quality bins for Gp0127633", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_hqmq_bin.zip", - "md5_checksum": "c66f93153962f8b80c8f3d6978b6d802", - "id": "nmdc:c66f93153962f8b80c8f3d6978b6d802", - "file_size_bytes": 460412 - }, - { - "name": "Gp0127633_metabat2 bins", - "description": "metabat2 bins for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_metabat_bin.zip", - "md5_checksum": "ce2a364ec51a1d6311a319509751266e", - "id": "nmdc:ce2a364ec51a1d6311a319509751266e", - "file_size_bytes": 753147 - } - ] + "ended_at_time": "2021-10-11T04:54:22+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b43" - }, - "id": "nmdc:omprc-11-z5qv0f24", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-v0q5ak63" - ], - "has_output": [ - "jgi:574fe0967ded5e3df1ee1482" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127627" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": 
"649b009d6bdd4fd20273c866" + "$oid": "649b005bbf2caae0415ef9ad" }, "has_input": [ - "nmdc:45f15cded08bad75a2ef9d7e4b1f42de" + "nmdc:7b35237c97a75f17ba74be0fe96416c9" ], "part_of": [ - "nmdc:mga0daby71" + "nmdc:mga09n3g47" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf", - "nmdc:308ae373809697291bbc7947a1e4ed2d" - ], - "was_informed_by": "gold:Gp0127627", - "input_read_count": 20505370, - "output_read_bases": 2992084693, - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3096310870, - "name": "Read QC Activity for nmdc:mga0daby71", - "output_read_count": 19995028, - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-11-13T19:08:49+00:00", - "output_data_objects": [ - { - "name": "Gp0127627_Filtered Reads", - "description": "Filtered Reads for Gp0127627", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filtered.fastq.gz", - "md5_checksum": "ed95796b3fd964c6bedb141d70737ebf", - "id": "nmdc:ed95796b3fd964c6bedb141d70737ebf", - "file_size_bytes": 1752924191 - }, - { - "name": "Gp0127627_Filtered Stats", - "description": "Filtered Stats for Gp0127627", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filterStats.txt", - "md5_checksum": "308ae373809697291bbc7947a1e4ed2d", - "id": "nmdc:308ae373809697291bbc7947a1e4ed2d", - "file_size_bytes": 281 - } - ] - }, + "nmdc:f7735eb161908954feda34285993f1b9", + "nmdc:c6053080461e8cc0bbadd13e0775e108", + "nmdc:4878e3d5a95e67c0bb81da53e03400be", + "nmdc:dbc4d4e179a86aa95211de3e62219191", + "nmdc:5bdd96be3fbc888969d92c2ed6392846", + "nmdc:78026e2afc7644463828fbbfa4d8d727", + "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", + "nmdc:f949efd8a6b6affb4707a4314980e86e", + 
"nmdc:2f9f0b8164c35117da1e121e63ad772f", + "nmdc:1e3d433d3cb308d086dec26916b6b1bf", + "nmdc:d467bd6407a5a41798aa84df69a4a31d", + "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1" + ], + "was_informed_by": "gold:Gp0127630", + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga09n3g47", + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:54:22+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf2a" + "$oid": "649b005f2ca5ee4adb139f95" }, "has_input": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf" + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" ], + "part_of": [ + "nmdc:mga09n3g47" + ], + "ctg_logsum": 77070, + "scaf_logsum": 77428, + "gap_pct": 0.00093, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a5ac6665e5d66242b1c885a911236982", - "nmdc:d19478a191693d643157a89c69cc02d1", - "nmdc:679a82699663e88a5e8828ee081fa967", - "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "nmdc:0380e478962be82e0d97a6339f7f3b91", - "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "nmdc:a2a0029691c04851f4a98003a773fe3f", - "nmdc:bab24ab64ad432d115f182df7198d46e" + "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "nmdc:118dd6190bdaf127d3c105cc73012cc3", + "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", + "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", + "nmdc:873f16e03e0f94c9ec28573fb10ad6d8" ], - "was_informed_by": "gold:Gp0127627", - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "asm_score": 6.312, + "was_informed_by": "gold:Gp0127630", + "ctg_powsum": 8755.579, + "scaf_max": 31136, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "scaf_powsum": 8795.268, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0daby71", - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": 
"2021-11-13T19:08:49+00:00", - "output_data_objects": [ - { - "name": "Gp0127627_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", - "md5_checksum": "a5ac6665e5d66242b1c885a911236982", - "id": "nmdc:a5ac6665e5d66242b1c885a911236982", - "file_size_bytes": 5530 - }, - { - "name": "Gp0127627_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", - "md5_checksum": "d19478a191693d643157a89c69cc02d1", - "id": "nmdc:d19478a191693d643157a89c69cc02d1", - "file_size_bytes": 825047 - }, - { - "name": "Gp0127627_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127627", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", - "md5_checksum": "679a82699663e88a5e8828ee081fa967", - "id": "nmdc:679a82699663e88a5e8828ee081fa967", - "file_size_bytes": 241114 - }, - { - "name": "Gp0127627_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127627", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", - "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", - "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "file_size_bytes": 1463660267 - }, - { - "name": "Gp0127627_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127627", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", - "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", 
- "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", - "file_size_bytes": 254347 - }, - { - "name": "Gp0127627_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127627", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", - "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", - "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "file_size_bytes": 2330603 - }, - { - "name": "Gp0127627_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127627", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", - "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", - "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "file_size_bytes": 1177609473 - }, - { - "name": "Gp0127627_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127627", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", - "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", - "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", - "file_size_bytes": 643281 - }, - { - "name": "Gp0127627_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127627", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", - "md5_checksum": "bab24ab64ad432d115f182df7198d46e", - "id": "nmdc:bab24ab64ad432d115f182df7198d46e", - "file_size_bytes": 3926756 - } + "contigs": 127321, + "name": "Assembly Activity for nmdc:mga09n3g47", + "ctg_max": 31136, + "gc_std": 0.09346, + "contig_bp": 52740992, + "gc_avg": 0.61288, + "started_at_time": "2021-10-11T02:26:53Z", + "scaf_bp": 
52741482, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 127272, + "ended_at_time": "2021-10-11T04:54:22+00:00", + "ctg_l50": 372, + "ctg_l90": 284, + "ctg_n50": 41888, + "ctg_n90": 110882, + "scaf_l50": 372, + "scaf_l90": 284, + "scaf_n50": 41856, + "scaf_n90": 110834 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b41" + }, + "id": "nmdc:omprc-11-9pbab972", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-3yjh4z33" + ], + "has_output": [ + "jgi:574fde7c7ded5e3df1ee1419" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127630" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e7195c833bcf838a70049b" + "$oid": "649b009d6bdd4fd20273c865" }, "has_input": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf" + "nmdc:0e737a8e36535f70bff074004ee1f9c0" ], "part_of": [ - "nmdc:mga0daby71" + "nmdc:mga09n3g47" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a5ac6665e5d66242b1c885a911236982", - "nmdc:d19478a191693d643157a89c69cc02d1", - 
"nmdc:679a82699663e88a5e8828ee081fa967", - "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "nmdc:0380e478962be82e0d97a6339f7f3b91", - "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "nmdc:a2a0029691c04851f4a98003a773fe3f", - "nmdc:bab24ab64ad432d115f182df7198d46e" + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", + "nmdc:088fd18cb9169097e739289d2e5ebb13" ], - "was_informed_by": "gold:Gp0127627", - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "was_informed_by": "gold:Gp0127630", + "input_read_count": 28569382, + "output_read_bases": 4016672570, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0daby71", - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T19:08:49+00:00", - "output_data_objects": [ - { - "name": "Gp0127627_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", - "md5_checksum": "a5ac6665e5d66242b1c885a911236982", - "id": "nmdc:a5ac6665e5d66242b1c885a911236982", - "file_size_bytes": 5530 - }, - { - "name": "Gp0127627_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", - "md5_checksum": "d19478a191693d643157a89c69cc02d1", - "id": "nmdc:d19478a191693d643157a89c69cc02d1", - "file_size_bytes": 825047 - }, - { - "name": "Gp0127627_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127627", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", - "md5_checksum": "679a82699663e88a5e8828ee081fa967", - "id": "nmdc:679a82699663e88a5e8828ee081fa967", - 
"file_size_bytes": 241114 - }, - { - "name": "Gp0127627_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127627", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", - "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", - "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "file_size_bytes": 1463660267 - }, - { - "name": "Gp0127627_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127627", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", - "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", - "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", - "file_size_bytes": 254347 - }, - { - "name": "Gp0127627_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127627", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", - "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", - "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "file_size_bytes": 2330603 - }, - { - "name": "Gp0127627_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127627", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", - "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", - "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "file_size_bytes": 1177609473 - }, - { - "name": "Gp0127627_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127627", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", - "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", - "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", - "file_size_bytes": 643281 - }, - { - "name": "Gp0127627_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127627", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", - "md5_checksum": "bab24ab64ad432d115f182df7198d46e", - "id": "nmdc:bab24ab64ad432d115f182df7198d46e", - "file_size_bytes": 3926756 - } - ] - }, + "input_read_bases": 4313976682, + "name": "Read QC Activity for nmdc:mga09n3g47", + "output_read_count": 26868700, + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:54:22+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f99" + "$oid": "649b009bff710ae353f8cf27" }, "has_input": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf" - ], - "part_of": [ - "nmdc:mga0daby71" + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" ], - "ctg_logsum": 6346.305, - "scaf_logsum": 6368.36, - "gap_pct": 0.00044, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a7db57faea894bec6603a69abfdfcf7d", - "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", - "nmdc:0d3200307a90e23525d3fefa7a25f867", - "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", - "nmdc:08f1ba3d3d380a167182c1beb7da304f" + "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "nmdc:2f21fd19f055d1931ab82016ed781a12", + "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" ], - "asm_score": 4.807, - 
"was_informed_by": "gold:Gp0127627", - "ctg_powsum": 681.483, - "scaf_max": 15604, - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "scaf_powsum": 683.717, + "was_informed_by": "gold:Gp0127630", + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", "execution_resource": "NERSC-Cori", - "contigs": 51188, - "name": "Assembly Activity for nmdc:mga0daby71", - "ctg_max": 15604, - "gc_std": 0.11462, - "gc_avg": 0.57328, - "contig_bp": 18008171, - "started_at_time": "2021-11-13T18:47:34Z", - "scaf_bp": 18008251, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 51180, - "ended_at_time": "2021-11-13T19:08:49+00:00", - "ctg_l50": 321, - "ctg_l90": 282, - "ctg_n50": 20415, - "ctg_n90": 44756, - "scaf_l50": 321, - "scaf_l90": 282, - "scaf_n50": 20413, - "scaf_n90": 44748, - "output_data_objects": [ - { - "name": "Gp0127627_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127627", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", - "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", - "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", - "file_size_bytes": 19853676 - }, - { - "name": "Gp0127627_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127627", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", - "md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", - "id": "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", - "file_size_bytes": 19699986 - }, - { - "name": "Gp0127627_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", - "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", - "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", - "file_size_bytes": 3997845 - }, - { - "name": "Gp0127627_Assembled AGP file", 
- "description": "Assembled AGP file for Gp0127627", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_assembly.agp", - "md5_checksum": "e6e7f40bb1f1e333904f20dc3c317e37", - "id": "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", - "file_size_bytes": 3715901 - }, - { - "name": "Gp0127627_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127627", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_pairedMapped_sorted.bam", - "md5_checksum": "08f1ba3d3d380a167182c1beb7da304f", - "id": "nmdc:08f1ba3d3d380a167182c1beb7da304f", - "file_size_bytes": 1854522814 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:54:22+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9ab" + "$oid": "61e71939833bcf838a6fff09" }, "has_input": [ - "nmdc:a7db57faea894bec6603a69abfdfcf7d" + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" ], "part_of": [ - "nmdc:mga0daby71" + "nmdc:mga09n3g47" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", - "nmdc:7e5852b8ca5590f81c543ea69398410f", - "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", - "nmdc:9e52b5a16f0eff5df36bd46038702a52", - "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", - "nmdc:4a788566d47b89e8bc79eea6e26f2c42", - "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", - "nmdc:196a8e27999a32a6168d23f30d84f37b", - "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", - "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", - "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", - 
"nmdc:2c73a261047ff94b898c190418373075" - ], - "was_informed_by": "gold:Gp0127627", - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0daby71", - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-11-13T19:08:49+00:00", - "output_data_objects": [ - { - "name": "Gp0127627_Protein FAA", - "description": "Protein FAA for Gp0127627", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_proteins.faa", - "md5_checksum": "6003e73aa18ac6aa3cc0f7e020c7170e", - "id": "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", - "file_size_bytes": 12141650 - }, - { - "name": "Gp0127627_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127627", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_structural_annotation.gff", - "md5_checksum": "7e5852b8ca5590f81c543ea69398410f", - "id": "nmdc:7e5852b8ca5590f81c543ea69398410f", - "file_size_bytes": 8716031 - }, - { - "name": "Gp0127627_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127627", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_functional_annotation.gff", - "md5_checksum": "cf868630ca2d9037e69e82cfb76a7bd7", - "id": "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", - "file_size_bytes": 14995284 - }, - { - "name": "Gp0127627_KO TSV file", - "description": "KO TSV file for Gp0127627", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko.tsv", - "md5_checksum": "9e52b5a16f0eff5df36bd46038702a52", - "id": "nmdc:9e52b5a16f0eff5df36bd46038702a52", - "file_size_bytes": 1782540 - 
}, - { - "name": "Gp0127627_EC TSV file", - "description": "EC TSV file for Gp0127627", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ec.tsv", - "md5_checksum": "c44dceb1684f1a4249e7b8e944a2b7cf", - "id": "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", - "file_size_bytes": 1180943 - }, - { - "name": "Gp0127627_COG GFF file", - "description": "COG GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cog.gff", - "md5_checksum": "4a788566d47b89e8bc79eea6e26f2c42", - "id": "nmdc:4a788566d47b89e8bc79eea6e26f2c42", - "file_size_bytes": 8144598 - }, - { - "name": "Gp0127627_PFAM GFF file", - "description": "PFAM GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_pfam.gff", - "md5_checksum": "3d01f11a480f59cefdc67e7b6c7f9fc6", - "id": "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", - "file_size_bytes": 5854816 - }, - { - "name": "Gp0127627_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_tigrfam.gff", - "md5_checksum": "196a8e27999a32a6168d23f30d84f37b", - "id": "nmdc:196a8e27999a32a6168d23f30d84f37b", - "file_size_bytes": 549612 - }, - { - "name": "Gp0127627_SMART GFF file", - "description": "SMART GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_smart.gff", - "md5_checksum": "c3040fe67c2c8b2924c6db6c53b268ce", - "id": "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", - "file_size_bytes": 1739035 - }, - { - "name": "Gp0127627_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_supfam.gff", - "md5_checksum": "5594ce118ad4b2f9ec03adc10ebb6267", - "id": "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", - 
"file_size_bytes": 10326655 - }, - { - "name": "Gp0127627_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cath_funfam.gff", - "md5_checksum": "9d2ac6550f5a1dc3d4b3743e8fe2ceec", - "id": "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", - "file_size_bytes": 7571959 - }, - { - "name": "Gp0127627_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko_ec.gff", - "md5_checksum": "2c73a261047ff94b898c190418373075", - "id": "nmdc:2c73a261047ff94b898c190418373075", - "file_size_bytes": 5683569 - } - ] - }, - { - "_id": { - "$oid": "649b0052ec087f6bbab3470b" - }, - "has_input": [ - "nmdc:a7db57faea894bec6603a69abfdfcf7d", - "nmdc:08f1ba3d3d380a167182c1beb7da304f", - "nmdc:cf868630ca2d9037e69e82cfb76a7bd7" - ], - "too_short_contig_num": 50792, - "part_of": [ - "nmdc:mga0daby71" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:ba468a2c4f4810d87ba95ad9e123483d" + "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "nmdc:2f21fd19f055d1931ab82016ed781a12", + "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" ], - "was_informed_by": "gold:Gp0127627", - "input_contig_num": 51188, - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "was_informed_by": "gold:Gp0127630", + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0daby71", - "mags_list": [], - "unbinned_contig_num": 396, - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:MAGsAnalysisActivity", - 
"ended_at_time": "2021-11-13T19:08:49+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127627_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127627", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/MAGs/nmdc_mga0daby71_hqmq_bin.zip", - "md5_checksum": "ba468a2c4f4810d87ba95ad9e123483d", - "id": "nmdc:ba468a2c4f4810d87ba95ad9e123483d", - "file_size_bytes": 182 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:54:22+00:00" } ] }, { - "_id": { - "$oid": "649b009773e8249959349b44" - }, - "id": "nmdc:omprc-11-8qms8262", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-0xprxw22" - ], - "has_output": [ - "jgi:574fde807ded5e3df1ee141b" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James 
Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127632" - ], - "downstream_workflow_activity_records": [ + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127633_Filtered Reads", + "description": "Filtered Reads for Gp0127633", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filtered.fastq.gz", + "md5_checksum": "7cbd497624d8b60ab2a5e7fdbe4730f2", + "id": "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", + "file_size_bytes": 1727224362 + }, { - "_id": { - "$oid": "649b009d6bdd4fd20273c868" - }, - "has_input": [ - "nmdc:5cbd7ceb39903cbded77b36ae866fe9f" - ], - "part_of": [ - "nmdc:mga0b6cy30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", - "nmdc:919c5aade4fffb76f743a33b035b2839" - ], - "was_informed_by": "gold:Gp0127632", - "input_read_count": 27906294, - "output_read_bases": 3905482172, - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4213850394, - "name": "Read QC Activity for nmdc:mga0b6cy30", - "output_read_count": 26116440, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:08:32+00:00", - "output_data_objects": [ - { - "name": "Gp0127632_Filtered Reads", - "description": "Filtered Reads for Gp0127632", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filtered.fastq.gz", - "md5_checksum": "a43bfb55389206c2fc5ddb53e6aa2bc6", - "id": "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", - "file_size_bytes": 2199178772 - }, - { - "name": "Gp0127632_Filtered 
Stats", - "description": "Filtered Stats for Gp0127632", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filterStats.txt", - "md5_checksum": "919c5aade4fffb76f743a33b035b2839", - "id": "nmdc:919c5aade4fffb76f743a33b035b2839", - "file_size_bytes": 289 - } - ] + "name": "Gp0127633_Filtered Stats", + "description": "Filtered Stats for Gp0127633", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filterStats.txt", + "md5_checksum": "eccf0501d08f920a88b6598d573a8e3e", + "id": "nmdc:eccf0501d08f920a88b6598d573a8e3e", + "file_size_bytes": 280 + }, + { + "name": "Gp0127633_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", + "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", + "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", + "file_size_bytes": 875 + }, + { + "name": "Gp0127633_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", + "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", + "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "file_size_bytes": 578856 + }, + { + "name": "Gp0127633_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127633", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", + "md5_checksum": "f445af1a7774572d156f55a898d26f09", + "id": "nmdc:f445af1a7774572d156f55a898d26f09", + "file_size_bytes": 228067 + }, + { + "name": "Gp0127633_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127633", + "data_object_type": "Centrifuge 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", + "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", + "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "file_size_bytes": 1646942155 + }, + { + "name": "Gp0127633_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127633", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", + "md5_checksum": "28beb8baabdaf346f2066b40f375a152", + "id": "nmdc:28beb8baabdaf346f2066b40f375a152", + "file_size_bytes": 252735 + }, + { + "name": "Gp0127633_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127633", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", + "md5_checksum": "1f74a43724c4afed5563499d05601e22", + "id": "nmdc:1f74a43724c4afed5563499d05601e22", + "file_size_bytes": 2329168 + }, + { + "name": "Gp0127633_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127633", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", + "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", + "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "file_size_bytes": 1310443491 + }, + { + "name": "Gp0127633_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127633", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", + "md5_checksum": "275268a6b5aca33c427d11877bcfa674", + "id": "nmdc:275268a6b5aca33c427d11877bcfa674", + "file_size_bytes": 621441 + }, 
+ { + "name": "Gp0127633_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127633", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", + "md5_checksum": "89e810af4915f0e117eaa60550587453", + "id": "nmdc:89e810af4915f0e117eaa60550587453", + "file_size_bytes": 3891844 + }, + { + "name": "Gp0127633_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", + "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", + "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", + "file_size_bytes": 875 + }, + { + "name": "Gp0127633_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", + "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", + "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "file_size_bytes": 578856 + }, + { + "name": "Gp0127633_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127633", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", + "md5_checksum": "f445af1a7774572d156f55a898d26f09", + "id": "nmdc:f445af1a7774572d156f55a898d26f09", + "file_size_bytes": 228067 + }, + { + "name": "Gp0127633_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127633", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", + "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", + "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "file_size_bytes": 
1646942155 + }, + { + "name": "Gp0127633_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127633", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", + "md5_checksum": "28beb8baabdaf346f2066b40f375a152", + "id": "nmdc:28beb8baabdaf346f2066b40f375a152", + "file_size_bytes": 252735 + }, + { + "name": "Gp0127633_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127633", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", + "md5_checksum": "1f74a43724c4afed5563499d05601e22", + "id": "nmdc:1f74a43724c4afed5563499d05601e22", + "file_size_bytes": 2329168 + }, + { + "name": "Gp0127633_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127633", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", + "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", + "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "file_size_bytes": 1310443491 + }, + { + "name": "Gp0127633_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127633", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", + "md5_checksum": "275268a6b5aca33c427d11877bcfa674", + "id": "nmdc:275268a6b5aca33c427d11877bcfa674", + "file_size_bytes": 621441 + }, + { + "name": "Gp0127633_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127633", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", + 
"md5_checksum": "89e810af4915f0e117eaa60550587453", + "id": "nmdc:89e810af4915f0e117eaa60550587453", + "file_size_bytes": 3891844 + }, + { + "name": "Gp0127633_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127633", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_contigs.fna", + "md5_checksum": "ea5ca9478871b3e2600e1df0d748cbef", + "id": "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "file_size_bytes": 152814586 + }, + { + "name": "Gp0127633_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127633", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_scaffolds.fna", + "md5_checksum": "327e130872e4c5faac2f1c9f8dea2316", + "id": "nmdc:327e130872e4c5faac2f1c9f8dea2316", + "file_size_bytes": 151993436 + }, + { + "name": "Gp0127633_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_covstats.txt", + "md5_checksum": "f61f1e62791a38beae95bd95833a6784", + "id": "nmdc:f61f1e62791a38beae95bd95833a6784", + "file_size_bytes": 21678212 + }, + { + "name": "Gp0127633_Assembled AGP file", + "description": "Assembled AGP file for Gp0127633", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_assembly.agp", + "md5_checksum": "416254a3bfc685dd16c11d65a222305f", + "id": "nmdc:416254a3bfc685dd16c11d65a222305f", + "file_size_bytes": 20304047 + }, + { + "name": "Gp0127633_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127633", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_pairedMapped_sorted.bam", + "md5_checksum": 
"bc054294600fa310924f104484effd3e", + "id": "nmdc:bc054294600fa310924f104484effd3e", + "file_size_bytes": 1959649749 + }, + { + "name": "Gp0127633_Protein FAA", + "description": "Protein FAA for Gp0127633", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_proteins.faa", + "md5_checksum": "8defcf55f08cd56d8b2560e27f490ca5", + "id": "nmdc:8defcf55f08cd56d8b2560e27f490ca5", + "file_size_bytes": 85918779 + }, + { + "name": "Gp0127633_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127633", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_structural_annotation.gff", + "md5_checksum": "a6031c0a101419dd413a0804937425ca", + "id": "nmdc:a6031c0a101419dd413a0804937425ca", + "file_size_bytes": 2527 + }, + { + "name": "Gp0127633_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127633", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_functional_annotation.gff", + "md5_checksum": "43069b1146c84c064b7ff334dc9ff100", + "id": "nmdc:43069b1146c84c064b7ff334dc9ff100", + "file_size_bytes": 95647963 + }, + { + "name": "Gp0127633_KO TSV file", + "description": "KO TSV file for Gp0127633", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko.tsv", + "md5_checksum": "acc5a2c445dc6e00668c9a5d50aecdb8", + "id": "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", + "file_size_bytes": 10638485 + }, + { + "name": "Gp0127633_EC TSV file", + "description": "EC TSV file for Gp0127633", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ec.tsv", + "md5_checksum": 
"ec91d5d7a8af4fb845e22cbe7ab82bde", + "id": "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", + "file_size_bytes": 6991172 + }, + { + "name": "Gp0127633_COG GFF file", + "description": "COG GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cog.gff", + "md5_checksum": "3cd238ff1bb176b7a159aeb34a7c4683", + "id": "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", + "file_size_bytes": 56525933 + }, + { + "name": "Gp0127633_PFAM GFF file", + "description": "PFAM GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_pfam.gff", + "md5_checksum": "5103ea2a481ea3b82f1aa98ab7a36998", + "id": "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", + "file_size_bytes": 43189711 + }, + { + "name": "Gp0127633_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_tigrfam.gff", + "md5_checksum": "8f7429420cbefb9e27bcdbe6252e5288", + "id": "nmdc:8f7429420cbefb9e27bcdbe6252e5288", + "file_size_bytes": 4806086 + }, + { + "name": "Gp0127633_SMART GFF file", + "description": "SMART GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_smart.gff", + "md5_checksum": "6d69127dc30609e4861a7b2443b99164", + "id": "nmdc:6d69127dc30609e4861a7b2443b99164", + "file_size_bytes": 12776467 + }, + { + "name": "Gp0127633_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_supfam.gff", + "md5_checksum": "00243bcaf50313d937a7685380a876bb", + "id": "nmdc:00243bcaf50313d937a7685380a876bb", + "file_size_bytes": 70607320 + }, + { + "name": "Gp0127633_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cath_funfam.gff", + 
"md5_checksum": "ec6ffd40772dee9d48dbec0beb6b3321", + "id": "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", + "file_size_bytes": 53950895 + }, + { + "name": "Gp0127633_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko_ec.gff", + "md5_checksum": "907439e314b4f4623244e2cec8532098", + "id": "nmdc:907439e314b4f4623244e2cec8532098", + "file_size_bytes": 33781965 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127633_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.tooShort.fa", + "md5_checksum": "00415cf72f9a77f907e3467a08b123c5", + "id": "nmdc:00415cf72f9a77f907e3467a08b123c5", + "file_size_bytes": 116930318 + }, + { + "name": "Gp0127633_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.unbinned.fa", + "md5_checksum": "83064ec7bfc35a79a1ca76fdd8ad75fd", + "id": "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", + "file_size_bytes": 31883888 + }, + { + "name": "Gp0127633_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127633", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_checkm_qa.out", + 
"md5_checksum": "3f435d6da551400a4ba4400fa3608e7f", + "id": "nmdc:3f435d6da551400a4ba4400fa3608e7f", + "file_size_bytes": 1590 + }, + { + "name": "Gp0127633_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127633", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_hqmq_bin.zip", + "md5_checksum": "c66f93153962f8b80c8f3d6978b6d802", + "id": "nmdc:c66f93153962f8b80c8f3d6978b6d802", + "file_size_bytes": 460412 }, + { + "name": "Gp0127633_metabat2 bins", + "description": "metabat2 bins for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_metabat_bin.zip", + "md5_checksum": "ce2a364ec51a1d6311a319509751266e", + "id": "nmdc:ce2a364ec51a1d6311a319509751266e", + "file_size_bytes": 753147 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf31" + "$oid": "649b0052ec087f6bbab3470e" }, "has_input": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "nmdc:bc054294600fa310924f104484effd3e", + "nmdc:43069b1146c84c064b7ff334dc9ff100" + ], + "too_short_contig_num": 252383, + "part_of": [ + "nmdc:mga05zvf81" ], + "binned_contig_num": 738, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "nmdc:6c54105711e818c4d8169ab595b05efe", - "nmdc:adb155cdb656648496484998a62fb96f", - "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "nmdc:f345b3a57c37097a860e38d5e83835b8", - "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "nmdc:e350fda9bd0651755171d79b413b8da3" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:00415cf72f9a77f907e3467a08b123c5", + "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", + 
"nmdc:3f435d6da551400a4ba4400fa3608e7f", + "nmdc:c66f93153962f8b80c8f3d6978b6d802", + "nmdc:ce2a364ec51a1d6311a319509751266e" ], - "was_informed_by": "gold:Gp0127632", - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "was_informed_by": "gold:Gp0127633", + "input_contig_num": 272872, + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:08:32+00:00", - "output_data_objects": [ - { - "name": "Gp0127632_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", - "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", - "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "file_size_bytes": 2899 - }, - { - "name": "Gp0127632_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", - "md5_checksum": "6c54105711e818c4d8169ab595b05efe", - "id": "nmdc:6c54105711e818c4d8169ab595b05efe", - "file_size_bytes": 769416 - }, - { - "name": "Gp0127632_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127632", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", - "md5_checksum": "adb155cdb656648496484998a62fb96f", - "id": "nmdc:adb155cdb656648496484998a62fb96f", - "file_size_bytes": 235384 - }, - { - "name": "Gp0127632_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127632", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", - "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", - "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "file_size_bytes": 1917130445 - }, - { - "name": "Gp0127632_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127632", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", - "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", - "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", - "file_size_bytes": 255290 - }, + "name": "MAGs Analysis Activity for nmdc:mga05zvf81", + "mags_list": [ { - "name": "Gp0127632_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127632", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", - "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", - "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "file_size_bytes": 2333225 + "number_of_contig": 83, + "completeness": 0.0, + "bin_name": "bins.1", + "gene_count": 600, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 }, { - "name": "Gp0127632_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127632", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", - "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", - "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "file_size_bytes": 1537863470 + 
"number_of_contig": 142, + "completeness": 43.03, + "bin_name": "bins.2", + "gene_count": 746, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 194, + "completeness": 73.62, + "bin_name": "bins.3", + "gene_count": 1844, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 2.43, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 31 + }, + { + "number_of_contig": 91, + "completeness": 10.82, + "bin_name": "bins.4", + "gene_count": 442, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 }, { - "name": "Gp0127632_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127632", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", - "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", - "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "file_size_bytes": 648597 + "number_of_contig": 82, + "completeness": 10.97, + "bin_name": "bins.5", + "gene_count": 385, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 }, { - "name": "Gp0127632_Kraken2 Krona 
HTML report", - "description": "Kraken2 Krona HTML report for Gp0127632", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", - "md5_checksum": "e350fda9bd0651755171d79b413b8da3", - "id": "nmdc:e350fda9bd0651755171d79b413b8da3", - "file_size_bytes": 3959152 + "number_of_contig": 146, + "completeness": 31.6, + "bin_name": "bins.6", + "gene_count": 800, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.6, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 20 } - ] - }, + ], + "unbinned_contig_num": 19751, + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "61e7195d833bcf838a700521" + "$oid": "649b005bbf2caae0415ef9b0" }, "has_input": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + "nmdc:ea5ca9478871b3e2600e1df0d748cbef" ], "part_of": [ - "nmdc:mga0b6cy30" + "nmdc:mga05zvf81" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "nmdc:6c54105711e818c4d8169ab595b05efe", - "nmdc:adb155cdb656648496484998a62fb96f", - "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "nmdc:f345b3a57c37097a860e38d5e83835b8", - "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "nmdc:e350fda9bd0651755171d79b413b8da3" + "nmdc:8defcf55f08cd56d8b2560e27f490ca5", + "nmdc:a6031c0a101419dd413a0804937425ca", + "nmdc:43069b1146c84c064b7ff334dc9ff100", + "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", + 
"nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", + "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", + "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", + "nmdc:8f7429420cbefb9e27bcdbe6252e5288", + "nmdc:6d69127dc30609e4861a7b2443b99164", + "nmdc:00243bcaf50313d937a7685380a876bb", + "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", + "nmdc:907439e314b4f4623244e2cec8532098" ], - "was_informed_by": "gold:Gp0127632", - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "was_informed_by": "gold:Gp0127633", + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:08:32+00:00", - "output_data_objects": [ - { - "name": "Gp0127632_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", - "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", - "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "file_size_bytes": 2899 - }, - { - "name": "Gp0127632_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", - "md5_checksum": "6c54105711e818c4d8169ab595b05efe", - "id": "nmdc:6c54105711e818c4d8169ab595b05efe", - "file_size_bytes": 769416 - }, - { - "name": "Gp0127632_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127632", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", - "md5_checksum": "adb155cdb656648496484998a62fb96f", - "id": "nmdc:adb155cdb656648496484998a62fb96f", - "file_size_bytes": 235384 - }, - { - "name": "Gp0127632_Centrifuge classification TSV report", - "description": 
"Centrifuge classification TSV report for Gp0127632", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", - "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", - "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "file_size_bytes": 1917130445 - }, - { - "name": "Gp0127632_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127632", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", - "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", - "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", - "file_size_bytes": 255290 - }, - { - "name": "Gp0127632_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127632", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", - "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", - "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "file_size_bytes": 2333225 - }, - { - "name": "Gp0127632_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127632", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", - "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", - "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "file_size_bytes": 1537863470 - }, - { - "name": "Gp0127632_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127632", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", - "md5_checksum": 
"a8dd7aa20043510158ad3b2bbe961b42", - "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "file_size_bytes": 648597 - }, - { - "name": "Gp0127632_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127632", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", - "md5_checksum": "e350fda9bd0651755171d79b413b8da3", - "id": "nmdc:e350fda9bd0651755171d79b413b8da3", - "file_size_bytes": 3959152 - } - ] - }, + "name": "Annotation Activity for nmdc:mga05zvf81", + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f9b" + "$oid": "649b005f2ca5ee4adb139f97" }, "has_input": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" ], "part_of": [ - "nmdc:mga0b6cy30" + "nmdc:mga05zvf81" ], - "ctg_logsum": 81568, - "scaf_logsum": 81839, - "gap_pct": 0.00096, + "ctg_logsum": 378958, + "scaf_logsum": 380592, + "gap_pct": 0.00189, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:b5094d52c6d48836de0aac261c622868", - "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", - "nmdc:f8fad4cf225943d8fddec3fa3402c53a", - "nmdc:52f130d084757d6e27177ed108e9e5bf", - "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53" + "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "nmdc:327e130872e4c5faac2f1c9f8dea2316", + "nmdc:f61f1e62791a38beae95bd95833a6784", + "nmdc:416254a3bfc685dd16c11d65a222305f", + "nmdc:bc054294600fa310924f104484effd3e" ], - "asm_score": 5.986, - "was_informed_by": "gold:Gp0127632", - "ctg_powsum": 9274.272, - "scaf_max": 23706, - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "scaf_powsum": 9304.689, + "asm_score": 4.48, + "was_informed_by": "gold:Gp0127633", + "ctg_powsum": 41464, + "scaf_max": 30530, + "id": 
"nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "scaf_powsum": 41655, "execution_resource": "NERSC-Cori", - "contigs": 132499, - "name": "Assembly Activity for nmdc:mga0b6cy30", - "ctg_max": 23706, - "gc_std": 0.09103, - "contig_bp": 54959738, - "gc_avg": 0.61354, - "started_at_time": "2021-10-11T02:23:42Z", - "scaf_bp": 54960268, + "contigs": 272879, + "name": "Assembly Activity for nmdc:mga05zvf81", + "ctg_max": 30530, + "gc_std": 0.08353, + "contig_bp": 141974737, + "gc_avg": 0.63381, + "started_at_time": "2021-10-11T02:24:58Z", + "scaf_bp": 141977427, "type": "nmdc:MetagenomeAssembly", - "scaffolds": 132455, - "ended_at_time": "2021-10-11T04:08:32+00:00", - "ctg_l50": 372, - "ctg_l90": 285, - "ctg_n50": 43541, - "ctg_n90": 113564, - "scaf_l50": 372, - "scaf_l90": 285, - "scaf_n50": 43524, - "scaf_n90": 113522, - "output_data_objects": [ - { - "name": "Gp0127632_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127632", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_contigs.fna", - "md5_checksum": "b5094d52c6d48836de0aac261c622868", - "id": "nmdc:b5094d52c6d48836de0aac261c622868", - "file_size_bytes": 59930370 - }, - { - "name": "Gp0127632_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127632", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_scaffolds.fna", - "md5_checksum": "4d9d83ac8db218e6d0bd4f29801c3ce3", - "id": "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", - "file_size_bytes": 59532251 - }, - { - "name": "Gp0127632_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_covstats.txt", - "md5_checksum": "f8fad4cf225943d8fddec3fa3402c53a", - "id": "nmdc:f8fad4cf225943d8fddec3fa3402c53a", - "file_size_bytes": 10428676 
- }, - { - "name": "Gp0127632_Assembled AGP file", - "description": "Assembled AGP file for Gp0127632", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_assembly.agp", - "md5_checksum": "52f130d084757d6e27177ed108e9e5bf", - "id": "nmdc:52f130d084757d6e27177ed108e9e5bf", - "file_size_bytes": 9725931 - }, - { - "name": "Gp0127632_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127632", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_pairedMapped_sorted.bam", - "md5_checksum": "9e5deaa9e7ac3f5f90d79b6520d39d53", - "id": "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", - "file_size_bytes": 2363431165 - } - ] - }, + "scaffolds": 272628, + "ended_at_time": "2021-10-11T03:40:06+00:00", + "ctg_l50": 526, + "ctg_l90": 298, + "ctg_n50": 72824, + "ctg_n90": 224178, + "scaf_l50": 527, + "scaf_l90": 298, + "scaf_n50": 72571, + "scaf_n90": 223970 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9ae" + "$oid": "649b009773e8249959349b42" }, + "id": "nmdc:omprc-11-0g415160", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:b5094d52c6d48836de0aac261c622868" + "nmdc:bsm-11-nbgp1x53" + ], + "has_output": [ + "jgi:574fde5e7ded5e3df1ee1401" ], "part_of": [ - "nmdc:mga0b6cy30" + "nmdc:sty-11-aygzgv51" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - 
"nmdc:42989e75458691fbd17e537582c56d5e", - "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", - "nmdc:c595237698baaf882fdeeac92f1b02be", - "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", - "nmdc:57053d5594bb80495014664df22b0bb0", - "nmdc:3c82ee6a19674bd5abd4072cb137d96f", - "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", - "nmdc:2475726e21bd8369f76d529f55f21a3f", - "nmdc:5698830d572ddc4e35a5f6642da7981a", - "nmdc:18cdb0f987a2d417d0a39a685e435729", - "nmdc:b34e4d1823bd5cd88aa42832d10b3431", - "nmdc:dc544f4796d49c520372e1872c5aea49" - ], - "was_informed_by": "gold:Gp0127632", - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0b6cy30", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:08:32+00:00", - "output_data_objects": [ - { - "name": "Gp0127632_Protein FAA", - "description": "Protein FAA for Gp0127632", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_proteins.faa", - "md5_checksum": "42989e75458691fbd17e537582c56d5e", - "id": "nmdc:42989e75458691fbd17e537582c56d5e", - "file_size_bytes": 35685584 - }, - { - "name": "Gp0127632_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127632", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_structural_annotation.gff", - "md5_checksum": "09240a6d1afc5f8b965a80a64aa96ef4", - "id": "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", - "file_size_bytes": 2512 - }, - { - "name": "Gp0127632_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127632", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_functional_annotation.gff", - "md5_checksum": 
"c595237698baaf882fdeeac92f1b02be", - "id": "nmdc:c595237698baaf882fdeeac92f1b02be", - "file_size_bytes": 41979225 - }, - { - "name": "Gp0127632_KO TSV file", - "description": "KO TSV file for Gp0127632", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko.tsv", - "md5_checksum": "cd87df7a80ed03eef7d9923b9e9621e4", - "id": "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", - "file_size_bytes": 4726366 - }, - { - "name": "Gp0127632_EC TSV file", - "description": "EC TSV file for Gp0127632", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ec.tsv", - "md5_checksum": "57053d5594bb80495014664df22b0bb0", - "id": "nmdc:57053d5594bb80495014664df22b0bb0", - "file_size_bytes": 3155078 - }, - { - "name": "Gp0127632_COG GFF file", - "description": "COG GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cog.gff", - "md5_checksum": "3c82ee6a19674bd5abd4072cb137d96f", - "id": "nmdc:3c82ee6a19674bd5abd4072cb137d96f", - "file_size_bytes": 23956687 - }, - { - "name": "Gp0127632_PFAM GFF file", - "description": "PFAM GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_pfam.gff", - "md5_checksum": "c9bf48d6c88b3db0f431a08d93873c4a", - "id": "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", - "file_size_bytes": 17333907 - }, - { - "name": "Gp0127632_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_tigrfam.gff", - "md5_checksum": "2475726e21bd8369f76d529f55f21a3f", - "id": "nmdc:2475726e21bd8369f76d529f55f21a3f", - "file_size_bytes": 1771706 - }, - { - "name": "Gp0127632_SMART GFF file", - "description": "SMART GFF file for Gp0127632", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_smart.gff", - "md5_checksum": "5698830d572ddc4e35a5f6642da7981a", - "id": "nmdc:5698830d572ddc4e35a5f6642da7981a", - "file_size_bytes": 5383998 - }, - { - "name": "Gp0127632_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_supfam.gff", - "md5_checksum": "18cdb0f987a2d417d0a39a685e435729", - "id": "nmdc:18cdb0f987a2d417d0a39a685e435729", - "file_size_bytes": 30162479 - }, - { - "name": "Gp0127632_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cath_funfam.gff", - "md5_checksum": "b34e4d1823bd5cd88aa42832d10b3431", - "id": "nmdc:b34e4d1823bd5cd88aa42832d10b3431", - "file_size_bytes": 22459777 - }, - { - "name": "Gp0127632_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko_ec.gff", - "md5_checksum": "dc544f4796d49c520372e1872c5aea49", - "id": "nmdc:dc544f4796d49c520372e1872c5aea49", - "file_size_bytes": 15047897 - } - ] - }, - { - "_id": { - "$oid": "649b0052ec087f6bbab3470d" + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", + "omics_type": { + "has_raw_value": "Metagenome" }, - "has_input": [ - "nmdc:b5094d52c6d48836de0aac261c622868", - "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", - "nmdc:c595237698baaf882fdeeac92f1b02be" - ], - "too_short_contig_num": 128818, - "part_of": [ - "nmdc:mga0b6cy30" - ], - "binned_contig_num": 313, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:2941988fcfb708d20ad1e44682c78e22", - 
"nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", - "nmdc:2914266e7ac7a8668c6f8d8722466c69", - "nmdc:0fd97ca0ce01d42361ce817d3753a65e", - "nmdc:8e7832cac0ae99e2b63dfdfa34c24927" - ], - "was_informed_by": "gold:Gp0127632", - "input_contig_num": 132499, - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0b6cy30", - "mags_list": [ - { - "number_of_contig": 84, - "completeness": 27.81, - "bin_name": "bins.1", - "gene_count": 437, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.71, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 10 - }, - { - "number_of_contig": 229, - "completeness": 71.45, - "bin_name": "bins.2", - "gene_count": 1997, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 0.97, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 36 - } - ], - "unbinned_contig_num": 3368, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:08:32+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127632_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs 
fasta file by metaBat2 for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.tooShort.fa", - "md5_checksum": "2941988fcfb708d20ad1e44682c78e22", - "id": "nmdc:2941988fcfb708d20ad1e44682c78e22", - "file_size_bytes": 52475207 - }, - { - "name": "Gp0127632_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.unbinned.fa", - "md5_checksum": "a6bc8d9d5ba5fe9713829aa7aef3c4cd", - "id": "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", - "file_size_bytes": 5473493 - }, - { - "name": "Gp0127632_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127632", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_checkm_qa.out", - "md5_checksum": "2914266e7ac7a8668c6f8d8722466c69", - "id": "nmdc:2914266e7ac7a8668c6f8d8722466c69", - "file_size_bytes": 948 - }, - { - "name": "Gp0127632_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127632", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_hqmq_bin.zip", - "md5_checksum": "0fd97ca0ce01d42361ce817d3753a65e", - "id": "nmdc:0fd97ca0ce01d42361ce817d3753a65e", - "file_size_bytes": 497493 - }, - { - "name": "Gp0127632_metabat2 bins", - "description": "metabat2 bins for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_metabat_bin.zip", - "md5_checksum": "8e7832cac0ae99e2b63dfdfa34c24927", - "id": "nmdc:8e7832cac0ae99e2b63dfdfa34c24927", - "file_size_bytes": 108323 - } + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + 
"gold:Gp0127633" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b45" - }, - "id": "nmdc:omprc-11-k675bw84", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-rtf54942" - ], - "has_output": [ - "jgi:574fe09f7ded5e3df1ee1489" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127636" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c864" + "$oid": "649b009d6bdd4fd20273c867" }, "has_input": [ - "nmdc:341830a5735c34968da2304bc27edd2a" + "nmdc:c0b8d6516c48cfe5a0b110abe67ee983" ], "part_of": [ - "nmdc:mga02tph39" + "nmdc:mga05zvf81" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6", - "nmdc:64f455185b1bc610a8d74a84ed12683f" + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", + "nmdc:eccf0501d08f920a88b6598d573a8e3e" ], - "was_informed_by": "gold:Gp0127636", - "input_read_count": 31642056, - "output_read_bases": 4354491393, - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "was_informed_by": "gold:Gp0127633", + "input_read_count": 23291434, + "output_read_bases": 3367024367, + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", "execution_resource": 
"NERSC-Cori", - "input_read_bases": 4777950456, - "name": "Read QC Activity for nmdc:mga02tph39", - "output_read_count": 29115818, - "started_at_time": "2021-10-11T02:23:42Z", + "input_read_bases": 3517006534, + "name": "Read QC Activity for nmdc:mga05zvf81", + "output_read_count": 22556158, + "started_at_time": "2021-10-11T02:24:58Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-11-13T18:49:37+00:00", - "output_data_objects": [ - { - "name": "Gp0127636_Filtered Reads", - "description": "Filtered Reads for Gp0127636", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filtered.fastq.gz", - "md5_checksum": "e4f5675c728fd1896682eb669656b5d6", - "id": "nmdc:e4f5675c728fd1896682eb669656b5d6", - "file_size_bytes": 2463342132 - }, - { - "name": "Gp0127636_Filtered Stats", - "description": "Filtered Stats for Gp0127636", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filterStats.txt", - "md5_checksum": "64f455185b1bc610a8d74a84ed12683f", - "id": "nmdc:64f455185b1bc610a8d74a84ed12683f", - "file_size_bytes": 293 - } - ] - }, + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf26" + "$oid": "649b009bff710ae353f8cf29" }, "has_input": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6" + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "nmdc:827ad863c875ea14473c9903d192fa73", - "nmdc:957074ca49765b22348e27b0133d8ba0", - "nmdc:9253645582296696cb33b11754832574", - "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "nmdc:75180fce38f38a6307231b47a8d2b23b", - "nmdc:b4524a34937893768dbd3752068dee0c", - "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" + 
"nmdc:8bd9eb762acabbac5d079c379c28e381", + "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "nmdc:f445af1a7774572d156f55a898d26f09", + "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "nmdc:28beb8baabdaf346f2066b40f375a152", + "nmdc:1f74a43724c4afed5563499d05601e22", + "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "nmdc:275268a6b5aca33c427d11877bcfa674", + "nmdc:89e810af4915f0e117eaa60550587453" ], - "was_informed_by": "gold:Gp0127636", - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "was_informed_by": "gold:Gp0127633", + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga02tph39", - "started_at_time": "2021-10-11T02:23:42Z", + "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", + "started_at_time": "2021-10-11T02:24:58Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:49:37+00:00", - "output_data_objects": [ - { - "name": "Gp0127636_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", - "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", - "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "file_size_bytes": 5547 - }, - { - "name": "Gp0127636_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", - "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", - "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "file_size_bytes": 965042 - }, - { - "name": "Gp0127636_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127636", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", - "md5_checksum": "827ad863c875ea14473c9903d192fa73", - "id": 
"nmdc:827ad863c875ea14473c9903d192fa73", - "file_size_bytes": 242495 - }, - { - "name": "Gp0127636_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127636", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", - "md5_checksum": "957074ca49765b22348e27b0133d8ba0", - "id": "nmdc:957074ca49765b22348e27b0133d8ba0", - "file_size_bytes": 2151939041 - }, - { - "name": "Gp0127636_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127636", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", - "md5_checksum": "9253645582296696cb33b11754832574", - "id": "nmdc:9253645582296696cb33b11754832574", - "file_size_bytes": 257932 - }, - { - "name": "Gp0127636_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127636", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", - "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", - "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "file_size_bytes": 2335219 - }, - { - "name": "Gp0127636_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127636", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", - "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", - "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", - "file_size_bytes": 1746049273 - }, - { - "name": "Gp0127636_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127636", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", - "md5_checksum": "b4524a34937893768dbd3752068dee0c", - "id": "nmdc:b4524a34937893768dbd3752068dee0c", - "file_size_bytes": 660975 - }, - { - "name": "Gp0127636_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127636", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", - "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", - "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", - "file_size_bytes": 4020978 - } - ] - }, + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "61e71959833bcf838a70040a" + "$oid": "61e7193c833bcf838a70001a" }, "has_input": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6" + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" ], "part_of": [ - "nmdc:mga02tph39" + "nmdc:mga05zvf81" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "nmdc:827ad863c875ea14473c9903d192fa73", - "nmdc:957074ca49765b22348e27b0133d8ba0", - "nmdc:9253645582296696cb33b11754832574", - "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "nmdc:75180fce38f38a6307231b47a8d2b23b", - "nmdc:b4524a34937893768dbd3752068dee0c", - "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" + "nmdc:8bd9eb762acabbac5d079c379c28e381", + "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "nmdc:f445af1a7774572d156f55a898d26f09", + "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "nmdc:28beb8baabdaf346f2066b40f375a152", + "nmdc:1f74a43724c4afed5563499d05601e22", + "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "nmdc:275268a6b5aca33c427d11877bcfa674", + 
"nmdc:89e810af4915f0e117eaa60550587453" ], - "was_informed_by": "gold:Gp0127636", - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "was_informed_by": "gold:Gp0127633", + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga02tph39", - "started_at_time": "2021-10-11T02:23:42Z", + "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", + "started_at_time": "2021-10-11T02:24:58Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:49:37+00:00", - "output_data_objects": [ - { - "name": "Gp0127636_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", - "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", - "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "file_size_bytes": 5547 - }, - { - "name": "Gp0127636_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", - "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", - "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "file_size_bytes": 965042 - }, - { - "name": "Gp0127636_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127636", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", - "md5_checksum": "827ad863c875ea14473c9903d192fa73", - "id": "nmdc:827ad863c875ea14473c9903d192fa73", - "file_size_bytes": 242495 - }, - { - "name": "Gp0127636_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127636", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", - "md5_checksum": "957074ca49765b22348e27b0133d8ba0", - "id": "nmdc:957074ca49765b22348e27b0133d8ba0", - "file_size_bytes": 2151939041 - }, - { - "name": "Gp0127636_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127636", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", - "md5_checksum": "9253645582296696cb33b11754832574", - "id": "nmdc:9253645582296696cb33b11754832574", - "file_size_bytes": 257932 - }, - { - "name": "Gp0127636_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127636", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", - "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", - "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "file_size_bytes": 2335219 - }, - { - "name": "Gp0127636_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127636", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", - "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", - "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", - "file_size_bytes": 1746049273 - }, - { - "name": "Gp0127636_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127636", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", - "md5_checksum": "b4524a34937893768dbd3752068dee0c", - "id": "nmdc:b4524a34937893768dbd3752068dee0c", - "file_size_bytes": 660975 - }, - { - "name": "Gp0127636_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127636", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", - "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", - "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", - "file_size_bytes": 4020978 - } - ] + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127627_Filtered Reads", + "description": "Filtered Reads for Gp0127627", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filtered.fastq.gz", + "md5_checksum": "ed95796b3fd964c6bedb141d70737ebf", + "id": "nmdc:ed95796b3fd964c6bedb141d70737ebf", + "file_size_bytes": 1752924191 + }, + { + "name": "Gp0127627_Filtered Stats", + "description": "Filtered Stats for Gp0127627", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filterStats.txt", + "md5_checksum": "308ae373809697291bbc7947a1e4ed2d", + "id": "nmdc:308ae373809697291bbc7947a1e4ed2d", + "file_size_bytes": 281 + }, + { + "name": "Gp0127627_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", + "md5_checksum": "a5ac6665e5d66242b1c885a911236982", + "id": "nmdc:a5ac6665e5d66242b1c885a911236982", + "file_size_bytes": 5530 + }, + { + "name": "Gp0127627_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", + "md5_checksum": 
"d19478a191693d643157a89c69cc02d1", + "id": "nmdc:d19478a191693d643157a89c69cc02d1", + "file_size_bytes": 825047 + }, + { + "name": "Gp0127627_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127627", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", + "md5_checksum": "679a82699663e88a5e8828ee081fa967", + "id": "nmdc:679a82699663e88a5e8828ee081fa967", + "file_size_bytes": 241114 + }, + { + "name": "Gp0127627_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127627", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", + "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", + "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "file_size_bytes": 1463660267 + }, + { + "name": "Gp0127627_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127627", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", + "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", + "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", + "file_size_bytes": 254347 + }, + { + "name": "Gp0127627_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127627", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", + "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", + "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "file_size_bytes": 2330603 + }, + { + "name": "Gp0127627_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127627", + "data_object_type": "Kraken2 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", + "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", + "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "file_size_bytes": 1177609473 + }, + { + "name": "Gp0127627_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127627", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", + "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", + "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", + "file_size_bytes": 643281 + }, + { + "name": "Gp0127627_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127627", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", + "md5_checksum": "bab24ab64ad432d115f182df7198d46e", + "id": "nmdc:bab24ab64ad432d115f182df7198d46e", + "file_size_bytes": 3926756 + }, + { + "name": "Gp0127627_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", + "md5_checksum": "a5ac6665e5d66242b1c885a911236982", + "id": "nmdc:a5ac6665e5d66242b1c885a911236982", + "file_size_bytes": 5530 + }, + { + "name": "Gp0127627_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", + "md5_checksum": "d19478a191693d643157a89c69cc02d1", + "id": "nmdc:d19478a191693d643157a89c69cc02d1", + "file_size_bytes": 825047 + }, + { + "name": "Gp0127627_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127627", + "data_object_type": "GOTTCHA2 Krona Plot", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", + "md5_checksum": "679a82699663e88a5e8828ee081fa967", + "id": "nmdc:679a82699663e88a5e8828ee081fa967", + "file_size_bytes": 241114 + }, + { + "name": "Gp0127627_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127627", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", + "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", + "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "file_size_bytes": 1463660267 + }, + { + "name": "Gp0127627_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127627", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", + "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", + "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", + "file_size_bytes": 254347 + }, + { + "name": "Gp0127627_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127627", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", + "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", + "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "file_size_bytes": 2330603 + }, + { + "name": "Gp0127627_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127627", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", + "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", + "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "file_size_bytes": 
1177609473 + }, + { + "name": "Gp0127627_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127627", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", + "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", + "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", + "file_size_bytes": 643281 + }, + { + "name": "Gp0127627_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127627", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", + "md5_checksum": "bab24ab64ad432d115f182df7198d46e", + "id": "nmdc:bab24ab64ad432d115f182df7198d46e", + "file_size_bytes": 3926756 + }, + { + "name": "Gp0127627_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127627", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", + "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", + "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "file_size_bytes": 19853676 + }, + { + "name": "Gp0127627_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127627", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", + "md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", + "id": "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", + "file_size_bytes": 19699986 + }, + { + "name": "Gp0127627_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", + "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", + "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", + "file_size_bytes": 3997845 + }, + { + 
"name": "Gp0127627_Assembled AGP file", + "description": "Assembled AGP file for Gp0127627", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_assembly.agp", + "md5_checksum": "e6e7f40bb1f1e333904f20dc3c317e37", + "id": "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", + "file_size_bytes": 3715901 + }, + { + "name": "Gp0127627_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127627", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_pairedMapped_sorted.bam", + "md5_checksum": "08f1ba3d3d380a167182c1beb7da304f", + "id": "nmdc:08f1ba3d3d380a167182c1beb7da304f", + "file_size_bytes": 1854522814 + }, + { + "name": "Gp0127627_Protein FAA", + "description": "Protein FAA for Gp0127627", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_proteins.faa", + "md5_checksum": "6003e73aa18ac6aa3cc0f7e020c7170e", + "id": "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", + "file_size_bytes": 12141650 + }, + { + "name": "Gp0127627_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127627", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_structural_annotation.gff", + "md5_checksum": "7e5852b8ca5590f81c543ea69398410f", + "id": "nmdc:7e5852b8ca5590f81c543ea69398410f", + "file_size_bytes": 8716031 }, + { + "name": "Gp0127627_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127627", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_functional_annotation.gff", + "md5_checksum": "cf868630ca2d9037e69e82cfb76a7bd7", + "id": "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", + 
"file_size_bytes": 14995284 + }, + { + "name": "Gp0127627_KO TSV file", + "description": "KO TSV file for Gp0127627", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko.tsv", + "md5_checksum": "9e52b5a16f0eff5df36bd46038702a52", + "id": "nmdc:9e52b5a16f0eff5df36bd46038702a52", + "file_size_bytes": 1782540 + }, + { + "name": "Gp0127627_EC TSV file", + "description": "EC TSV file for Gp0127627", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ec.tsv", + "md5_checksum": "c44dceb1684f1a4249e7b8e944a2b7cf", + "id": "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", + "file_size_bytes": 1180943 + }, + { + "name": "Gp0127627_COG GFF file", + "description": "COG GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cog.gff", + "md5_checksum": "4a788566d47b89e8bc79eea6e26f2c42", + "id": "nmdc:4a788566d47b89e8bc79eea6e26f2c42", + "file_size_bytes": 8144598 + }, + { + "name": "Gp0127627_PFAM GFF file", + "description": "PFAM GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_pfam.gff", + "md5_checksum": "3d01f11a480f59cefdc67e7b6c7f9fc6", + "id": "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", + "file_size_bytes": 5854816 + }, + { + "name": "Gp0127627_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_tigrfam.gff", + "md5_checksum": "196a8e27999a32a6168d23f30d84f37b", + "id": "nmdc:196a8e27999a32a6168d23f30d84f37b", + "file_size_bytes": 549612 + }, + { + "name": "Gp0127627_SMART GFF file", + "description": "SMART GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_smart.gff", + "md5_checksum": 
"c3040fe67c2c8b2924c6db6c53b268ce", + "id": "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", + "file_size_bytes": 1739035 + }, + { + "name": "Gp0127627_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_supfam.gff", + "md5_checksum": "5594ce118ad4b2f9ec03adc10ebb6267", + "id": "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", + "file_size_bytes": 10326655 + }, + { + "name": "Gp0127627_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cath_funfam.gff", + "md5_checksum": "9d2ac6550f5a1dc3d4b3743e8fe2ceec", + "id": "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", + "file_size_bytes": 7571959 + }, + { + "name": "Gp0127627_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko_ec.gff", + "md5_checksum": "2c73a261047ff94b898c190418373075", + "id": "nmdc:2c73a261047ff94b898c190418373075", + "file_size_bytes": 5683569 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127627_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127627", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/MAGs/nmdc_mga0daby71_hqmq_bin.zip", + "md5_checksum": "ba468a2c4f4810d87ba95ad9e123483d", + "id": "nmdc:ba468a2c4f4810d87ba95ad9e123483d", + "file_size_bytes": 182 + } + ], + 
"dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f91" + "$oid": "649b0052ec087f6bbab3470b" }, "has_input": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6" + "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "nmdc:08f1ba3d3d380a167182c1beb7da304f", + "nmdc:cf868630ca2d9037e69e82cfb76a7bd7" ], + "too_short_contig_num": 50792, "part_of": [ - "nmdc:mga02tph39" + "nmdc:mga0daby71" ], - "ctg_logsum": 36469, - "scaf_logsum": 36615, - "gap_pct": 0.00062, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:36692b7b93756aaabd7f1f6259753c4e", - "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", - "nmdc:9830a711accd3a5ed899a2e616d0f4bf", - "nmdc:481fbd8cdeacd71e54a45c78d5decb36", - "nmdc:a24edc9ffd773c30cea8ea709988307a" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ba468a2c4f4810d87ba95ad9e123483d" ], - "asm_score": 3.618, - "was_informed_by": "gold:Gp0127636", - "ctg_powsum": 3976.058, - "scaf_max": 23067, - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", - "scaf_powsum": 3993.143, + "was_informed_by": "gold:Gp0127627", + "input_contig_num": 51188, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", "execution_resource": "NERSC-Cori", - "contigs": 95606, - "name": "Assembly Activity for nmdc:mga02tph39", - "ctg_max": 23067, - "gc_std": 0.11099, - "gc_avg": 0.57474, - "contig_bp": 35573088, - "started_at_time": "2021-10-11T02:23:42Z", - "scaf_bp": 35573308, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 95584, - "ended_at_time": "2021-11-13T18:49:37+00:00", - "ctg_l50": 329, - "ctg_l90": 282, - "ctg_n50": 35238, - "ctg_n90": 83377, - "scaf_l50": 329, - "scaf_l90": 282, - "scaf_n50": 35220, - "scaf_n90": 83355, - "output_data_objects": [ - { - "name": "Gp0127636_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127636", - "data_object_type": "Assembly Contigs", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_contigs.fna", - "md5_checksum": "36692b7b93756aaabd7f1f6259753c4e", - "id": "nmdc:36692b7b93756aaabd7f1f6259753c4e", - "file_size_bytes": 39062008 - }, - { - "name": "Gp0127636_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127636", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_scaffolds.fna", - "md5_checksum": "8d02adf1319d5b95c2abc6ed5b5c1683", - "id": "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", - "file_size_bytes": 38774844 - }, - { - "name": "Gp0127636_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_covstats.txt", - "md5_checksum": "9830a711accd3a5ed899a2e616d0f4bf", - "id": "nmdc:9830a711accd3a5ed899a2e616d0f4bf", - "file_size_bytes": 7495949 - }, - { - "name": "Gp0127636_Assembled AGP file", - "description": "Assembled AGP file for Gp0127636", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_assembly.agp", - "md5_checksum": "481fbd8cdeacd71e54a45c78d5decb36", - "id": "nmdc:481fbd8cdeacd71e54a45c78d5decb36", - "file_size_bytes": 6962527 - }, - { - "name": "Gp0127636_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127636", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_pairedMapped_sorted.bam", - "md5_checksum": "a24edc9ffd773c30cea8ea709988307a", - "id": "nmdc:a24edc9ffd773c30cea8ea709988307a", - "file_size_bytes": 2624769069 - } - ] - }, + "name": "MAGs Analysis Activity for nmdc:mga0daby71", + "mags_list": [], + "unbinned_contig_num": 396, + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:MAGsAnalysisActivity", + 
"ended_at_time": "2021-11-13T19:08:49+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9ac" + "$oid": "649b005bbf2caae0415ef9ab" }, "has_input": [ - "nmdc:36692b7b93756aaabd7f1f6259753c4e" + "nmdc:a7db57faea894bec6603a69abfdfcf7d" ], "part_of": [ - "nmdc:mga02tph39" + "nmdc:mga0daby71" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", - "nmdc:2b791fb3e2964d7808388b32086e0de2", - "nmdc:f61ed86592491b2d83b5893749e12406", - "nmdc:e983789bdc08364b00a000684062ed16", - "nmdc:3cd47d66b6e9006ff683a2eda168285f", - "nmdc:e056ee666e8001bdb6f790efb3394093", - "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", - "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", - "nmdc:dd24a8b0f774555ac91e663416745428", - "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", - "nmdc:2f297176cd51b2ede33c313f713b40b1", - "nmdc:678a7af05a89d9d4f5f5d598dc2e3013" + "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", + "nmdc:7e5852b8ca5590f81c543ea69398410f", + "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", + "nmdc:9e52b5a16f0eff5df36bd46038702a52", + "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", + "nmdc:4a788566d47b89e8bc79eea6e26f2c42", + "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", + "nmdc:196a8e27999a32a6168d23f30d84f37b", + "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", + "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", + "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", + "nmdc:2c73a261047ff94b898c190418373075" ], - "was_informed_by": "gold:Gp0127636", - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "was_informed_by": "gold:Gp0127627", + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga02tph39", - "started_at_time": "2021-10-11T02:23:42Z", + "name": "Annotation Activity for nmdc:mga0daby71", + "started_at_time": "2021-11-13T18:47:34Z", "type": 
"nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-11-13T18:49:37+00:00", - "output_data_objects": [ - { - "name": "Gp0127636_Protein FAA", - "description": "Protein FAA for Gp0127636", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_proteins.faa", - "md5_checksum": "a5d97f323fe7117cb38a2eea1f2246d2", - "id": "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", - "file_size_bytes": 23469553 - }, - { - "name": "Gp0127636_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127636", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_structural_annotation.gff", - "md5_checksum": "2b791fb3e2964d7808388b32086e0de2", - "id": "nmdc:2b791fb3e2964d7808388b32086e0de2", - "file_size_bytes": 16532352 - }, - { - "name": "Gp0127636_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127636", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_functional_annotation.gff", - "md5_checksum": "f61ed86592491b2d83b5893749e12406", - "id": "nmdc:f61ed86592491b2d83b5893749e12406", - "file_size_bytes": 28432426 - }, - { - "name": "Gp0127636_KO TSV file", - "description": "KO TSV file for Gp0127636", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko.tsv", - "md5_checksum": "e983789bdc08364b00a000684062ed16", - "id": "nmdc:e983789bdc08364b00a000684062ed16", - "file_size_bytes": 3189682 - }, - { - "name": "Gp0127636_EC TSV file", - "description": "EC TSV file for Gp0127636", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ec.tsv", - "md5_checksum": 
"3cd47d66b6e9006ff683a2eda168285f", - "id": "nmdc:3cd47d66b6e9006ff683a2eda168285f", - "file_size_bytes": 2100535 - }, - { - "name": "Gp0127636_COG GFF file", - "description": "COG GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cog.gff", - "md5_checksum": "e056ee666e8001bdb6f790efb3394093", - "id": "nmdc:e056ee666e8001bdb6f790efb3394093", - "file_size_bytes": 15585690 - }, - { - "name": "Gp0127636_PFAM GFF file", - "description": "PFAM GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_pfam.gff", - "md5_checksum": "2b90fcb7628c3ffa9e7a14a32612b7af", - "id": "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", - "file_size_bytes": 11182350 - }, - { - "name": "Gp0127636_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_tigrfam.gff", - "md5_checksum": "4e2f1d4b2d20bfb0209a320a60c4aeac", - "id": "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", - "file_size_bytes": 995758 - }, - { - "name": "Gp0127636_SMART GFF file", - "description": "SMART GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_smart.gff", - "md5_checksum": "dd24a8b0f774555ac91e663416745428", - "id": "nmdc:dd24a8b0f774555ac91e663416745428", - "file_size_bytes": 3256325 - }, - { - "name": "Gp0127636_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_supfam.gff", - "md5_checksum": "2e76b71475b854e2bf2d0aa15a53dd7d", - "id": "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", - "file_size_bytes": 19666317 - }, - { - "name": "Gp0127636_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cath_funfam.gff", - 
"md5_checksum": "2f297176cd51b2ede33c313f713b40b1", - "id": "nmdc:2f297176cd51b2ede33c313f713b40b1", - "file_size_bytes": 14458019 - }, - { - "name": "Gp0127636_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko_ec.gff", - "md5_checksum": "678a7af05a89d9d4f5f5d598dc2e3013", - "id": "nmdc:678a7af05a89d9d4f5f5d598dc2e3013", - "file_size_bytes": 10187098 - } - ] - }, + "ended_at_time": "2021-11-13T19:08:49+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34709" + "$oid": "649b005f2ca5ee4adb139f99" }, "has_input": [ - "nmdc:36692b7b93756aaabd7f1f6259753c4e", - "nmdc:a24edc9ffd773c30cea8ea709988307a", - "nmdc:f61ed86592491b2d83b5893749e12406" + "nmdc:ed95796b3fd964c6bedb141d70737ebf" ], - "too_short_contig_num": 93687, "part_of": [ - "nmdc:mga02tph39" + "nmdc:mga0daby71" ], + "ctg_logsum": 6346.305, + "scaf_logsum": 6368.36, + "gap_pct": 0.00044, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:2d1e318b8b815a8a5487f23315d0fe02" + "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", + "nmdc:0d3200307a90e23525d3fefa7a25f867", + "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", + "nmdc:08f1ba3d3d380a167182c1beb7da304f" ], - "was_informed_by": "gold:Gp0127636", - "input_contig_num": 95606, - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "asm_score": 4.807, + "was_informed_by": "gold:Gp0127627", + "ctg_powsum": 681.483, + "scaf_max": 15604, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "scaf_powsum": 683.717, "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga02tph39", - "mags_list": [], - "unbinned_contig_num": 1919, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-11-13T18:49:37+00:00", - "output_data_objects": [ - { - 
"name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127636_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127636", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/MAGs/nmdc_mga02tph39_hqmq_bin.zip", - "md5_checksum": "2d1e318b8b815a8a5487f23315d0fe02", - "id": "nmdc:2d1e318b8b815a8a5487f23315d0fe02", - "file_size_bytes": 182 - } - ] + "contigs": 51188, + "name": "Assembly Activity for nmdc:mga0daby71", + "ctg_max": 15604, + "gc_std": 0.11462, + "gc_avg": 0.57328, + "contig_bp": 18008171, + "started_at_time": "2021-11-13T18:47:34Z", + "scaf_bp": 18008251, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 51180, + "ended_at_time": "2021-11-13T19:08:49+00:00", + "ctg_l50": 321, + "ctg_l90": 282, + "ctg_n50": 20415, + "ctg_n90": 44756, + "scaf_l50": 321, + "scaf_l90": 282, + "scaf_n50": 20413, + "scaf_n90": 44748 } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b46" - }, - "id": "nmdc:omprc-11-mbv2jc69", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-jdgzjq31" - ], - "has_output": [ - "jgi:574fe09c7ded5e3df1ee1487" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial 
communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127634" - ], - "downstream_workflow_activity_records": [ + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c86a" + "$oid": "649b009773e8249959349b43" }, + "id": "nmdc:omprc-11-z5qv0f24", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:2b7712d32a159eca66fc50936de000a5" + "nmdc:bsm-11-v0q5ak63" ], - "part_of": [ - "nmdc:mga0r0vf18" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ac889627d813c8e34cfbf79a4264c590", - "nmdc:0dfd55be1779ae7922d80aa22034c9a1" - ], - "was_informed_by": "gold:Gp0127634", - "input_read_count": 29872658, - "output_read_bases": 4172764161, - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4510771358, - "name": "Read QC Activity for nmdc:mga0r0vf18", - "output_read_count": 27896694, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:49:55+00:00", - "output_data_objects": [ - { - "name": "Gp0127634_Filtered Reads", - "description": "Filtered Reads for Gp0127634", - "data_object_type": "Filtered Sequencing Reads", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", - "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", - "id": "nmdc:ac889627d813c8e34cfbf79a4264c590", - "file_size_bytes": 2316462404 - }, - { - "name": "Gp0127634_Filtered Stats", - "description": "Filtered Stats for Gp0127634", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", - "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", - "id": "nmdc:0dfd55be1779ae7922d80aa22034c9a1", - "file_size_bytes": 291 - } - ] - }, - { - "_id": { - "$oid": "649b009bff710ae353f8cf2d" - }, - "has_input": [ - "nmdc:ac889627d813c8e34cfbf79a4264c590" + "jgi:574fe0967ded5e3df1ee1482" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "nmdc:366ab38bb6de9591f31a086d42ac23d6", - "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "nmdc:0ca043b630ba304cb80603e8332c78cf", - "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "nmdc:678e7c401a6971629f7d3ada83b307ab" + "part_of": [ + "nmdc:sty-11-aygzgv51" ], - "was_informed_by": "gold:Gp0127634", - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:49:55+00:00", - "output_data_objects": [ - { - "name": "Gp0127634_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", - "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", - "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "file_size_bytes": 4224 - }, - { - "name": 
"Gp0127634_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", - "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", - "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "file_size_bytes": 875501 - }, - { - "name": "Gp0127634_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127634", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", - "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", - "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", - "file_size_bytes": 238755 - }, - { - "name": "Gp0127634_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127634", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", - "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", - "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "file_size_bytes": 2051793471 - }, - { - "name": "Gp0127634_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127634", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", - "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", - "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", - "file_size_bytes": 256560 - }, - { - "name": "Gp0127634_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127634", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", - "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", - 
"id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "file_size_bytes": 2334325 - }, - { - "name": "Gp0127634_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127634", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", - "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", - "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "file_size_bytes": 1649071235 - }, - { - "name": "Gp0127634_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127634", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", - "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "file_size_bytes": 654782 - }, - { - "name": "Gp0127634_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127634", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", - "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", - "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", - "file_size_bytes": 3988988 - } + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127627" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e71979833bcf838a700840" + "$oid": "649b009d6bdd4fd20273c866" }, "has_input": 
[ - "nmdc:ac889627d813c8e34cfbf79a4264c590" + "nmdc:45f15cded08bad75a2ef9d7e4b1f42de" ], "part_of": [ - "nmdc:mga0r0vf18" + "nmdc:mga0daby71" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "nmdc:366ab38bb6de9591f31a086d42ac23d6", - "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "nmdc:0ca043b630ba304cb80603e8332c78cf", - "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "nmdc:678e7c401a6971629f7d3ada83b307ab" + "nmdc:ed95796b3fd964c6bedb141d70737ebf", + "nmdc:308ae373809697291bbc7947a1e4ed2d" ], - "was_informed_by": "gold:Gp0127634", - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "was_informed_by": "gold:Gp0127627", + "input_read_count": 20505370, + "output_read_bases": 2992084693, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:49:55+00:00", - "output_data_objects": [ - { - "name": "Gp0127634_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", - "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", - "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "file_size_bytes": 4224 - }, - { - "name": "Gp0127634_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", - "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", - "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "file_size_bytes": 875501 - }, - { - "name": "Gp0127634_Gottcha2 Krona HTML report", - "description": 
"Gottcha2 Krona HTML report for Gp0127634", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", - "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", - "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", - "file_size_bytes": 238755 - }, - { - "name": "Gp0127634_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127634", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", - "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", - "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "file_size_bytes": 2051793471 - }, - { - "name": "Gp0127634_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127634", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", - "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", - "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", - "file_size_bytes": 256560 - }, - { - "name": "Gp0127634_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127634", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", - "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", - "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "file_size_bytes": 2334325 - }, - { - "name": "Gp0127634_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127634", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", - "md5_checksum": 
"7bfa3b5b29ec5cf9882251585d99f9bf", - "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "file_size_bytes": 1649071235 - }, - { - "name": "Gp0127634_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127634", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", - "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "file_size_bytes": 654782 - }, - { - "name": "Gp0127634_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127634", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", - "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", - "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", - "file_size_bytes": 3988988 - } - ] - }, + "input_read_bases": 3096310870, + "name": "Read QC Activity for nmdc:mga0daby71", + "output_read_count": 19995028, + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T19:08:49+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f9c" + "$oid": "649b009bff710ae353f8cf2a" }, "has_input": [ - "nmdc:ac889627d813c8e34cfbf79a4264c590" - ], - "part_of": [ - "nmdc:mga0r0vf18" + "nmdc:ed95796b3fd964c6bedb141d70737ebf" ], - "ctg_logsum": 142091, - "scaf_logsum": 142614, - "gap_pct": 0.00138, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:2a30cf44cc596923301befc34edf6c0a", - "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", - "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", - "nmdc:825969095ff134b195b06a40fcc6089a", - "nmdc:356d9ca409747590849dd894998166ee" + "nmdc:a5ac6665e5d66242b1c885a911236982", + "nmdc:d19478a191693d643157a89c69cc02d1", + "nmdc:679a82699663e88a5e8828ee081fa967", + 
"nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "nmdc:0380e478962be82e0d97a6339f7f3b91", + "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "nmdc:a2a0029691c04851f4a98003a773fe3f", + "nmdc:bab24ab64ad432d115f182df7198d46e" ], - "asm_score": 5.751, - "was_informed_by": "gold:Gp0127634", - "ctg_powsum": 15837, - "scaf_max": 33833, - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "scaf_powsum": 15897, + "was_informed_by": "gold:Gp0127627", + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", "execution_resource": "NERSC-Cori", - "contigs": 175824, - "name": "Assembly Activity for nmdc:mga0r0vf18", - "ctg_max": 33833, - "gc_std": 0.09424, - "contig_bp": 78219291, - "gc_avg": 0.62214, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 78220371, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 175734, - "ended_at_time": "2021-10-11T04:49:55+00:00", - "ctg_l50": 412, - "ctg_l90": 286, - "ctg_n50": 53340, - "ctg_n90": 150131, - "scaf_l50": 412, - "scaf_l90": 286, - "scaf_n50": 53321, - "scaf_n90": 150048, - "output_data_objects": [ - { - "name": "Gp0127634_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127634", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_contigs.fna", - "md5_checksum": "2a30cf44cc596923301befc34edf6c0a", - "id": "nmdc:2a30cf44cc596923301befc34edf6c0a", - "file_size_bytes": 84939887 - }, - { - "name": "Gp0127634_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127634", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_scaffolds.fna", - "md5_checksum": "f147264a5a4a7eec4d68f05ab52ecc1d", - "id": "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", - "file_size_bytes": 84411544 - }, - { - "name": "Gp0127634_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127634", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_covstats.txt", - "md5_checksum": "9bd1b25df71c0a6f9ca408ddc045ffed", - "id": "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", - "file_size_bytes": 13895509 - }, - { - "name": "Gp0127634_Assembled AGP file", - "description": "Assembled AGP file for Gp0127634", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_assembly.agp", - "md5_checksum": "825969095ff134b195b06a40fcc6089a", - "id": "nmdc:825969095ff134b195b06a40fcc6089a", - "file_size_bytes": 12985962 - }, - { - "name": "Gp0127634_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127634", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_pairedMapped_sorted.bam", - "md5_checksum": "356d9ca409747590849dd894998166ee", - "id": "nmdc:356d9ca409747590849dd894998166ee", - "file_size_bytes": 2516463401 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0daby71", + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T19:08:49+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9b3" + "$oid": "61e7195c833bcf838a70049b" }, "has_input": [ - "nmdc:2a30cf44cc596923301befc34edf6c0a" + "nmdc:ed95796b3fd964c6bedb141d70737ebf" ], "part_of": [ - "nmdc:mga0r0vf18" + "nmdc:mga0daby71" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ca16203099dc1d6bbce00320bb753974", - "nmdc:fffbb7b52a4886755df429e22a152427", - "nmdc:f63b43e7797845fa94dc6f552ba1ea39", - "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", - 
"nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", - "nmdc:763d16c5dbadbeba61ceee91ed5209f3", - "nmdc:52cba722f402eea06fda75ec1e5a5103", - "nmdc:ad358ce4b479febc34a2acdd9f249517", - "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", - "nmdc:d0e8459e010015e726c31f0f8c18d359", - "nmdc:41d7ca149efb4c12bce48e5a19649a84", - "nmdc:9da1883e60979e17665b0211198c35f0" + "nmdc:a5ac6665e5d66242b1c885a911236982", + "nmdc:d19478a191693d643157a89c69cc02d1", + "nmdc:679a82699663e88a5e8828ee081fa967", + "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "nmdc:0380e478962be82e0d97a6339f7f3b91", + "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "nmdc:a2a0029691c04851f4a98003a773fe3f", + "nmdc:bab24ab64ad432d115f182df7198d46e" ], - "was_informed_by": "gold:Gp0127634", - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "was_informed_by": "gold:Gp0127627", + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0r0vf18", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:49:55+00:00", - "output_data_objects": [ - { - "name": "Gp0127634_Protein FAA", - "description": "Protein FAA for Gp0127634", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_proteins.faa", - "md5_checksum": "ca16203099dc1d6bbce00320bb753974", - "id": "nmdc:ca16203099dc1d6bbce00320bb753974", - "file_size_bytes": 49630516 - }, - { - "name": "Gp0127634_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127634", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_structural_annotation.gff", - "md5_checksum": "fffbb7b52a4886755df429e22a152427", - "id": "nmdc:fffbb7b52a4886755df429e22a152427", - "file_size_bytes": 2519 - }, - { - "name": 
"Gp0127634_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127634", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_functional_annotation.gff", - "md5_checksum": "f63b43e7797845fa94dc6f552ba1ea39", - "id": "nmdc:f63b43e7797845fa94dc6f552ba1ea39", - "file_size_bytes": 57589694 - }, - { - "name": "Gp0127634_KO TSV file", - "description": "KO TSV file for Gp0127634", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko.tsv", - "md5_checksum": "8ab8f39bfc76267daa4ce5a34811bff1", - "id": "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", - "file_size_bytes": 6602379 - }, - { - "name": "Gp0127634_EC TSV file", - "description": "EC TSV file for Gp0127634", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ec.tsv", - "md5_checksum": "d6ff8f2f0d5c77495b2b43a7020e5730", - "id": "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", - "file_size_bytes": 4399755 - }, - { - "name": "Gp0127634_COG GFF file", - "description": "COG GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cog.gff", - "md5_checksum": "763d16c5dbadbeba61ceee91ed5209f3", - "id": "nmdc:763d16c5dbadbeba61ceee91ed5209f3", - "file_size_bytes": 33737036 - }, - { - "name": "Gp0127634_PFAM GFF file", - "description": "PFAM GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_pfam.gff", - "md5_checksum": "52cba722f402eea06fda75ec1e5a5103", - "id": "nmdc:52cba722f402eea06fda75ec1e5a5103", - "file_size_bytes": 24757263 - }, - { - "name": "Gp0127634_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127634", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_tigrfam.gff", - "md5_checksum": "ad358ce4b479febc34a2acdd9f249517", - "id": "nmdc:ad358ce4b479febc34a2acdd9f249517", - "file_size_bytes": 2661782 - }, - { - "name": "Gp0127634_SMART GFF file", - "description": "SMART GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_smart.gff", - "md5_checksum": "10a0ca82cf662ac4d9b465f05ed1fb2b", - "id": "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", - "file_size_bytes": 7506881 - }, - { - "name": "Gp0127634_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_supfam.gff", - "md5_checksum": "d0e8459e010015e726c31f0f8c18d359", - "id": "nmdc:d0e8459e010015e726c31f0f8c18d359", - "file_size_bytes": 42013513 - }, - { - "name": "Gp0127634_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cath_funfam.gff", - "md5_checksum": "41d7ca149efb4c12bce48e5a19649a84", - "id": "nmdc:41d7ca149efb4c12bce48e5a19649a84", - "file_size_bytes": 31747110 - }, - { - "name": "Gp0127634_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko_ec.gff", - "md5_checksum": "9da1883e60979e17665b0211198c35f0", - "id": "nmdc:9da1883e60979e17665b0211198c35f0", - "file_size_bytes": 20999001 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0daby71", + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T19:08:49+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": 
"Gp0127632_Filtered Reads", + "description": "Filtered Reads for Gp0127632", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filtered.fastq.gz", + "md5_checksum": "a43bfb55389206c2fc5ddb53e6aa2bc6", + "id": "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", + "file_size_bytes": 2199178772 + }, + { + "name": "Gp0127632_Filtered Stats", + "description": "Filtered Stats for Gp0127632", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filterStats.txt", + "md5_checksum": "919c5aade4fffb76f743a33b035b2839", + "id": "nmdc:919c5aade4fffb76f743a33b035b2839", + "file_size_bytes": 289 + }, + { + "name": "Gp0127632_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", + "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", + "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "file_size_bytes": 2899 + }, + { + "name": "Gp0127632_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", + "md5_checksum": "6c54105711e818c4d8169ab595b05efe", + "id": "nmdc:6c54105711e818c4d8169ab595b05efe", + "file_size_bytes": 769416 + }, + { + "name": "Gp0127632_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127632", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", + "md5_checksum": "adb155cdb656648496484998a62fb96f", + "id": "nmdc:adb155cdb656648496484998a62fb96f", + "file_size_bytes": 235384 + }, + { + "name": "Gp0127632_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for 
Gp0127632", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", + "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", + "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "file_size_bytes": 1917130445 + }, + { + "name": "Gp0127632_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127632", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", + "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", + "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", + "file_size_bytes": 255290 + }, + { + "name": "Gp0127632_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127632", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", + "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", + "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "file_size_bytes": 2333225 + }, + { + "name": "Gp0127632_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127632", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", + "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", + "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "file_size_bytes": 1537863470 + }, + { + "name": "Gp0127632_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127632", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", + "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", + "id": 
"nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "file_size_bytes": 648597 + }, + { + "name": "Gp0127632_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127632", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", + "md5_checksum": "e350fda9bd0651755171d79b413b8da3", + "id": "nmdc:e350fda9bd0651755171d79b413b8da3", + "file_size_bytes": 3959152 + }, + { + "name": "Gp0127632_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", + "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", + "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "file_size_bytes": 2899 + }, + { + "name": "Gp0127632_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", + "md5_checksum": "6c54105711e818c4d8169ab595b05efe", + "id": "nmdc:6c54105711e818c4d8169ab595b05efe", + "file_size_bytes": 769416 + }, + { + "name": "Gp0127632_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127632", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", + "md5_checksum": "adb155cdb656648496484998a62fb96f", + "id": "nmdc:adb155cdb656648496484998a62fb96f", + "file_size_bytes": 235384 + }, + { + "name": "Gp0127632_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127632", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", + "md5_checksum": 
"0a03ac5737750a3b336e7299e9f01ead", + "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "file_size_bytes": 1917130445 + }, + { + "name": "Gp0127632_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127632", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", + "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", + "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", + "file_size_bytes": 255290 + }, + { + "name": "Gp0127632_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127632", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", + "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", + "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "file_size_bytes": 2333225 }, + { + "name": "Gp0127632_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127632", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", + "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", + "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "file_size_bytes": 1537863470 + }, + { + "name": "Gp0127632_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127632", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", + "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", + "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "file_size_bytes": 648597 + }, + { + "name": "Gp0127632_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127632", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", + "md5_checksum": "e350fda9bd0651755171d79b413b8da3", + "id": "nmdc:e350fda9bd0651755171d79b413b8da3", + "file_size_bytes": 3959152 + }, + { + "name": "Gp0127632_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127632", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_contigs.fna", + "md5_checksum": "b5094d52c6d48836de0aac261c622868", + "id": "nmdc:b5094d52c6d48836de0aac261c622868", + "file_size_bytes": 59930370 + }, + { + "name": "Gp0127632_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127632", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_scaffolds.fna", + "md5_checksum": "4d9d83ac8db218e6d0bd4f29801c3ce3", + "id": "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", + "file_size_bytes": 59532251 + }, + { + "name": "Gp0127632_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_covstats.txt", + "md5_checksum": "f8fad4cf225943d8fddec3fa3402c53a", + "id": "nmdc:f8fad4cf225943d8fddec3fa3402c53a", + "file_size_bytes": 10428676 + }, + { + "name": "Gp0127632_Assembled AGP file", + "description": "Assembled AGP file for Gp0127632", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_assembly.agp", + "md5_checksum": "52f130d084757d6e27177ed108e9e5bf", + "id": "nmdc:52f130d084757d6e27177ed108e9e5bf", + "file_size_bytes": 9725931 + }, + { + "name": "Gp0127632_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127632", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_pairedMapped_sorted.bam", + "md5_checksum": "9e5deaa9e7ac3f5f90d79b6520d39d53", + "id": "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", + "file_size_bytes": 2363431165 + }, + { + "name": "Gp0127632_Protein FAA", + "description": "Protein FAA for Gp0127632", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_proteins.faa", + "md5_checksum": "42989e75458691fbd17e537582c56d5e", + "id": "nmdc:42989e75458691fbd17e537582c56d5e", + "file_size_bytes": 35685584 + }, + { + "name": "Gp0127632_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127632", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_structural_annotation.gff", + "md5_checksum": "09240a6d1afc5f8b965a80a64aa96ef4", + "id": "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", + "file_size_bytes": 2512 + }, + { + "name": "Gp0127632_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127632", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_functional_annotation.gff", + "md5_checksum": "c595237698baaf882fdeeac92f1b02be", + "id": "nmdc:c595237698baaf882fdeeac92f1b02be", + "file_size_bytes": 41979225 + }, + { + "name": "Gp0127632_KO TSV file", + "description": "KO TSV file for Gp0127632", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko.tsv", + "md5_checksum": "cd87df7a80ed03eef7d9923b9e9621e4", + "id": "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", + "file_size_bytes": 4726366 + }, + { + "name": "Gp0127632_EC TSV file", + "description": "EC TSV file for Gp0127632", + "data_object_type": "Annotation Enzyme Commission", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ec.tsv", + "md5_checksum": "57053d5594bb80495014664df22b0bb0", + "id": "nmdc:57053d5594bb80495014664df22b0bb0", + "file_size_bytes": 3155078 + }, + { + "name": "Gp0127632_COG GFF file", + "description": "COG GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cog.gff", + "md5_checksum": "3c82ee6a19674bd5abd4072cb137d96f", + "id": "nmdc:3c82ee6a19674bd5abd4072cb137d96f", + "file_size_bytes": 23956687 + }, + { + "name": "Gp0127632_PFAM GFF file", + "description": "PFAM GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_pfam.gff", + "md5_checksum": "c9bf48d6c88b3db0f431a08d93873c4a", + "id": "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", + "file_size_bytes": 17333907 + }, + { + "name": "Gp0127632_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_tigrfam.gff", + "md5_checksum": "2475726e21bd8369f76d529f55f21a3f", + "id": "nmdc:2475726e21bd8369f76d529f55f21a3f", + "file_size_bytes": 1771706 + }, + { + "name": "Gp0127632_SMART GFF file", + "description": "SMART GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_smart.gff", + "md5_checksum": "5698830d572ddc4e35a5f6642da7981a", + "id": "nmdc:5698830d572ddc4e35a5f6642da7981a", + "file_size_bytes": 5383998 + }, + { + "name": "Gp0127632_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_supfam.gff", + "md5_checksum": "18cdb0f987a2d417d0a39a685e435729", + "id": "nmdc:18cdb0f987a2d417d0a39a685e435729", + "file_size_bytes": 30162479 + }, + { + "name": "Gp0127632_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127632", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cath_funfam.gff", + "md5_checksum": "b34e4d1823bd5cd88aa42832d10b3431", + "id": "nmdc:b34e4d1823bd5cd88aa42832d10b3431", + "file_size_bytes": 22459777 + }, + { + "name": "Gp0127632_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko_ec.gff", + "md5_checksum": "dc544f4796d49c520372e1872c5aea49", + "id": "nmdc:dc544f4796d49c520372e1872c5aea49", + "file_size_bytes": 15047897 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127632_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.tooShort.fa", + "md5_checksum": "2941988fcfb708d20ad1e44682c78e22", + "id": "nmdc:2941988fcfb708d20ad1e44682c78e22", + "file_size_bytes": 52475207 + }, + { + "name": "Gp0127632_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.unbinned.fa", + "md5_checksum": "a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "id": "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "file_size_bytes": 5473493 + }, + { + "name": "Gp0127632_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127632", + "data_object_type": "CheckM Statistics", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_checkm_qa.out", + "md5_checksum": "2914266e7ac7a8668c6f8d8722466c69", + "id": "nmdc:2914266e7ac7a8668c6f8d8722466c69", + "file_size_bytes": 948 + }, + { + "name": "Gp0127632_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127632", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_hqmq_bin.zip", + "md5_checksum": "0fd97ca0ce01d42361ce817d3753a65e", + "id": "nmdc:0fd97ca0ce01d42361ce817d3753a65e", + "file_size_bytes": 497493 + }, + { + "name": "Gp0127632_metabat2 bins", + "description": "metabat2 bins for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_metabat_bin.zip", + "md5_checksum": "8e7832cac0ae99e2b63dfdfa34c24927", + "id": "nmdc:8e7832cac0ae99e2b63dfdfa34c24927", + "file_size_bytes": 108323 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34713" + "$oid": "649b0052ec087f6bbab3470d" }, "has_input": [ - "nmdc:2a30cf44cc596923301befc34edf6c0a", - "nmdc:356d9ca409747590849dd894998166ee", - "nmdc:f63b43e7797845fa94dc6f552ba1ea39" + "nmdc:b5094d52c6d48836de0aac261c622868", + "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", + "nmdc:c595237698baaf882fdeeac92f1b02be" ], - "too_short_contig_num": 168596, + "too_short_contig_num": 128818, "part_of": [ - "nmdc:mga0r0vf18" + "nmdc:mga0b6cy30" ], - "binned_contig_num": 278, + "binned_contig_num": 313, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:3c8eadbcf4f583090d8f378ea6758799", - "nmdc:1be647dc835ee8fe666fe9893266bd21", - "nmdc:6cc278c455cafc691333c0a74fe6c540", - "nmdc:de4d0180489bdaa5526977508a489b99", - "nmdc:16a08c4a3a6e9c70a5d47209177d0e60" + 
"nmdc:2941988fcfb708d20ad1e44682c78e22", + "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "nmdc:2914266e7ac7a8668c6f8d8722466c69", + "nmdc:0fd97ca0ce01d42361ce817d3753a65e", + "nmdc:8e7832cac0ae99e2b63dfdfa34c24927" ], - "was_informed_by": "gold:Gp0127634", - "input_contig_num": 175822, - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "was_informed_by": "gold:Gp0127632", + "input_contig_num": 132499, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0r0vf18", + "name": "MAGs Analysis Activity for nmdc:mga0b6cy30", "mags_list": [ { - "number_of_contig": 235, - "completeness": 68.28, + "number_of_contig": 84, + "completeness": 27.81, "bin_name": "bins.1", - "gene_count": 2056, - "bin_quality": "MQ", + "gene_count": 437, + "bin_quality": "LQ", "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", + "gtdbtk_order": "", "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 2.91, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.71, + "gtdbtk_class": "", + "gtdbtk_phylum": "", "num_5s": 1, - "num_23s": 0, + "num_23s": 1, "gtdbtk_genus": "", - "num_t_rna": 34 + "num_t_rna": 10 }, { - "number_of_contig": 43, - "completeness": 10.69, + "number_of_contig": 229, + "completeness": 71.45, "bin_name": "bins.2", - "gene_count": 247, - "bin_quality": "LQ", + "gene_count": 1997, + "bin_quality": "MQ", "gtdbtk_species": "", - "gtdbtk_order": "", + "gtdbtk_order": "Nitrososphaerales", "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 4 + 
"num_t_rna": 36 } ], - "unbinned_contig_num": 6948, - "started_at_time": "2021-10-11T02:23:30Z", + "unbinned_contig_num": 3368, + "started_at_time": "2021-10-11T02:23:42Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:49:55+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127634_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.tooShort.fa", - "md5_checksum": "3c8eadbcf4f583090d8f378ea6758799", - "id": "nmdc:3c8eadbcf4f583090d8f378ea6758799", - "file_size_bytes": 71683990 - }, - { - "name": "Gp0127634_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.unbinned.fa", - "md5_checksum": "1be647dc835ee8fe666fe9893266bd21", - "id": "nmdc:1be647dc835ee8fe666fe9893266bd21", - "file_size_bytes": 11353478 - }, - { - "name": "Gp0127634_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127634", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_checkm_qa.out", - "md5_checksum": "6cc278c455cafc691333c0a74fe6c540", - "id": "nmdc:6cc278c455cafc691333c0a74fe6c540", - "file_size_bytes": 936 - }, - { - "name": "Gp0127634_high-quality and medium-quality 
bins", - "description": "high-quality and medium-quality bins for Gp0127634", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_hqmq_bin.zip", - "md5_checksum": "de4d0180489bdaa5526977508a489b99", - "id": "nmdc:de4d0180489bdaa5526977508a489b99", - "file_size_bytes": 518340 - }, - { - "name": "Gp0127634_metabat2 bins", - "description": "metabat2 bins for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_metabat_bin.zip", - "md5_checksum": "16a08c4a3a6e9c70a5d47209177d0e60", - "id": "nmdc:16a08c4a3a6e9c70a5d47209177d0e60", - "file_size_bytes": 63768 - } - ] + "ended_at_time": "2021-10-11T04:08:32+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b47" - }, - "id": "nmdc:omprc-11-kc23zq65", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-59xteq78" - ], - "has_output": [ - "jgi:574fde607ded5e3df1ee1403" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127635" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c860" + 
"$oid": "649b005bbf2caae0415ef9ae" }, "has_input": [ - "nmdc:1a16fdf096087338922b288165a924b8" + "nmdc:b5094d52c6d48836de0aac261c622868" ], "part_of": [ - "nmdc:mga0ak4p20" + "nmdc:mga0b6cy30" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", - "nmdc:fbc260443529d6e8067efdac3b58a8c1" + "nmdc:42989e75458691fbd17e537582c56d5e", + "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", + "nmdc:c595237698baaf882fdeeac92f1b02be", + "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", + "nmdc:57053d5594bb80495014664df22b0bb0", + "nmdc:3c82ee6a19674bd5abd4072cb137d96f", + "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", + "nmdc:2475726e21bd8369f76d529f55f21a3f", + "nmdc:5698830d572ddc4e35a5f6642da7981a", + "nmdc:18cdb0f987a2d417d0a39a685e435729", + "nmdc:b34e4d1823bd5cd88aa42832d10b3431", + "nmdc:dc544f4796d49c520372e1872c5aea49" ], - "was_informed_by": "gold:Gp0127635", - "input_read_count": 25320866, - "output_read_bases": 3673182178, - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "was_informed_by": "gold:Gp0127632", + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", "execution_resource": "NERSC-Cori", - "input_read_bases": 3823450766, - "name": "Read QC Activity for nmdc:mga0ak4p20", - "output_read_count": 24600396, - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:11:48+00:00", - "output_data_objects": [ - { - "name": "Gp0127635_Filtered Reads", - "description": "Filtered Reads for Gp0127635", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filtered.fastq.gz", - "md5_checksum": "f8bc16e232f7ba0f6d6b5ca35a708c36", - "id": "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", - "file_size_bytes": 1951049105 - }, - { - "name": "Gp0127635_Filtered Stats", - "description": "Filtered Stats for Gp0127635", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filterStats.txt", - "md5_checksum": "fbc260443529d6e8067efdac3b58a8c1", - "id": "nmdc:fbc260443529d6e8067efdac3b58a8c1", - "file_size_bytes": 280 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0b6cy30", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:08:32+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf25" + "$oid": "649b005f2ca5ee4adb139f9b" }, "has_input": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + ], + "part_of": [ + "nmdc:mga0b6cy30" ], + "ctg_logsum": 81568, + "scaf_logsum": 81839, + "gap_pct": 0.00096, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "nmdc:ddec46781153da60da815c65871f5413", - "nmdc:e626ec18dba4885613240927cbb99d8b", - "nmdc:f8486e4ee029038a452a3484db10cabc", - "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "nmdc:59807dae5216b11c96df5593a26d9a88", - "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "nmdc:6748020214a3d68ad588e3548107208e" + "nmdc:b5094d52c6d48836de0aac261c622868", + "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", + "nmdc:f8fad4cf225943d8fddec3fa3402c53a", + "nmdc:52f130d084757d6e27177ed108e9e5bf", + "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53" ], - "was_informed_by": "gold:Gp0127635", - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "asm_score": 5.986, + "was_informed_by": "gold:Gp0127632", + "ctg_powsum": 9274.272, + "scaf_max": 23706, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "scaf_powsum": 9304.689, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:11:48+00:00", - "output_data_objects": [ - { - "name": 
"Gp0127635_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", - "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", - "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "file_size_bytes": 3696 - }, - { - "name": "Gp0127635_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", - "md5_checksum": "ddec46781153da60da815c65871f5413", - "id": "nmdc:ddec46781153da60da815c65871f5413", - "file_size_bytes": 677459 - }, - { - "name": "Gp0127635_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127635", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", - "md5_checksum": "e626ec18dba4885613240927cbb99d8b", - "id": "nmdc:e626ec18dba4885613240927cbb99d8b", - "file_size_bytes": 236164 - }, - { - "name": "Gp0127635_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127635", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", - "md5_checksum": "f8486e4ee029038a452a3484db10cabc", - "id": "nmdc:f8486e4ee029038a452a3484db10cabc", - "file_size_bytes": 1796179546 - }, - { - "name": "Gp0127635_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127635", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", - "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", - "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "file_size_bytes": 
254661 - }, - { - "name": "Gp0127635_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127635", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", - "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", - "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "file_size_bytes": 2333534 - }, - { - "name": "Gp0127635_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127635", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", - "md5_checksum": "59807dae5216b11c96df5593a26d9a88", - "id": "nmdc:59807dae5216b11c96df5593a26d9a88", - "file_size_bytes": 1432249556 - }, - { - "name": "Gp0127635_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127635", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", - "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", - "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "file_size_bytes": 639738 - }, - { - "name": "Gp0127635_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127635", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", - "md5_checksum": "6748020214a3d68ad588e3548107208e", - "id": "nmdc:6748020214a3d68ad588e3548107208e", - "file_size_bytes": 3996293 - } + "contigs": 132499, + "name": "Assembly Activity for nmdc:mga0b6cy30", + "ctg_max": 23706, + "gc_std": 0.09103, + "contig_bp": 54959738, + "gc_avg": 0.61354, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 54960268, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 132455, + 
"ended_at_time": "2021-10-11T04:08:32+00:00", + "ctg_l50": 372, + "ctg_l90": 285, + "ctg_n50": 43541, + "ctg_n90": 113564, + "scaf_l50": 372, + "scaf_l90": 285, + "scaf_n50": 43524, + "scaf_n90": 113522 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b44" + }, + "id": "nmdc:omprc-11-8qms8262", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-0xprxw22" + ], + "has_output": [ + "jgi:574fde807ded5e3df1ee141b" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127632" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e71936833bcf838a6ffdfc" + "$oid": "649b009d6bdd4fd20273c868" }, "has_input": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" + "nmdc:5cbd7ceb39903cbded77b36ae866fe9f" ], "part_of": [ - "nmdc:mga0ak4p20" + "nmdc:mga0b6cy30" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "nmdc:ddec46781153da60da815c65871f5413", - "nmdc:e626ec18dba4885613240927cbb99d8b", - 
"nmdc:f8486e4ee029038a452a3484db10cabc", - "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "nmdc:59807dae5216b11c96df5593a26d9a88", - "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "nmdc:6748020214a3d68ad588e3548107208e" + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", + "nmdc:919c5aade4fffb76f743a33b035b2839" ], - "was_informed_by": "gold:Gp0127635", - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "was_informed_by": "gold:Gp0127632", + "input_read_count": 27906294, + "output_read_bases": 3905482172, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:11:48+00:00", - "output_data_objects": [ - { - "name": "Gp0127635_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", - "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", - "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "file_size_bytes": 3696 - }, - { - "name": "Gp0127635_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", - "md5_checksum": "ddec46781153da60da815c65871f5413", - "id": "nmdc:ddec46781153da60da815c65871f5413", - "file_size_bytes": 677459 - }, - { - "name": "Gp0127635_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127635", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", - "md5_checksum": "e626ec18dba4885613240927cbb99d8b", - "id": "nmdc:e626ec18dba4885613240927cbb99d8b", - "file_size_bytes": 236164 - }, - { - "name": 
"Gp0127635_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127635", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", - "md5_checksum": "f8486e4ee029038a452a3484db10cabc", - "id": "nmdc:f8486e4ee029038a452a3484db10cabc", - "file_size_bytes": 1796179546 - }, - { - "name": "Gp0127635_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127635", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", - "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", - "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "file_size_bytes": 254661 - }, - { - "name": "Gp0127635_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127635", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", - "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", - "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "file_size_bytes": 2333534 - }, - { - "name": "Gp0127635_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127635", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", - "md5_checksum": "59807dae5216b11c96df5593a26d9a88", - "id": "nmdc:59807dae5216b11c96df5593a26d9a88", - "file_size_bytes": 1432249556 - }, - { - "name": "Gp0127635_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127635", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", - "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", - "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "file_size_bytes": 639738 - }, - { - "name": "Gp0127635_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127635", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", - "md5_checksum": "6748020214a3d68ad588e3548107208e", - "id": "nmdc:6748020214a3d68ad588e3548107208e", - "file_size_bytes": 3996293 - } - ] - }, + "input_read_bases": 4213850394, + "name": "Read QC Activity for nmdc:mga0b6cy30", + "output_read_count": 26116440, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:08:32+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f90" + "$oid": "649b009bff710ae353f8cf31" }, "has_input": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" - ], - "part_of": [ - "nmdc:mga0ak4p20" + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" ], - "ctg_logsum": 269360, - "scaf_logsum": 270403, - "gap_pct": 0.00195, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", - "nmdc:4d4497f63f95f7d2f8986178dab3ae52", - "nmdc:ac98d3d128ec5b045a9ef019a5653b99", - "nmdc:1d0302bec371a73f040d052f4b66277c", - "nmdc:2d8cca230f439e38f1e628666e40e013" + "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "nmdc:6c54105711e818c4d8169ab595b05efe", + "nmdc:adb155cdb656648496484998a62fb96f", + "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "nmdc:f345b3a57c37097a860e38d5e83835b8", + "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "nmdc:e350fda9bd0651755171d79b413b8da3" ], - "asm_score": 3.934, - 
"was_informed_by": "gold:Gp0127635", - "ctg_powsum": 29422, - "scaf_max": 23775, - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", - "scaf_powsum": 29544, + "was_informed_by": "gold:Gp0127632", + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", "execution_resource": "NERSC-Cori", - "contigs": 206757, - "name": "Assembly Activity for nmdc:mga0ak4p20", - "ctg_max": 23775, - "gc_std": 0.10033, - "contig_bp": 103842002, - "gc_avg": 0.61621, - "started_at_time": "2021-10-11T02:26:59Z", - "scaf_bp": 103844032, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 206599, - "ended_at_time": "2021-10-11T04:11:48+00:00", - "ctg_l50": 496, - "ctg_l90": 290, - "ctg_n50": 55322, - "ctg_n90": 171862, - "scaf_l50": 497, - "scaf_l90": 290, - "scaf_n50": 55067, - "scaf_n90": 171721, - "output_data_objects": [ - { - "name": "Gp0127635_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127635", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_contigs.fna", - "md5_checksum": "3d1b5043e0c49ac6062aeba4ebbba910", - "id": "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", - "file_size_bytes": 111964628 - }, - { - "name": "Gp0127635_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127635", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_scaffolds.fna", - "md5_checksum": "4d4497f63f95f7d2f8986178dab3ae52", - "id": "nmdc:4d4497f63f95f7d2f8986178dab3ae52", - "file_size_bytes": 111342667 - }, - { - "name": "Gp0127635_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_covstats.txt", - "md5_checksum": "ac98d3d128ec5b045a9ef019a5653b99", - "id": "nmdc:ac98d3d128ec5b045a9ef019a5653b99", - "file_size_bytes": 16397988 - }, - { - "name": "Gp0127635_Assembled AGP 
file", - "description": "Assembled AGP file for Gp0127635", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_assembly.agp", - "md5_checksum": "1d0302bec371a73f040d052f4b66277c", - "id": "nmdc:1d0302bec371a73f040d052f4b66277c", - "file_size_bytes": 15325341 - }, - { - "name": "Gp0127635_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127635", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_pairedMapped_sorted.bam", - "md5_checksum": "2d8cca230f439e38f1e628666e40e013", - "id": "nmdc:2d8cca230f439e38f1e628666e40e013", - "file_size_bytes": 2159251548 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:08:32+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9a7" + "$oid": "61e7195d833bcf838a700521" }, "has_input": [ - "nmdc:3d1b5043e0c49ac6062aeba4ebbba910" + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" ], "part_of": [ - "nmdc:mga0ak4p20" + "nmdc:mga0b6cy30" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:bb7eae2b3dbc58168b9122098f078bb5", - "nmdc:2af7f6c008858f2f0d47c00fa9758129", - "nmdc:dd3668477e39a65243179dfb9e4bf26e", - "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", - "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", - "nmdc:6960907313875913a789e1fda46ed34e", - "nmdc:033da43cdca9f81ed2270a9094fdb065", - "nmdc:e9603ffd918db8a21df1310b890315ff", - "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", - "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", - "nmdc:f0a96fb57947358a42053e9fb7134e70", - 
"nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9" + "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "nmdc:6c54105711e818c4d8169ab595b05efe", + "nmdc:adb155cdb656648496484998a62fb96f", + "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "nmdc:f345b3a57c37097a860e38d5e83835b8", + "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "nmdc:e350fda9bd0651755171d79b413b8da3" ], - "was_informed_by": "gold:Gp0127635", - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "was_informed_by": "gold:Gp0127632", + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0ak4p20", - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:11:48+00:00", - "output_data_objects": [ - { - "name": "Gp0127635_Protein FAA", - "description": "Protein FAA for Gp0127635", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_proteins.faa", - "md5_checksum": "bb7eae2b3dbc58168b9122098f078bb5", - "id": "nmdc:bb7eae2b3dbc58168b9122098f078bb5", - "file_size_bytes": 63157189 - }, - { - "name": "Gp0127635_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127635", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_structural_annotation.gff", - "md5_checksum": "2af7f6c008858f2f0d47c00fa9758129", - "id": "nmdc:2af7f6c008858f2f0d47c00fa9758129", - "file_size_bytes": 2526 - }, - { - "name": "Gp0127635_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127635", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_functional_annotation.gff", - "md5_checksum": 
"dd3668477e39a65243179dfb9e4bf26e", - "id": "nmdc:dd3668477e39a65243179dfb9e4bf26e", - "file_size_bytes": 71092075 - }, - { - "name": "Gp0127635_KO TSV file", - "description": "KO TSV file for Gp0127635", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko.tsv", - "md5_checksum": "be0e9a5999ddfd46bf5daac56aa96b86", - "id": "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", - "file_size_bytes": 8023056 - }, - { - "name": "Gp0127635_EC TSV file", - "description": "EC TSV file for Gp0127635", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ec.tsv", - "md5_checksum": "95a6a1f91bf18bc1a781a8890d2e1bc5", - "id": "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", - "file_size_bytes": 5303502 - }, - { - "name": "Gp0127635_COG GFF file", - "description": "COG GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cog.gff", - "md5_checksum": "6960907313875913a789e1fda46ed34e", - "id": "nmdc:6960907313875913a789e1fda46ed34e", - "file_size_bytes": 42106254 - }, - { - "name": "Gp0127635_PFAM GFF file", - "description": "PFAM GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_pfam.gff", - "md5_checksum": "033da43cdca9f81ed2270a9094fdb065", - "id": "nmdc:033da43cdca9f81ed2270a9094fdb065", - "file_size_bytes": 31806020 - }, - { - "name": "Gp0127635_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_tigrfam.gff", - "md5_checksum": "e9603ffd918db8a21df1310b890315ff", - "id": "nmdc:e9603ffd918db8a21df1310b890315ff", - "file_size_bytes": 3500524 - }, - { - "name": "Gp0127635_SMART GFF file", - "description": "SMART GFF file for Gp0127635", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_smart.gff", - "md5_checksum": "fd98e0cfe1f4ca7b9e4af833c5ef199c", - "id": "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", - "file_size_bytes": 9346082 - }, - { - "name": "Gp0127635_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_supfam.gff", - "md5_checksum": "03481d99958ae1c9dcccb8fd91c0bbf7", - "id": "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", - "file_size_bytes": 52582333 - }, - { - "name": "Gp0127635_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cath_funfam.gff", - "md5_checksum": "f0a96fb57947358a42053e9fb7134e70", - "id": "nmdc:f0a96fb57947358a42053e9fb7134e70", - "file_size_bytes": 40179818 - }, - { - "name": "Gp0127635_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko_ec.gff", - "md5_checksum": "9737b61f2e6e923ac662e0a1c4f6aaa9", - "id": "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9", - "file_size_bytes": 25482964 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:08:32+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127636_Filtered Reads", + "description": "Filtered Reads for Gp0127636", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filtered.fastq.gz", + "md5_checksum": "e4f5675c728fd1896682eb669656b5d6", + "id": "nmdc:e4f5675c728fd1896682eb669656b5d6", + 
"file_size_bytes": 2463342132 + }, + { + "name": "Gp0127636_Filtered Stats", + "description": "Filtered Stats for Gp0127636", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filterStats.txt", + "md5_checksum": "64f455185b1bc610a8d74a84ed12683f", + "id": "nmdc:64f455185b1bc610a8d74a84ed12683f", + "file_size_bytes": 293 + }, + { + "name": "Gp0127636_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", + "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", + "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "file_size_bytes": 5547 + }, + { + "name": "Gp0127636_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", + "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", + "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "file_size_bytes": 965042 + }, + { + "name": "Gp0127636_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127636", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", + "md5_checksum": "827ad863c875ea14473c9903d192fa73", + "id": "nmdc:827ad863c875ea14473c9903d192fa73", + "file_size_bytes": 242495 + }, + { + "name": "Gp0127636_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127636", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", + "md5_checksum": "957074ca49765b22348e27b0133d8ba0", + "id": "nmdc:957074ca49765b22348e27b0133d8ba0", + "file_size_bytes": 2151939041 + 
}, + { + "name": "Gp0127636_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127636", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", + "md5_checksum": "9253645582296696cb33b11754832574", + "id": "nmdc:9253645582296696cb33b11754832574", + "file_size_bytes": 257932 + }, + { + "name": "Gp0127636_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127636", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", + "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", + "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "file_size_bytes": 2335219 + }, + { + "name": "Gp0127636_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127636", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", + "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", + "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", + "file_size_bytes": 1746049273 + }, + { + "name": "Gp0127636_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127636", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", + "md5_checksum": "b4524a34937893768dbd3752068dee0c", + "id": "nmdc:b4524a34937893768dbd3752068dee0c", + "file_size_bytes": 660975 + }, + { + "name": "Gp0127636_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127636", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", + "md5_checksum": 
"f1543441c59aaaf8ec52036a5bbbe3f4", + "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", + "file_size_bytes": 4020978 + }, + { + "name": "Gp0127636_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", + "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", + "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "file_size_bytes": 5547 + }, + { + "name": "Gp0127636_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", + "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", + "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "file_size_bytes": 965042 + }, + { + "name": "Gp0127636_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127636", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", + "md5_checksum": "827ad863c875ea14473c9903d192fa73", + "id": "nmdc:827ad863c875ea14473c9903d192fa73", + "file_size_bytes": 242495 + }, + { + "name": "Gp0127636_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127636", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", + "md5_checksum": "957074ca49765b22348e27b0133d8ba0", + "id": "nmdc:957074ca49765b22348e27b0133d8ba0", + "file_size_bytes": 2151939041 + }, + { + "name": "Gp0127636_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127636", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", + "md5_checksum": "9253645582296696cb33b11754832574", + "id": "nmdc:9253645582296696cb33b11754832574", + "file_size_bytes": 257932 + }, + { + "name": "Gp0127636_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127636", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", + "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", + "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "file_size_bytes": 2335219 + }, + { + "name": "Gp0127636_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127636", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", + "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", + "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", + "file_size_bytes": 1746049273 + }, + { + "name": "Gp0127636_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127636", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", + "md5_checksum": "b4524a34937893768dbd3752068dee0c", + "id": "nmdc:b4524a34937893768dbd3752068dee0c", + "file_size_bytes": 660975 + }, + { + "name": "Gp0127636_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127636", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", + "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", + "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", + "file_size_bytes": 4020978 }, + { + "name": "Gp0127636_Assembled contigs fasta", + 
"description": "Assembled contigs fasta for Gp0127636", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_contigs.fna", + "md5_checksum": "36692b7b93756aaabd7f1f6259753c4e", + "id": "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "file_size_bytes": 39062008 + }, + { + "name": "Gp0127636_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127636", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_scaffolds.fna", + "md5_checksum": "8d02adf1319d5b95c2abc6ed5b5c1683", + "id": "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", + "file_size_bytes": 38774844 + }, + { + "name": "Gp0127636_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_covstats.txt", + "md5_checksum": "9830a711accd3a5ed899a2e616d0f4bf", + "id": "nmdc:9830a711accd3a5ed899a2e616d0f4bf", + "file_size_bytes": 7495949 + }, + { + "name": "Gp0127636_Assembled AGP file", + "description": "Assembled AGP file for Gp0127636", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_assembly.agp", + "md5_checksum": "481fbd8cdeacd71e54a45c78d5decb36", + "id": "nmdc:481fbd8cdeacd71e54a45c78d5decb36", + "file_size_bytes": 6962527 + }, + { + "name": "Gp0127636_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127636", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_pairedMapped_sorted.bam", + "md5_checksum": "a24edc9ffd773c30cea8ea709988307a", + "id": "nmdc:a24edc9ffd773c30cea8ea709988307a", + "file_size_bytes": 2624769069 + }, + { + "name": "Gp0127636_Protein FAA", + "description": "Protein FAA for Gp0127636", + 
"data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_proteins.faa", + "md5_checksum": "a5d97f323fe7117cb38a2eea1f2246d2", + "id": "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", + "file_size_bytes": 23469553 + }, + { + "name": "Gp0127636_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127636", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_structural_annotation.gff", + "md5_checksum": "2b791fb3e2964d7808388b32086e0de2", + "id": "nmdc:2b791fb3e2964d7808388b32086e0de2", + "file_size_bytes": 16532352 + }, + { + "name": "Gp0127636_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127636", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_functional_annotation.gff", + "md5_checksum": "f61ed86592491b2d83b5893749e12406", + "id": "nmdc:f61ed86592491b2d83b5893749e12406", + "file_size_bytes": 28432426 + }, + { + "name": "Gp0127636_KO TSV file", + "description": "KO TSV file for Gp0127636", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko.tsv", + "md5_checksum": "e983789bdc08364b00a000684062ed16", + "id": "nmdc:e983789bdc08364b00a000684062ed16", + "file_size_bytes": 3189682 + }, + { + "name": "Gp0127636_EC TSV file", + "description": "EC TSV file for Gp0127636", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ec.tsv", + "md5_checksum": "3cd47d66b6e9006ff683a2eda168285f", + "id": "nmdc:3cd47d66b6e9006ff683a2eda168285f", + "file_size_bytes": 2100535 + }, + { + "name": "Gp0127636_COG GFF file", + "description": "COG GFF file for Gp0127636", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cog.gff", + "md5_checksum": "e056ee666e8001bdb6f790efb3394093", + "id": "nmdc:e056ee666e8001bdb6f790efb3394093", + "file_size_bytes": 15585690 + }, + { + "name": "Gp0127636_PFAM GFF file", + "description": "PFAM GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_pfam.gff", + "md5_checksum": "2b90fcb7628c3ffa9e7a14a32612b7af", + "id": "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", + "file_size_bytes": 11182350 + }, + { + "name": "Gp0127636_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_tigrfam.gff", + "md5_checksum": "4e2f1d4b2d20bfb0209a320a60c4aeac", + "id": "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", + "file_size_bytes": 995758 + }, + { + "name": "Gp0127636_SMART GFF file", + "description": "SMART GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_smart.gff", + "md5_checksum": "dd24a8b0f774555ac91e663416745428", + "id": "nmdc:dd24a8b0f774555ac91e663416745428", + "file_size_bytes": 3256325 + }, + { + "name": "Gp0127636_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_supfam.gff", + "md5_checksum": "2e76b71475b854e2bf2d0aa15a53dd7d", + "id": "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", + "file_size_bytes": 19666317 + }, + { + "name": "Gp0127636_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cath_funfam.gff", + "md5_checksum": "2f297176cd51b2ede33c313f713b40b1", + "id": "nmdc:2f297176cd51b2ede33c313f713b40b1", + "file_size_bytes": 14458019 + }, + { + "name": "Gp0127636_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127636", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko_ec.gff", + "md5_checksum": "678a7af05a89d9d4f5f5d598dc2e3013", + "id": "nmdc:678a7af05a89d9d4f5f5d598dc2e3013", + "file_size_bytes": 10187098 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127636_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127636", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/MAGs/nmdc_mga02tph39_hqmq_bin.zip", + "md5_checksum": "2d1e318b8b815a8a5487f23315d0fe02", + "id": "nmdc:2d1e318b8b815a8a5487f23315d0fe02", + "file_size_bytes": 182 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34708" + "$oid": "649b0052ec087f6bbab34709" }, "has_input": [ - "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", - "nmdc:2d8cca230f439e38f1e628666e40e013", - "nmdc:dd3668477e39a65243179dfb9e4bf26e" + "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "nmdc:a24edc9ffd773c30cea8ea709988307a", + "nmdc:f61ed86592491b2d83b5893749e12406" ], - "too_short_contig_num": 192406, + "too_short_contig_num": 93687, "part_of": [ - "nmdc:mga0ak4p20" + "nmdc:mga02tph39" ], - "binned_contig_num": 502, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:daed5e3af5201fe510e780f155f90bc3", - "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", - "nmdc:b5ae13756638f09d74fdbe03183b231f", - 
"nmdc:1dc5796596177362849da19fc4e50b13", - "nmdc:fba0bfa144e9ef179edb10b5a941c259" + "nmdc:2d1e318b8b815a8a5487f23315d0fe02" ], - "was_informed_by": "gold:Gp0127635", - "input_contig_num": 206754, - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "was_informed_by": "gold:Gp0127636", + "input_contig_num": 95606, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0ak4p20", - "mags_list": [ - { - "number_of_contig": 203, - "completeness": 41.91, - "bin_name": "bins.1", - "gene_count": 1456, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 3.88, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 24 - }, - { - "number_of_contig": 171, - "completeness": 8.33, - "bin_name": "bins.2", - "gene_count": 880, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - }, - { - "number_of_contig": 55, - "completeness": 14.66, - "bin_name": "bins.3", - "gene_count": 269, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 1 - }, - { - "number_of_contig": 73, - "completeness": 0.0, - "bin_name": "bins.4", - "gene_count": 475, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - } - ], - 
"unbinned_contig_num": 13846, - "started_at_time": "2021-10-11T02:26:59Z", + "name": "MAGs Analysis Activity for nmdc:mga02tph39", + "mags_list": [], + "unbinned_contig_num": 1919, + "started_at_time": "2021-10-11T02:23:42Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:11:48+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127635_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.tooShort.fa", - "md5_checksum": "daed5e3af5201fe510e780f155f90bc3", - "id": "nmdc:daed5e3af5201fe510e780f155f90bc3", - "file_size_bytes": 86476884 - }, - { - "name": "Gp0127635_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.unbinned.fa", - "md5_checksum": "7cdb1c384c8bc63b3c127e5bc434ac6b", - "id": "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", - "file_size_bytes": 22898396 - }, - { - "name": "Gp0127635_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127635", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_checkm_qa.out", - "md5_checksum": "b5ae13756638f09d74fdbe03183b231f", - "id": "nmdc:b5ae13756638f09d74fdbe03183b231f", - "file_size_bytes": 1240 - }, - { - 
"name": "Gp0127635_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127635", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_hqmq_bin.zip", - "md5_checksum": "1dc5796596177362849da19fc4e50b13", - "id": "nmdc:1dc5796596177362849da19fc4e50b13", - "file_size_bytes": 182 - }, - { - "name": "Gp0127635_metabat2 bins", - "description": "metabat2 bins for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_metabat_bin.zip", - "md5_checksum": "fba0bfa144e9ef179edb10b5a941c259", - "id": "nmdc:fba0bfa144e9ef179edb10b5a941c259", - "file_size_bytes": 795127 - } - ] + "ended_at_time": "2021-11-13T18:49:37+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b48" - }, - "id": "nmdc:omprc-11-c8dzx197", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-kwfbp795" - ], - "has_output": [ - "jgi:574fde647ded5e3df1ee1406" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127637" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + 
"metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c85f" + "$oid": "649b005bbf2caae0415ef9ac" }, "has_input": [ - "nmdc:320ac579913ecc4c218607b6b3b915b3" + "nmdc:36692b7b93756aaabd7f1f6259753c4e" ], "part_of": [ - "nmdc:mga0sb9b30" + "nmdc:mga02tph39" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", - "nmdc:611e67df261e050860b1075c6a6a5ff5" + "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", + "nmdc:2b791fb3e2964d7808388b32086e0de2", + "nmdc:f61ed86592491b2d83b5893749e12406", + "nmdc:e983789bdc08364b00a000684062ed16", + "nmdc:3cd47d66b6e9006ff683a2eda168285f", + "nmdc:e056ee666e8001bdb6f790efb3394093", + "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", + "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", + "nmdc:dd24a8b0f774555ac91e663416745428", + "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", + "nmdc:2f297176cd51b2ede33c313f713b40b1", + "nmdc:678a7af05a89d9d4f5f5d598dc2e3013" ], - "was_informed_by": "gold:Gp0127637", - "input_read_count": 24239336, - "output_read_bases": 2975652755, - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3660139736, - "name": "Read QC Activity for nmdc:mga0sb9b30", - "output_read_count": 19917090, - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:11:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127637_Filtered Reads", - "description": "Filtered Reads for Gp0127637", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", - "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", - "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", - "file_size_bytes": 1553219358 - }, - { - "name": "Gp0127637_Filtered Stats", - "description": "Filtered Stats for Gp0127637", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", - "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", - "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", - "file_size_bytes": 289 - } - ] - }, + "was_informed_by": "gold:Gp0127636", + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga02tph39", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-11-13T18:49:37+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf23" + "$oid": "649b005f2ca5ee4adb139f91" }, "has_input": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" + "nmdc:e4f5675c728fd1896682eb669656b5d6" ], + "part_of": [ + "nmdc:mga02tph39" + ], + "ctg_logsum": 36469, + "scaf_logsum": 36615, + "gap_pct": 0.00062, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "nmdc:37dd1d73ad47979ee5284830d27df535", - "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "nmdc:81a6efbd082e07bc2db174a88d64a272", - "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "nmdc:9a1826f66ee45187d627076d11dc491f", - "nmdc:67adb9cc2c75251f556a90b1a959ea72" + "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", + "nmdc:9830a711accd3a5ed899a2e616d0f4bf", + "nmdc:481fbd8cdeacd71e54a45c78d5decb36", + "nmdc:a24edc9ffd773c30cea8ea709988307a" ], - "was_informed_by": "gold:Gp0127637", - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "asm_score": 3.618, + "was_informed_by": "gold:Gp0127636", + "ctg_powsum": 3976.058, + "scaf_max": 23067, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "scaf_powsum": 3993.143, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", - "started_at_time": "2021-10-11T02:24:01Z", - 
"type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:11:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127637_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", - "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", - "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "file_size_bytes": 660 - }, - { - "name": "Gp0127637_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", - "md5_checksum": "37dd1d73ad47979ee5284830d27df535", - "id": "nmdc:37dd1d73ad47979ee5284830d27df535", - "file_size_bytes": 594054 - }, - { - "name": "Gp0127637_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127637", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", - "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", - "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "file_size_bytes": 227750 - }, - { - "name": "Gp0127637_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127637", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", - "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", - "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "file_size_bytes": 1457058272 - }, - { - "name": "Gp0127637_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127637", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", - 
"md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", - "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "file_size_bytes": 251867 - }, - { - "name": "Gp0127637_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127637", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", - "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", - "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", - "file_size_bytes": 2325282 - }, - { - "name": "Gp0127637_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127637", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", - "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", - "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "file_size_bytes": 1160106364 - }, - { - "name": "Gp0127637_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127637", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", - "md5_checksum": "9a1826f66ee45187d627076d11dc491f", - "id": "nmdc:9a1826f66ee45187d627076d11dc491f", - "file_size_bytes": 613810 - }, - { - "name": "Gp0127637_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127637", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", - "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", - "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", - "file_size_bytes": 3853908 - } + "contigs": 95606, + "name": "Assembly Activity for nmdc:mga02tph39", + "ctg_max": 23067, + "gc_std": 0.11099, + "gc_avg": 0.57474, + "contig_bp": 35573088, + 
"started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 35573308, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 95584, + "ended_at_time": "2021-11-13T18:49:37+00:00", + "ctg_l50": 329, + "ctg_l90": 282, + "ctg_n50": 35238, + "ctg_n90": 83377, + "scaf_l50": 329, + "scaf_l90": 282, + "scaf_n50": 35220, + "scaf_n90": 83355 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b45" + }, + "id": "nmdc:omprc-11-k675bw84", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-rtf54942" + ], + "has_output": [ + "jgi:574fe09f7ded5e3df1ee1489" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127636" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e7191f833bcf838a6ffa50" + "$oid": "649b009d6bdd4fd20273c864" }, "has_input": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" + "nmdc:341830a5735c34968da2304bc27edd2a" ], "part_of": [ - "nmdc:mga0sb9b30" + "nmdc:mga02tph39" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - 
"nmdc:37dd1d73ad47979ee5284830d27df535", - "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "nmdc:81a6efbd082e07bc2db174a88d64a272", - "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "nmdc:9a1826f66ee45187d627076d11dc491f", - "nmdc:67adb9cc2c75251f556a90b1a959ea72" + "nmdc:e4f5675c728fd1896682eb669656b5d6", + "nmdc:64f455185b1bc610a8d74a84ed12683f" ], - "was_informed_by": "gold:Gp0127637", - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "was_informed_by": "gold:Gp0127636", + "input_read_count": 31642056, + "output_read_bases": 4354491393, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:11:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127637_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", - "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", - "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "file_size_bytes": 660 - }, - { - "name": "Gp0127637_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", - "md5_checksum": "37dd1d73ad47979ee5284830d27df535", - "id": "nmdc:37dd1d73ad47979ee5284830d27df535", - "file_size_bytes": 594054 - }, - { - "name": "Gp0127637_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127637", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", - "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", - "id": 
"nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "file_size_bytes": 227750 - }, - { - "name": "Gp0127637_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127637", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", - "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", - "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "file_size_bytes": 1457058272 - }, - { - "name": "Gp0127637_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127637", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", - "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", - "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "file_size_bytes": 251867 - }, - { - "name": "Gp0127637_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127637", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", - "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", - "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", - "file_size_bytes": 2325282 - }, - { - "name": "Gp0127637_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127637", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", - "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", - "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "file_size_bytes": 1160106364 - }, - { - "name": "Gp0127637_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127637", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", - "md5_checksum": "9a1826f66ee45187d627076d11dc491f", - "id": "nmdc:9a1826f66ee45187d627076d11dc491f", - "file_size_bytes": 613810 - }, - { - "name": "Gp0127637_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127637", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", - "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", - "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", - "file_size_bytes": 3853908 - } - ] - }, + "input_read_bases": 4777950456, + "name": "Read QC Activity for nmdc:mga02tph39", + "output_read_count": 29115818, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T18:49:37+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139f93" + "$oid": "649b009bff710ae353f8cf26" }, "has_input": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" - ], - "part_of": [ - "nmdc:mga0sb9b30" + "nmdc:e4f5675c728fd1896682eb669656b5d6" ], - "ctg_logsum": 271617, - "scaf_logsum": 272416, - "gap_pct": 0.00166, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:aee81646e593045bbb32a0012870b88b", - "nmdc:f1026db242cad285204c9c3d6307c183", - "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", - "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", - "nmdc:dee5fa37f57a24685b65e00380d6e433" + "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "nmdc:827ad863c875ea14473c9903d192fa73", + "nmdc:957074ca49765b22348e27b0133d8ba0", + "nmdc:9253645582296696cb33b11754832574", + "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "nmdc:75180fce38f38a6307231b47a8d2b23b", + "nmdc:b4524a34937893768dbd3752068dee0c", + "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" ], - "asm_score": 5.062, - 
"was_informed_by": "gold:Gp0127637", - "ctg_powsum": 29885, - "scaf_max": 43650, - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "scaf_powsum": 29983, + "was_informed_by": "gold:Gp0127636", + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", "execution_resource": "NERSC-Cori", - "contigs": 214863, - "name": "Assembly Activity for nmdc:mga0sb9b30", - "ctg_max": 43650, - "gc_std": 0.08814, - "contig_bp": 108739484, - "gc_avg": 0.63266, - "started_at_time": "2021-10-11T02:24:01Z", - "scaf_bp": 108741284, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 214737, - "ended_at_time": "2021-10-11T03:11:56+00:00", - "ctg_l50": 505, - "ctg_l90": 294, - "ctg_n50": 58474, - "ctg_n90": 177521, - "scaf_l50": 505, - "scaf_l90": 294, - "scaf_n50": 58469, - "scaf_n90": 177412, - "output_data_objects": [ - { - "name": "Gp0127637_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127637", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_contigs.fna", - "md5_checksum": "aee81646e593045bbb32a0012870b88b", - "id": "nmdc:aee81646e593045bbb32a0012870b88b", - "file_size_bytes": 117200777 - }, - { - "name": "Gp0127637_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127637", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_scaffolds.fna", - "md5_checksum": "f1026db242cad285204c9c3d6307c183", - "id": "nmdc:f1026db242cad285204c9c3d6307c183", - "file_size_bytes": 116554638 - }, - { - "name": "Gp0127637_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_covstats.txt", - "md5_checksum": "b02b0a0145d14e97a31e6a6f7e4b8dc8", - "id": "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", - "file_size_bytes": 17037754 - }, - { - "name": "Gp0127637_Assembled AGP 
file", - "description": "Assembled AGP file for Gp0127637", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_assembly.agp", - "md5_checksum": "8afcf1e8b7b3f35edaefee7a0c31e19f", - "id": "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", - "file_size_bytes": 15931363 - }, - { - "name": "Gp0127637_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127637", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_pairedMapped_sorted.bam", - "md5_checksum": "dee5fa37f57a24685b65e00380d6e433", - "id": "nmdc:dee5fa37f57a24685b65e00380d6e433", - "file_size_bytes": 1739825120 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga02tph39", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:49:37+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9a6" + "$oid": "61e71959833bcf838a70040a" }, "has_input": [ - "nmdc:aee81646e593045bbb32a0012870b88b" + "nmdc:e4f5675c728fd1896682eb669656b5d6" ], "part_of": [ - "nmdc:mga0sb9b30" + "nmdc:mga02tph39" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:69603434971f93dbd79860c18dd5c61a", - "nmdc:bf8f822c6730b4cc73715ced3d25c262", - "nmdc:b9ec0754ffaa338c899244703bc91386", - "nmdc:22402cc61770feb5a0aaa4f760808366", - "nmdc:8c96f7faa38c361acc247b5a107a6b54", - "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", - "nmdc:89a8657f659710b3927baab155917fdf", - "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", - "nmdc:5cae6736713d02ccbe26543d733875cb", - "nmdc:a64350eb947c199cc1fbfb087191c0c7", - "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", - 
"nmdc:2471f27b6cf11b6f93c791c273989731" + "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "nmdc:827ad863c875ea14473c9903d192fa73", + "nmdc:957074ca49765b22348e27b0133d8ba0", + "nmdc:9253645582296696cb33b11754832574", + "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "nmdc:75180fce38f38a6307231b47a8d2b23b", + "nmdc:b4524a34937893768dbd3752068dee0c", + "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" ], - "was_informed_by": "gold:Gp0127637", - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "was_informed_by": "gold:Gp0127636", + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0sb9b30", - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:11:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127637_Protein FAA", - "description": "Protein FAA for Gp0127637", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_proteins.faa", - "md5_checksum": "69603434971f93dbd79860c18dd5c61a", - "id": "nmdc:69603434971f93dbd79860c18dd5c61a", - "file_size_bytes": 66263123 - }, - { - "name": "Gp0127637_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127637", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_structural_annotation.gff", - "md5_checksum": "bf8f822c6730b4cc73715ced3d25c262", - "id": "nmdc:bf8f822c6730b4cc73715ced3d25c262", - "file_size_bytes": 2521 - }, - { - "name": "Gp0127637_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127637", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_functional_annotation.gff", - "md5_checksum": 
"b9ec0754ffaa338c899244703bc91386", - "id": "nmdc:b9ec0754ffaa338c899244703bc91386", - "file_size_bytes": 74459552 - }, - { - "name": "Gp0127637_KO TSV file", - "description": "KO TSV file for Gp0127637", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko.tsv", - "md5_checksum": "22402cc61770feb5a0aaa4f760808366", - "id": "nmdc:22402cc61770feb5a0aaa4f760808366", - "file_size_bytes": 8394894 - }, - { - "name": "Gp0127637_EC TSV file", - "description": "EC TSV file for Gp0127637", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ec.tsv", - "md5_checksum": "8c96f7faa38c361acc247b5a107a6b54", - "id": "nmdc:8c96f7faa38c361acc247b5a107a6b54", - "file_size_bytes": 5556852 - }, - { - "name": "Gp0127637_COG GFF file", - "description": "COG GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cog.gff", - "md5_checksum": "7a28d1eafd3a3c181e95f61eb3d18bf1", - "id": "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", - "file_size_bytes": 44328195 - }, - { - "name": "Gp0127637_PFAM GFF file", - "description": "PFAM GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_pfam.gff", - "md5_checksum": "89a8657f659710b3927baab155917fdf", - "id": "nmdc:89a8657f659710b3927baab155917fdf", - "file_size_bytes": 33562431 - }, - { - "name": "Gp0127637_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_tigrfam.gff", - "md5_checksum": "9b9ecf34f2f6ef6865d4864f5debfbb7", - "id": "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", - "file_size_bytes": 3752251 - }, - { - "name": "Gp0127637_SMART GFF file", - "description": "SMART GFF file for Gp0127637", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_smart.gff", - "md5_checksum": "5cae6736713d02ccbe26543d733875cb", - "id": "nmdc:5cae6736713d02ccbe26543d733875cb", - "file_size_bytes": 9871224 - }, - { - "name": "Gp0127637_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_supfam.gff", - "md5_checksum": "a64350eb947c199cc1fbfb087191c0c7", - "id": "nmdc:a64350eb947c199cc1fbfb087191c0c7", - "file_size_bytes": 55329770 - }, - { - "name": "Gp0127637_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cath_funfam.gff", - "md5_checksum": "b8492828a1ad078d9c3192bab4d9a3fa", - "id": "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", - "file_size_bytes": 42052238 - }, - { - "name": "Gp0127637_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko_ec.gff", - "md5_checksum": "2471f27b6cf11b6f93c791c273989731", - "id": "nmdc:2471f27b6cf11b6f93c791c273989731", - "file_size_bytes": 26689447 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga02tph39", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:49:37+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127634_Filtered Reads", + "description": "Filtered Reads for Gp0127634", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", + "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", + "id": "nmdc:ac889627d813c8e34cfbf79a4264c590", + 
"file_size_bytes": 2316462404 + }, + { + "name": "Gp0127634_Filtered Stats", + "description": "Filtered Stats for Gp0127634", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", + "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", + "id": "nmdc:0dfd55be1779ae7922d80aa22034c9a1", + "file_size_bytes": 291 + }, + { + "name": "Gp0127634_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", + "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", + "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "file_size_bytes": 4224 + }, + { + "name": "Gp0127634_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", + "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", + "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "file_size_bytes": 875501 + }, + { + "name": "Gp0127634_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127634", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", + "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", + "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "file_size_bytes": 238755 + }, + { + "name": "Gp0127634_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127634", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", + "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", + "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "file_size_bytes": 2051793471 + 
}, + { + "name": "Gp0127634_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127634", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", + "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", + "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", + "file_size_bytes": 256560 + }, + { + "name": "Gp0127634_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127634", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", + "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", + "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "file_size_bytes": 2334325 + }, + { + "name": "Gp0127634_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127634", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", + "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", + "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "file_size_bytes": 1649071235 + }, + { + "name": "Gp0127634_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127634", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", + "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "file_size_bytes": 654782 + }, + { + "name": "Gp0127634_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127634", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", + "md5_checksum": 
"678e7c401a6971629f7d3ada83b307ab", + "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", + "file_size_bytes": 3988988 + }, + { + "name": "Gp0127634_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", + "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", + "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "file_size_bytes": 4224 + }, + { + "name": "Gp0127634_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", + "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", + "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "file_size_bytes": 875501 }, + { + "name": "Gp0127634_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127634", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", + "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", + "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "file_size_bytes": 238755 + }, + { + "name": "Gp0127634_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127634", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", + "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", + "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "file_size_bytes": 2051793471 + }, + { + "name": "Gp0127634_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127634", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", + "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", + "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", + "file_size_bytes": 256560 + }, + { + "name": "Gp0127634_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127634", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", + "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", + "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "file_size_bytes": 2334325 + }, + { + "name": "Gp0127634_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127634", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", + "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", + "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "file_size_bytes": 1649071235 + }, + { + "name": "Gp0127634_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127634", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", + "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "file_size_bytes": 654782 + }, + { + "name": "Gp0127634_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127634", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", + "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", + "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", + "file_size_bytes": 3988988 + }, + { + "name": "Gp0127634_Assembled contigs fasta", + 
"description": "Assembled contigs fasta for Gp0127634", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_contigs.fna", + "md5_checksum": "2a30cf44cc596923301befc34edf6c0a", + "id": "nmdc:2a30cf44cc596923301befc34edf6c0a", + "file_size_bytes": 84939887 + }, + { + "name": "Gp0127634_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127634", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_scaffolds.fna", + "md5_checksum": "f147264a5a4a7eec4d68f05ab52ecc1d", + "id": "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", + "file_size_bytes": 84411544 + }, + { + "name": "Gp0127634_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_covstats.txt", + "md5_checksum": "9bd1b25df71c0a6f9ca408ddc045ffed", + "id": "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", + "file_size_bytes": 13895509 + }, + { + "name": "Gp0127634_Assembled AGP file", + "description": "Assembled AGP file for Gp0127634", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_assembly.agp", + "md5_checksum": "825969095ff134b195b06a40fcc6089a", + "id": "nmdc:825969095ff134b195b06a40fcc6089a", + "file_size_bytes": 12985962 + }, + { + "name": "Gp0127634_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127634", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_pairedMapped_sorted.bam", + "md5_checksum": "356d9ca409747590849dd894998166ee", + "id": "nmdc:356d9ca409747590849dd894998166ee", + "file_size_bytes": 2516463401 + }, + { + "name": "Gp0127634_Protein FAA", + "description": "Protein FAA for Gp0127634", + 
"data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_proteins.faa", + "md5_checksum": "ca16203099dc1d6bbce00320bb753974", + "id": "nmdc:ca16203099dc1d6bbce00320bb753974", + "file_size_bytes": 49630516 + }, + { + "name": "Gp0127634_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127634", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_structural_annotation.gff", + "md5_checksum": "fffbb7b52a4886755df429e22a152427", + "id": "nmdc:fffbb7b52a4886755df429e22a152427", + "file_size_bytes": 2519 + }, + { + "name": "Gp0127634_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127634", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_functional_annotation.gff", + "md5_checksum": "f63b43e7797845fa94dc6f552ba1ea39", + "id": "nmdc:f63b43e7797845fa94dc6f552ba1ea39", + "file_size_bytes": 57589694 + }, + { + "name": "Gp0127634_KO TSV file", + "description": "KO TSV file for Gp0127634", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko.tsv", + "md5_checksum": "8ab8f39bfc76267daa4ce5a34811bff1", + "id": "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", + "file_size_bytes": 6602379 + }, + { + "name": "Gp0127634_EC TSV file", + "description": "EC TSV file for Gp0127634", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ec.tsv", + "md5_checksum": "d6ff8f2f0d5c77495b2b43a7020e5730", + "id": "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", + "file_size_bytes": 4399755 + }, + { + "name": "Gp0127634_COG GFF file", + "description": "COG GFF file for Gp0127634", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cog.gff", + "md5_checksum": "763d16c5dbadbeba61ceee91ed5209f3", + "id": "nmdc:763d16c5dbadbeba61ceee91ed5209f3", + "file_size_bytes": 33737036 + }, + { + "name": "Gp0127634_PFAM GFF file", + "description": "PFAM GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_pfam.gff", + "md5_checksum": "52cba722f402eea06fda75ec1e5a5103", + "id": "nmdc:52cba722f402eea06fda75ec1e5a5103", + "file_size_bytes": 24757263 + }, + { + "name": "Gp0127634_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_tigrfam.gff", + "md5_checksum": "ad358ce4b479febc34a2acdd9f249517", + "id": "nmdc:ad358ce4b479febc34a2acdd9f249517", + "file_size_bytes": 2661782 + }, + { + "name": "Gp0127634_SMART GFF file", + "description": "SMART GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_smart.gff", + "md5_checksum": "10a0ca82cf662ac4d9b465f05ed1fb2b", + "id": "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", + "file_size_bytes": 7506881 + }, + { + "name": "Gp0127634_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_supfam.gff", + "md5_checksum": "d0e8459e010015e726c31f0f8c18d359", + "id": "nmdc:d0e8459e010015e726c31f0f8c18d359", + "file_size_bytes": 42013513 + }, + { + "name": "Gp0127634_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cath_funfam.gff", + "md5_checksum": "41d7ca149efb4c12bce48e5a19649a84", + "id": "nmdc:41d7ca149efb4c12bce48e5a19649a84", + "file_size_bytes": 31747110 + }, + { + "name": "Gp0127634_KO_EC GFF file", + "description": "KO_EC GFF file for 
Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko_ec.gff", + "md5_checksum": "9da1883e60979e17665b0211198c35f0", + "id": "nmdc:9da1883e60979e17665b0211198c35f0", + "file_size_bytes": 20999001 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127634_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.tooShort.fa", + "md5_checksum": "3c8eadbcf4f583090d8f378ea6758799", + "id": "nmdc:3c8eadbcf4f583090d8f378ea6758799", + "file_size_bytes": 71683990 + }, + { + "name": "Gp0127634_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.unbinned.fa", + "md5_checksum": "1be647dc835ee8fe666fe9893266bd21", + "id": "nmdc:1be647dc835ee8fe666fe9893266bd21", + "file_size_bytes": 11353478 + }, + { + "name": "Gp0127634_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127634", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_checkm_qa.out", + "md5_checksum": "6cc278c455cafc691333c0a74fe6c540", + "id": "nmdc:6cc278c455cafc691333c0a74fe6c540", + "file_size_bytes": 936 + }, + { + "name": "Gp0127634_high-quality and medium-quality bins", + "description": "high-quality 
and medium-quality bins for Gp0127634", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_hqmq_bin.zip", + "md5_checksum": "de4d0180489bdaa5526977508a489b99", + "id": "nmdc:de4d0180489bdaa5526977508a489b99", + "file_size_bytes": 518340 + }, + { + "name": "Gp0127634_metabat2 bins", + "description": "metabat2 bins for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_metabat_bin.zip", + "md5_checksum": "16a08c4a3a6e9c70a5d47209177d0e60", + "id": "nmdc:16a08c4a3a6e9c70a5d47209177d0e60", + "file_size_bytes": 63768 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34702" + "$oid": "649b0052ec087f6bbab34713" }, "has_input": [ - "nmdc:aee81646e593045bbb32a0012870b88b", - "nmdc:dee5fa37f57a24685b65e00380d6e433", - "nmdc:b9ec0754ffaa338c899244703bc91386" + "nmdc:2a30cf44cc596923301befc34edf6c0a", + "nmdc:356d9ca409747590849dd894998166ee", + "nmdc:f63b43e7797845fa94dc6f552ba1ea39" ], - "too_short_contig_num": 200319, + "too_short_contig_num": 168596, "part_of": [ - "nmdc:mga0sb9b30" + "nmdc:mga0r0vf18" ], - "binned_contig_num": 482, + "binned_contig_num": 278, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:7968c6b88e49f066bd24982b4d54965b", - "nmdc:120fbaa7439eb628d9a982de573446a8", - "nmdc:347a7ee18b37674e031cca9046e92623", - "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", - "nmdc:382d00338a5e4829285e58a203de153e" + "nmdc:3c8eadbcf4f583090d8f378ea6758799", + "nmdc:1be647dc835ee8fe666fe9893266bd21", + "nmdc:6cc278c455cafc691333c0a74fe6c540", + "nmdc:de4d0180489bdaa5526977508a489b99", + "nmdc:16a08c4a3a6e9c70a5d47209177d0e60" ], - "was_informed_by": "gold:Gp0127637", - "input_contig_num": 214863, - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + 
"was_informed_by": "gold:Gp0127634", + "input_contig_num": 175822, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0sb9b30", + "name": "MAGs Analysis Activity for nmdc:mga0r0vf18", "mags_list": [ { - "number_of_contig": 59, - "completeness": 8.33, + "number_of_contig": 235, + "completeness": 68.28, "bin_name": "bins.1", - "gene_count": 295, - "bin_quality": "LQ", + "gene_count": 2056, + "bin_quality": "MQ", "gtdbtk_species": "", - "gtdbtk_order": "", + "gtdbtk_order": "Nitrososphaerales", "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 2.91, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 0 + "num_t_rna": 34 }, { - "number_of_contig": 233, - "completeness": 45.87, + "number_of_contig": 43, + "completeness": 10.69, "bin_name": "bins.2", - "gene_count": 1342, + "gene_count": 247, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 1.28, + "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 18 - }, - { - "number_of_contig": 190, - "completeness": 75.08, - "bin_name": "bins.3", - "gene_count": 1991, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 1.21, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 37 + "num_t_rna": 4 } ], - "unbinned_contig_num": 14062, - "started_at_time": "2021-10-11T02:24:01Z", + "unbinned_contig_num": 6948, 
+ "started_at_time": "2021-10-11T02:23:30Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:11:56+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127637_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.tooShort.fa", - "md5_checksum": "7968c6b88e49f066bd24982b4d54965b", - "id": "nmdc:7968c6b88e49f066bd24982b4d54965b", - "file_size_bytes": 91577123 - }, - { - "name": "Gp0127637_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.unbinned.fa", - "md5_checksum": "120fbaa7439eb628d9a982de573446a8", - "id": "nmdc:120fbaa7439eb628d9a982de573446a8", - "file_size_bytes": 22556841 - }, - { - "name": "Gp0127637_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127637", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_checkm_qa.out", - "md5_checksum": "347a7ee18b37674e031cca9046e92623", - "id": "nmdc:347a7ee18b37674e031cca9046e92623", - "file_size_bytes": 1092 - }, - { - "name": "Gp0127637_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127637", - "data_object_type": "Metagenome Bins", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_hqmq_bin.zip", - "md5_checksum": "de1da5ea4bfdf3131a6c510b79b145c2", - "id": "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", - "file_size_bytes": 504932 - }, - { - "name": "Gp0127637_metabat2 bins", - "description": "metabat2 bins for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_metabat_bin.zip", - "md5_checksum": "382d00338a5e4829285e58a203de153e", - "id": "nmdc:382d00338a5e4829285e58a203de153e", - "file_size_bytes": 432910 - } - ] + "ended_at_time": "2021-10-11T04:49:55+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b49" - }, - "id": "nmdc:omprc-11-tgxmb243", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-pq3zmp51" - ], - "has_output": [ - "jgi:574fde837ded5e3df1ee141d" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127638" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c873" + "$oid": "649b005bbf2caae0415ef9b3" }, "has_input": [ - "nmdc:56b2d94789953adf1b4ed35f09f0edd4" + 
"nmdc:2a30cf44cc596923301befc34edf6c0a" ], "part_of": [ - "nmdc:mga0hjgc20" + "nmdc:mga0r0vf18" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752", - "nmdc:5c9398042e9ff608befa78e86597bdf0" + "nmdc:ca16203099dc1d6bbce00320bb753974", + "nmdc:fffbb7b52a4886755df429e22a152427", + "nmdc:f63b43e7797845fa94dc6f552ba1ea39", + "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", + "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", + "nmdc:763d16c5dbadbeba61ceee91ed5209f3", + "nmdc:52cba722f402eea06fda75ec1e5a5103", + "nmdc:ad358ce4b479febc34a2acdd9f249517", + "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", + "nmdc:d0e8459e010015e726c31f0f8c18d359", + "nmdc:41d7ca149efb4c12bce48e5a19649a84", + "nmdc:9da1883e60979e17665b0211198c35f0" ], - "was_informed_by": "gold:Gp0127638", - "input_read_count": 21721428, - "output_read_bases": 2949961420, - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "was_informed_by": "gold:Gp0127634", + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", "execution_resource": "NERSC-Cori", - "input_read_bases": 3279935628, - "name": "Read QC Activity for nmdc:mga0hjgc20", - "output_read_count": 19723416, - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-12-02T20:49:51+00:00", - "output_data_objects": [ - { - "name": "Gp0127638_Filtered Reads", - "description": "Filtered Reads for Gp0127638", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filtered.fastq.gz", - "md5_checksum": "56ba2416c050decd6c16c618c1e4a752", - "id": "nmdc:56ba2416c050decd6c16c618c1e4a752", - "file_size_bytes": 1649318115 - }, - { - "name": "Gp0127638_Filtered Stats", - "description": "Filtered Stats for Gp0127638", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filterStats.txt", - "md5_checksum": 
"5c9398042e9ff608befa78e86597bdf0", - "id": "nmdc:5c9398042e9ff608befa78e86597bdf0", - "file_size_bytes": 283 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0r0vf18", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:49:55+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf35" + "$oid": "649b005f2ca5ee4adb139f9c" }, "has_input": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752" + "nmdc:ac889627d813c8e34cfbf79a4264c590" + ], + "part_of": [ + "nmdc:mga0r0vf18" ], + "ctg_logsum": 142091, + "scaf_logsum": 142614, + "gap_pct": 0.00138, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "nmdc:b6de56746a284f8226dd86817c8ae04e", - "nmdc:d9572e708af9f0a06e98cfddfb298359", - "nmdc:e9946f36795474182b7759d3d7532b57", - "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "nmdc:997a66f49a232750bd7132639f3387e7", - "nmdc:d3f604a59babf001839d38a617b62931", - "nmdc:3abfaa434ee1449cbbb69985e48488b4", - "nmdc:70c2fc1a2c7c0032528ff91ad1576465" + "nmdc:2a30cf44cc596923301befc34edf6c0a", + "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", + "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", + "nmdc:825969095ff134b195b06a40fcc6089a", + "nmdc:356d9ca409747590849dd894998166ee" ], - "was_informed_by": "gold:Gp0127638", - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "asm_score": 5.751, + "was_informed_by": "gold:Gp0127634", + "ctg_powsum": 15837, + "scaf_max": 33833, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "scaf_powsum": 15897, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:49:51+00:00", - "output_data_objects": [ - { - "name": "Gp0127638_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127638", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", - "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", - "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "file_size_bytes": 2025 - }, - { - "name": "Gp0127638_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", - "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", - "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", - "file_size_bytes": 655633 - }, - { - "name": "Gp0127638_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127638", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", - "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", - "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", - "file_size_bytes": 232133 - }, - { - "name": "Gp0127638_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127638", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", - "md5_checksum": "e9946f36795474182b7759d3d7532b57", - "id": "nmdc:e9946f36795474182b7759d3d7532b57", - "file_size_bytes": 1448205544 - }, - { - "name": "Gp0127638_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127638", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", - "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", - "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "file_size_bytes": 253872 - }, - { - "name": "Gp0127638_Centrifuge Krona HTML report", - "description": "Centrifuge 
Krona HTML report for Gp0127638", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", - "md5_checksum": "997a66f49a232750bd7132639f3387e7", - "id": "nmdc:997a66f49a232750bd7132639f3387e7", - "file_size_bytes": 2331772 - }, - { - "name": "Gp0127638_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127638", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", - "md5_checksum": "d3f604a59babf001839d38a617b62931", - "id": "nmdc:d3f604a59babf001839d38a617b62931", - "file_size_bytes": 1157365410 - }, - { - "name": "Gp0127638_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127638", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", - "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", - "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", - "file_size_bytes": 621484 - }, - { - "name": "Gp0127638_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127638", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", - "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", - "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", - "file_size_bytes": 3896830 - } + "contigs": 175824, + "name": "Assembly Activity for nmdc:mga0r0vf18", + "ctg_max": 33833, + "gc_std": 0.09424, + "contig_bp": 78219291, + "gc_avg": 0.62214, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 78220371, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 175734, + "ended_at_time": "2021-10-11T04:49:55+00:00", + "ctg_l50": 412, + "ctg_l90": 286, + "ctg_n50": 53340, 
+ "ctg_n90": 150131, + "scaf_l50": 412, + "scaf_l90": 286, + "scaf_n50": 53321, + "scaf_n90": 150048 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b46" + }, + "id": "nmdc:omprc-11-mbv2jc69", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jdgzjq31" + ], + "has_output": [ + "jgi:574fe09c7ded5e3df1ee1487" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127634" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e719b5833bcf838a7010e1" + "$oid": "649b009d6bdd4fd20273c86a" }, "has_input": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752" + "nmdc:2b7712d32a159eca66fc50936de000a5" ], "part_of": [ - "nmdc:mga0hjgc20" + "nmdc:mga0r0vf18" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "nmdc:b6de56746a284f8226dd86817c8ae04e", - "nmdc:d9572e708af9f0a06e98cfddfb298359", - "nmdc:e9946f36795474182b7759d3d7532b57", - "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "nmdc:997a66f49a232750bd7132639f3387e7", - 
"nmdc:d3f604a59babf001839d38a617b62931", - "nmdc:3abfaa434ee1449cbbb69985e48488b4", - "nmdc:70c2fc1a2c7c0032528ff91ad1576465" + "nmdc:ac889627d813c8e34cfbf79a4264c590", + "nmdc:0dfd55be1779ae7922d80aa22034c9a1" ], - "was_informed_by": "gold:Gp0127638", - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "was_informed_by": "gold:Gp0127634", + "input_read_count": 29872658, + "output_read_bases": 4172764161, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:49:51+00:00", - "output_data_objects": [ - { - "name": "Gp0127638_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", - "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", - "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "file_size_bytes": 2025 - }, - { - "name": "Gp0127638_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", - "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", - "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", - "file_size_bytes": 655633 - }, - { - "name": "Gp0127638_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127638", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", - "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", - "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", - "file_size_bytes": 232133 - }, - { - "name": "Gp0127638_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127638", - 
"data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", - "md5_checksum": "e9946f36795474182b7759d3d7532b57", - "id": "nmdc:e9946f36795474182b7759d3d7532b57", - "file_size_bytes": 1448205544 - }, - { - "name": "Gp0127638_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127638", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", - "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", - "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "file_size_bytes": 253872 - }, - { - "name": "Gp0127638_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127638", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", - "md5_checksum": "997a66f49a232750bd7132639f3387e7", - "id": "nmdc:997a66f49a232750bd7132639f3387e7", - "file_size_bytes": 2331772 - }, - { - "name": "Gp0127638_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127638", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", - "md5_checksum": "d3f604a59babf001839d38a617b62931", - "id": "nmdc:d3f604a59babf001839d38a617b62931", - "file_size_bytes": 1157365410 - }, - { - "name": "Gp0127638_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127638", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", - "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", - "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", - 
"file_size_bytes": 621484 - }, - { - "name": "Gp0127638_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127638", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", - "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", - "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", - "file_size_bytes": 3896830 - } - ] - }, + "input_read_bases": 4510771358, + "name": "Read QC Activity for nmdc:mga0r0vf18", + "output_read_count": 27896694, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:49:55+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fac" + "$oid": "649b009bff710ae353f8cf2d" }, "has_input": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752" - ], - "part_of": [ - "nmdc:mga0hjgc20" + "nmdc:ac889627d813c8e34cfbf79a4264c590" ], - "ctg_logsum": 141543, - "scaf_logsum": 141966, - "gap_pct": 0.00109, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:5122503797ac0ed9694a6f4feecab955", - "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", - "nmdc:0944f2c0dd70a751117fb10d9a41fddc", - "nmdc:1917dcbbe1efcc2a57c511648a7f332e", - "nmdc:6420476f7e93425a68aa00b8e09cd6e7" + "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "nmdc:0ca043b630ba304cb80603e8332c78cf", + "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "nmdc:678e7c401a6971629f7d3ada83b307ab" ], - "asm_score": 6.89, - "was_informed_by": "gold:Gp0127638", - "ctg_powsum": 15753, - "scaf_max": 48487, - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", - "scaf_powsum": 15801, + "was_informed_by": "gold:Gp0127634", + "id": 
"nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", "execution_resource": "NERSC-Cori", - "contigs": 169698, - "name": "Assembly Activity for nmdc:mga0hjgc20", - "ctg_max": 48487, - "gc_std": 0.08917, - "gc_avg": 0.63213, - "contig_bp": 77783768, - "started_at_time": "2021-12-01T21:31:29Z", - "scaf_bp": 77784618, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 169622, - "ended_at_time": "2021-12-02T20:49:51+00:00", - "ctg_l50": 433, - "ctg_l90": 289, - "ctg_n50": 51455, - "ctg_n90": 144304, - "scaf_l50": 433, - "scaf_l90": 289, - "scaf_n50": 51437, - "scaf_n90": 144234, - "output_data_objects": [ - { - "name": "Gp0127638_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127638", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_contigs.fna", - "md5_checksum": "5122503797ac0ed9694a6f4feecab955", - "id": "nmdc:5122503797ac0ed9694a6f4feecab955", - "file_size_bytes": 84307064 - }, - { - "name": "Gp0127638_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127638", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_scaffolds.fna", - "md5_checksum": "d7ee4628101b11bc5fb67d961a4e1a0a", - "id": "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", - "file_size_bytes": 83796938 - }, - { - "name": "Gp0127638_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_covstats.txt", - "md5_checksum": "0944f2c0dd70a751117fb10d9a41fddc", - "id": "nmdc:0944f2c0dd70a751117fb10d9a41fddc", - "file_size_bytes": 13413799 - }, - { - "name": "Gp0127638_Assembled AGP file", - "description": "Assembled AGP file for Gp0127638", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_assembly.agp", - 
"md5_checksum": "1917dcbbe1efcc2a57c511648a7f332e", - "id": "nmdc:1917dcbbe1efcc2a57c511648a7f332e", - "file_size_bytes": 12526116 - }, - { - "name": "Gp0127638_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127638", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_pairedMapped_sorted.bam", - "md5_checksum": "6420476f7e93425a68aa00b8e09cd6e7", - "id": "nmdc:6420476f7e93425a68aa00b8e09cd6e7", - "file_size_bytes": 1810224630 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:49:55+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9bc" + "$oid": "61e71979833bcf838a700840" }, "has_input": [ - "nmdc:5122503797ac0ed9694a6f4feecab955" + "nmdc:ac889627d813c8e34cfbf79a4264c590" ], "part_of": [ - "nmdc:mga0hjgc20" + "nmdc:mga0r0vf18" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:f56690d136c4dafdc1eaa64a21fd9210", - "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", - "nmdc:41453202313c56e06b0cc00b5ee6c375", - "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", - "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", - "nmdc:37cb3fb060da091a84f1baa7ef3743fc", - "nmdc:34680897818585cefbef6e69109e7de4", - "nmdc:a00404838fbe9f846a704e1dbb14f2b2", - "nmdc:700dd121a0ac41e3fa8077d7330adae7", - "nmdc:e429651ae53a18b07d99880d09a19b26", - "nmdc:b22aab3cc1b9231102b23c31b418eff4", - "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef" + "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + 
"nmdc:0ca043b630ba304cb80603e8332c78cf", + "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "nmdc:678e7c401a6971629f7d3ada83b307ab" ], - "was_informed_by": "gold:Gp0127638", - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "was_informed_by": "gold:Gp0127634", + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0hjgc20", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-12-02T20:49:51+00:00", - "output_data_objects": [ - { - "name": "Gp0127638_Protein FAA", - "description": "Protein FAA for Gp0127638", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_proteins.faa", - "md5_checksum": "f56690d136c4dafdc1eaa64a21fd9210", - "id": "nmdc:f56690d136c4dafdc1eaa64a21fd9210", - "file_size_bytes": 49236514 - }, - { - "name": "Gp0127638_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127638", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_structural_annotation.gff", - "md5_checksum": "8be4e8ac2d00bf1d5b4863c36dc3678c", - "id": "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", - "file_size_bytes": 2519 - }, - { - "name": "Gp0127638_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127638", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_functional_annotation.gff", - "md5_checksum": "41453202313c56e06b0cc00b5ee6c375", - "id": "nmdc:41453202313c56e06b0cc00b5ee6c375", - "file_size_bytes": 56761027 - }, - { - "name": "Gp0127638_KO TSV file", - "description": "KO TSV file for Gp0127638", - "data_object_type": 
"Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko.tsv", - "md5_checksum": "e06bd74dce2e5b839b35ac1012d93ba4", - "id": "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", - "file_size_bytes": 6728487 - }, - { - "name": "Gp0127638_EC TSV file", - "description": "EC TSV file for Gp0127638", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ec.tsv", - "md5_checksum": "f2786d1f8a17bedd0104b01ec06ebfce", - "id": "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", - "file_size_bytes": 4522678 - }, - { - "name": "Gp0127638_COG GFF file", - "description": "COG GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cog.gff", - "md5_checksum": "37cb3fb060da091a84f1baa7ef3743fc", - "id": "nmdc:37cb3fb060da091a84f1baa7ef3743fc", - "file_size_bytes": 33992392 - }, - { - "name": "Gp0127638_PFAM GFF file", - "description": "PFAM GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_pfam.gff", - "md5_checksum": "34680897818585cefbef6e69109e7de4", - "id": "nmdc:34680897818585cefbef6e69109e7de4", - "file_size_bytes": 25203872 - }, - { - "name": "Gp0127638_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_tigrfam.gff", - "md5_checksum": "a00404838fbe9f846a704e1dbb14f2b2", - "id": "nmdc:a00404838fbe9f846a704e1dbb14f2b2", - "file_size_bytes": 2852587 - }, - { - "name": "Gp0127638_SMART GFF file", - "description": "SMART GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_smart.gff", - "md5_checksum": "700dd121a0ac41e3fa8077d7330adae7", - "id": "nmdc:700dd121a0ac41e3fa8077d7330adae7", - "file_size_bytes": 7723231 - }, - { - "name": "Gp0127638_SuperFam GFF 
file", - "description": "SuperFam GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_supfam.gff", - "md5_checksum": "e429651ae53a18b07d99880d09a19b26", - "id": "nmdc:e429651ae53a18b07d99880d09a19b26", - "file_size_bytes": 42064836 - }, - { - "name": "Gp0127638_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cath_funfam.gff", - "md5_checksum": "b22aab3cc1b9231102b23c31b418eff4", - "id": "nmdc:b22aab3cc1b9231102b23c31b418eff4", - "file_size_bytes": 32005228 - }, - { - "name": "Gp0127638_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko_ec.gff", - "md5_checksum": "ebb5a6a7ad1f14fd8cf2178ec59969ef", - "id": "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef", - "file_size_bytes": 21405596 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:49:55+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127635_Filtered Reads", + "description": "Filtered Reads for Gp0127635", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filtered.fastq.gz", + "md5_checksum": "f8bc16e232f7ba0f6d6b5ca35a708c36", + "id": "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", + "file_size_bytes": 1951049105 + }, + { + "name": "Gp0127635_Filtered Stats", + "description": "Filtered Stats for Gp0127635", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filterStats.txt", + 
"md5_checksum": "fbc260443529d6e8067efdac3b58a8c1", + "id": "nmdc:fbc260443529d6e8067efdac3b58a8c1", + "file_size_bytes": 280 + }, + { + "name": "Gp0127635_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", + "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", + "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "file_size_bytes": 3696 + }, + { + "name": "Gp0127635_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", + "md5_checksum": "ddec46781153da60da815c65871f5413", + "id": "nmdc:ddec46781153da60da815c65871f5413", + "file_size_bytes": 677459 + }, + { + "name": "Gp0127635_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127635", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", + "md5_checksum": "e626ec18dba4885613240927cbb99d8b", + "id": "nmdc:e626ec18dba4885613240927cbb99d8b", + "file_size_bytes": 236164 + }, + { + "name": "Gp0127635_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127635", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", + "md5_checksum": "f8486e4ee029038a452a3484db10cabc", + "id": "nmdc:f8486e4ee029038a452a3484db10cabc", + "file_size_bytes": 1796179546 + }, + { + "name": "Gp0127635_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127635", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", + "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", + "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "file_size_bytes": 254661 + }, + { + "name": "Gp0127635_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127635", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", + "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", + "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "file_size_bytes": 2333534 + }, + { + "name": "Gp0127635_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127635", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", + "md5_checksum": "59807dae5216b11c96df5593a26d9a88", + "id": "nmdc:59807dae5216b11c96df5593a26d9a88", + "file_size_bytes": 1432249556 + }, + { + "name": "Gp0127635_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127635", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", + "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", + "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "file_size_bytes": 639738 + }, + { + "name": "Gp0127635_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127635", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", + "md5_checksum": "6748020214a3d68ad588e3548107208e", + "id": "nmdc:6748020214a3d68ad588e3548107208e", + "file_size_bytes": 3996293 + }, + { + "name": "Gp0127635_Gottcha2 TSV report", + 
"description": "Gottcha2 TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", + "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", + "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "file_size_bytes": 3696 + }, + { + "name": "Gp0127635_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", + "md5_checksum": "ddec46781153da60da815c65871f5413", + "id": "nmdc:ddec46781153da60da815c65871f5413", + "file_size_bytes": 677459 + }, + { + "name": "Gp0127635_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127635", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", + "md5_checksum": "e626ec18dba4885613240927cbb99d8b", + "id": "nmdc:e626ec18dba4885613240927cbb99d8b", + "file_size_bytes": 236164 + }, + { + "name": "Gp0127635_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127635", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", + "md5_checksum": "f8486e4ee029038a452a3484db10cabc", + "id": "nmdc:f8486e4ee029038a452a3484db10cabc", + "file_size_bytes": 1796179546 + }, + { + "name": "Gp0127635_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127635", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", + "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", + "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "file_size_bytes": 254661 + }, + { + "name": 
"Gp0127635_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127635", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", + "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", + "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "file_size_bytes": 2333534 + }, + { + "name": "Gp0127635_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127635", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", + "md5_checksum": "59807dae5216b11c96df5593a26d9a88", + "id": "nmdc:59807dae5216b11c96df5593a26d9a88", + "file_size_bytes": 1432249556 + }, + { + "name": "Gp0127635_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127635", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", + "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", + "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "file_size_bytes": 639738 + }, + { + "name": "Gp0127635_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127635", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", + "md5_checksum": "6748020214a3d68ad588e3548107208e", + "id": "nmdc:6748020214a3d68ad588e3548107208e", + "file_size_bytes": 3996293 + }, + { + "name": "Gp0127635_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127635", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_contigs.fna", + "md5_checksum": "3d1b5043e0c49ac6062aeba4ebbba910", + "id": 
"nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "file_size_bytes": 111964628 + }, + { + "name": "Gp0127635_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127635", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_scaffolds.fna", + "md5_checksum": "4d4497f63f95f7d2f8986178dab3ae52", + "id": "nmdc:4d4497f63f95f7d2f8986178dab3ae52", + "file_size_bytes": 111342667 + }, + { + "name": "Gp0127635_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_covstats.txt", + "md5_checksum": "ac98d3d128ec5b045a9ef019a5653b99", + "id": "nmdc:ac98d3d128ec5b045a9ef019a5653b99", + "file_size_bytes": 16397988 + }, + { + "name": "Gp0127635_Assembled AGP file", + "description": "Assembled AGP file for Gp0127635", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_assembly.agp", + "md5_checksum": "1d0302bec371a73f040d052f4b66277c", + "id": "nmdc:1d0302bec371a73f040d052f4b66277c", + "file_size_bytes": 15325341 + }, + { + "name": "Gp0127635_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127635", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_pairedMapped_sorted.bam", + "md5_checksum": "2d8cca230f439e38f1e628666e40e013", + "id": "nmdc:2d8cca230f439e38f1e628666e40e013", + "file_size_bytes": 2159251548 + }, + { + "name": "Gp0127635_Protein FAA", + "description": "Protein FAA for Gp0127635", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_proteins.faa", + "md5_checksum": "bb7eae2b3dbc58168b9122098f078bb5", + "id": "nmdc:bb7eae2b3dbc58168b9122098f078bb5", + 
"file_size_bytes": 63157189 + }, + { + "name": "Gp0127635_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127635", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_structural_annotation.gff", + "md5_checksum": "2af7f6c008858f2f0d47c00fa9758129", + "id": "nmdc:2af7f6c008858f2f0d47c00fa9758129", + "file_size_bytes": 2526 + }, + { + "name": "Gp0127635_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127635", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_functional_annotation.gff", + "md5_checksum": "dd3668477e39a65243179dfb9e4bf26e", + "id": "nmdc:dd3668477e39a65243179dfb9e4bf26e", + "file_size_bytes": 71092075 + }, + { + "name": "Gp0127635_KO TSV file", + "description": "KO TSV file for Gp0127635", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko.tsv", + "md5_checksum": "be0e9a5999ddfd46bf5daac56aa96b86", + "id": "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", + "file_size_bytes": 8023056 + }, + { + "name": "Gp0127635_EC TSV file", + "description": "EC TSV file for Gp0127635", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ec.tsv", + "md5_checksum": "95a6a1f91bf18bc1a781a8890d2e1bc5", + "id": "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", + "file_size_bytes": 5303502 + }, + { + "name": "Gp0127635_COG GFF file", + "description": "COG GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cog.gff", + "md5_checksum": "6960907313875913a789e1fda46ed34e", + "id": "nmdc:6960907313875913a789e1fda46ed34e", + "file_size_bytes": 42106254 + }, + { + "name": "Gp0127635_PFAM GFF 
file", + "description": "PFAM GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_pfam.gff", + "md5_checksum": "033da43cdca9f81ed2270a9094fdb065", + "id": "nmdc:033da43cdca9f81ed2270a9094fdb065", + "file_size_bytes": 31806020 + }, + { + "name": "Gp0127635_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_tigrfam.gff", + "md5_checksum": "e9603ffd918db8a21df1310b890315ff", + "id": "nmdc:e9603ffd918db8a21df1310b890315ff", + "file_size_bytes": 3500524 + }, + { + "name": "Gp0127635_SMART GFF file", + "description": "SMART GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_smart.gff", + "md5_checksum": "fd98e0cfe1f4ca7b9e4af833c5ef199c", + "id": "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", + "file_size_bytes": 9346082 + }, + { + "name": "Gp0127635_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_supfam.gff", + "md5_checksum": "03481d99958ae1c9dcccb8fd91c0bbf7", + "id": "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", + "file_size_bytes": 52582333 + }, + { + "name": "Gp0127635_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cath_funfam.gff", + "md5_checksum": "f0a96fb57947358a42053e9fb7134e70", + "id": "nmdc:f0a96fb57947358a42053e9fb7134e70", + "file_size_bytes": 40179818 + }, + { + "name": "Gp0127635_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko_ec.gff", + "md5_checksum": "9737b61f2e6e923ac662e0a1c4f6aaa9", + "id": "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9", + "file_size_bytes": 25482964 + }, + { + "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127635_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.tooShort.fa", + "md5_checksum": "daed5e3af5201fe510e780f155f90bc3", + "id": "nmdc:daed5e3af5201fe510e780f155f90bc3", + "file_size_bytes": 86476884 + }, + { + "name": "Gp0127635_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.unbinned.fa", + "md5_checksum": "7cdb1c384c8bc63b3c127e5bc434ac6b", + "id": "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", + "file_size_bytes": 22898396 + }, + { + "name": "Gp0127635_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127635", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_checkm_qa.out", + "md5_checksum": "b5ae13756638f09d74fdbe03183b231f", + "id": "nmdc:b5ae13756638f09d74fdbe03183b231f", + "file_size_bytes": 1240 + }, + { + "name": "Gp0127635_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127635", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_hqmq_bin.zip", + "md5_checksum": "1dc5796596177362849da19fc4e50b13", + "id": 
"nmdc:1dc5796596177362849da19fc4e50b13", + "file_size_bytes": 182 }, + { + "name": "Gp0127635_metabat2 bins", + "description": "metabat2 bins for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_metabat_bin.zip", + "md5_checksum": "fba0bfa144e9ef179edb10b5a941c259", + "id": "nmdc:fba0bfa144e9ef179edb10b5a941c259", + "file_size_bytes": 795127 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab3471d" + "$oid": "649b0052ec087f6bbab34708" }, "has_input": [ - "nmdc:5122503797ac0ed9694a6f4feecab955", - "nmdc:6420476f7e93425a68aa00b8e09cd6e7", - "nmdc:41453202313c56e06b0cc00b5ee6c375" + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "nmdc:2d8cca230f439e38f1e628666e40e013", + "nmdc:dd3668477e39a65243179dfb9e4bf26e" ], - "too_short_contig_num": 162130, + "too_short_contig_num": 192406, "part_of": [ - "nmdc:mga0hjgc20" + "nmdc:mga0ak4p20" ], - "binned_contig_num": 189, + "binned_contig_num": 502, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", - "nmdc:8ca8e2250dc68643e937163323f2a826" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:daed5e3af5201fe510e780f155f90bc3", + "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", + "nmdc:b5ae13756638f09d74fdbe03183b231f", + "nmdc:1dc5796596177362849da19fc4e50b13", + "nmdc:fba0bfa144e9ef179edb10b5a941c259" ], - "was_informed_by": "gold:Gp0127638", - "input_contig_num": 169697, - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "was_informed_by": "gold:Gp0127635", + "input_contig_num": 206754, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0hjgc20", + "name": "MAGs Analysis Activity for nmdc:mga0ak4p20", "mags_list": [ { - "number_of_contig": 189, - "completeness": 73.5, + "number_of_contig": 203, + "completeness": 41.91, 
"bin_name": "bins.1", - "gene_count": 2020, - "bin_quality": "MQ", + "gene_count": 1456, + "bin_quality": "LQ", "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", + "gtdbtk_order": "", "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 0.97, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.88, + "gtdbtk_class": "", + "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 37 - } - ], - "unbinned_contig_num": 7378, - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-12-02T20:49:51+00:00", - "output_data_objects": [ + "num_t_rna": 24 + }, + { + "number_of_contig": 171, + "completeness": 8.33, + "bin_name": "bins.2", + "gene_count": 880, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, { - "name": "Gp0127638_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127638", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_checkm_qa.out", - "md5_checksum": "dcdd7e33e92d3658fe68056f21b57f5d", - "id": "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", - "file_size_bytes": 760 + "number_of_contig": 55, + "completeness": 14.66, + "bin_name": "bins.3", + "gene_count": 269, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 }, { - "name": "Gp0127638_high-quality and 
medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127638", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_hqmq_bin.zip", - "md5_checksum": "8ca8e2250dc68643e937163323f2a826", - "id": "nmdc:8ca8e2250dc68643e937163323f2a826", - "file_size_bytes": 508443 + "number_of_contig": 73, + "completeness": 0.0, + "bin_name": "bins.4", + "gene_count": 475, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 } - ] + ], + "unbinned_contig_num": 13846, + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b4a" - }, - "id": "nmdc:omprc-11-t0xjjc50", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-vg9vy382" - ], - "has_output": [ - "jgi:55d7402a0d8785342fcf7e3b" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115670" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + 
"metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c888" + "$oid": "649b005bbf2caae0415ef9a7" }, "has_input": [ - "nmdc:aa477a857eb9da284635b774477f3f54" + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910" ], "part_of": [ - "nmdc:mga0d7pj22" + "nmdc:mga0ak4p20" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7f6b353300583c60d2d668880b4134cd", - "nmdc:a4f65d101293fa4345cd865f86597464" - ], - "was_informed_by": "gold:Gp0115670", - "input_read_count": 36554212, - "output_read_bases": 5044444014, - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "nmdc:bb7eae2b3dbc58168b9122098f078bb5", + "nmdc:2af7f6c008858f2f0d47c00fa9758129", + "nmdc:dd3668477e39a65243179dfb9e4bf26e", + "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", + "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", + "nmdc:6960907313875913a789e1fda46ed34e", + "nmdc:033da43cdca9f81ed2270a9094fdb065", + "nmdc:e9603ffd918db8a21df1310b890315ff", + "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", + "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", + "nmdc:f0a96fb57947358a42053e9fb7134e70", + "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9" + ], + "was_informed_by": "gold:Gp0127635", + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", "execution_resource": "NERSC-Cori", - "input_read_bases": 5519686012, - "name": "Read QC Activity for nmdc:mga0d7pj22", - "output_read_count": 33663942, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:55:52+00:00", - "output_data_objects": [ - { - "name": "Gp0115670_Filtered Reads", - "description": "Filtered Reads for Gp0115670", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", - "md5_checksum": "7f6b353300583c60d2d668880b4134cd", - "id": "nmdc:7f6b353300583c60d2d668880b4134cd", - "file_size_bytes": 3012174785 - }, - { - "name": 
"Gp0115670_Filtered Stats", - "description": "Filtered Stats for Gp0115670", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", - "md5_checksum": "a4f65d101293fa4345cd865f86597464", - "id": "nmdc:a4f65d101293fa4345cd865f86597464", - "file_size_bytes": 291 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0ak4p20", + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf4d" + "$oid": "649b005f2ca5ee4adb139f90" }, "has_input": [ - "nmdc:7f6b353300583c60d2d668880b4134cd" + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" + ], + "part_of": [ + "nmdc:mga0ak4p20" ], + "ctg_logsum": 269360, + "scaf_logsum": 270403, + "gap_pct": 0.00195, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "nmdc:1ac2be77491e7d425da1d62f69f1508d", - "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "nmdc:c065784bed2b2495d512af93d05967de", - "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "nmdc:8a26d8496a70f4777be0e1237092e44c", - "nmdc:694b83f0b6f599948d4248dd48dd9ba9" + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "nmdc:4d4497f63f95f7d2f8986178dab3ae52", + "nmdc:ac98d3d128ec5b045a9ef019a5653b99", + "nmdc:1d0302bec371a73f040d052f4b66277c", + "nmdc:2d8cca230f439e38f1e628666e40e013" ], - "was_informed_by": "gold:Gp0115670", - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "asm_score": 3.934, + "was_informed_by": "gold:Gp0127635", + "ctg_powsum": 29422, + "scaf_max": 23775, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "scaf_powsum": 29544, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", - "started_at_time": "2021-10-11T02:28:43Z", - "type": 
"nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:55:52+00:00", - "output_data_objects": [ - { - "name": "Gp0115670_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", - "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", - "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "file_size_bytes": 13758 - }, - { - "name": "Gp0115670_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", - "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", - "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", - "file_size_bytes": 1116084 - }, - { - "name": "Gp0115670_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115670", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", - "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", - "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "file_size_bytes": 268542 - }, - { - "name": "Gp0115670_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115670", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", - "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", - "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "file_size_bytes": 2458475116 - }, - { - "name": "Gp0115670_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115670", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", - 
"md5_checksum": "c065784bed2b2495d512af93d05967de", - "id": "nmdc:c065784bed2b2495d512af93d05967de", - "file_size_bytes": 261692 - }, - { - "name": "Gp0115670_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115670", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", - "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", - "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "file_size_bytes": 2343355 - }, - { - "name": "Gp0115670_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115670", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", - "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", - "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "file_size_bytes": 2019980511 - }, - { - "name": "Gp0115670_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115670", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", - "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", - "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", - "file_size_bytes": 694029 - }, - { - "name": "Gp0115670_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115670", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", - "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", - "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", - "file_size_bytes": 4190653 - } + "contigs": 206757, + "name": "Assembly Activity for nmdc:mga0ak4p20", + "ctg_max": 23775, + "gc_std": 0.10033, + "contig_bp": 103842002, + "gc_avg": 0.61621, + 
"started_at_time": "2021-10-11T02:26:59Z", + "scaf_bp": 103844032, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 206599, + "ended_at_time": "2021-10-11T04:11:48+00:00", + "ctg_l50": 496, + "ctg_l90": 290, + "ctg_n50": 55322, + "ctg_n90": 171862, + "scaf_l50": 497, + "scaf_l90": 290, + "scaf_n50": 55067, + "scaf_n90": 171721 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b47" + }, + "id": "nmdc:omprc-11-kc23zq65", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-59xteq78" + ], + "has_output": [ + "jgi:574fde607ded5e3df1ee1403" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127635" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e71a12833bcf838a701ba9" + "$oid": "649b009d6bdd4fd20273c860" }, "has_input": [ - "nmdc:7f6b353300583c60d2d668880b4134cd" + "nmdc:1a16fdf096087338922b288165a924b8" ], "part_of": [ - "nmdc:mga0d7pj22" + "nmdc:mga0ak4p20" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - 
"nmdc:1ac2be77491e7d425da1d62f69f1508d", - "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "nmdc:c065784bed2b2495d512af93d05967de", - "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "nmdc:8a26d8496a70f4777be0e1237092e44c", - "nmdc:694b83f0b6f599948d4248dd48dd9ba9" + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", + "nmdc:fbc260443529d6e8067efdac3b58a8c1" ], - "was_informed_by": "gold:Gp0115670", - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "was_informed_by": "gold:Gp0127635", + "input_read_count": 25320866, + "output_read_bases": 3673182178, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:55:52+00:00", - "output_data_objects": [ - { - "name": "Gp0115670_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", - "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", - "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "file_size_bytes": 13758 - }, - { - "name": "Gp0115670_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", - "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", - "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", - "file_size_bytes": 1116084 - }, - { - "name": "Gp0115670_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115670", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", - "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", - "id": 
"nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "file_size_bytes": 268542 - }, - { - "name": "Gp0115670_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115670", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", - "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", - "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "file_size_bytes": 2458475116 - }, - { - "name": "Gp0115670_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115670", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", - "md5_checksum": "c065784bed2b2495d512af93d05967de", - "id": "nmdc:c065784bed2b2495d512af93d05967de", - "file_size_bytes": 261692 - }, - { - "name": "Gp0115670_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115670", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", - "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", - "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "file_size_bytes": 2343355 - }, - { - "name": "Gp0115670_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115670", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", - "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", - "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "file_size_bytes": 2019980511 - }, - { - "name": "Gp0115670_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115670", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", - "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", - "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", - "file_size_bytes": 694029 - }, - { - "name": "Gp0115670_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115670", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", - "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", - "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", - "file_size_bytes": 4190653 - } - ] - }, + "input_read_bases": 3823450766, + "name": "Read QC Activity for nmdc:mga0ak4p20", + "output_read_count": 24600396, + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fc0" + "$oid": "649b009bff710ae353f8cf25" }, "has_input": [ - "nmdc:7f6b353300583c60d2d668880b4134cd" - ], - "part_of": [ - "nmdc:mga0d7pj22" + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" ], - "ctg_logsum": 272574, - "scaf_logsum": 274450, - "gap_pct": 0.00346, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", - "nmdc:1dfaed4da055c5fd4226abe08bd91db9", - "nmdc:8a749340eefc40901a22a0ef603bc803", - "nmdc:ad027e4c3ca67907154c03feeebbd97b", - "nmdc:c4f2407273babd894282d4d0f20be5d1" + "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "nmdc:ddec46781153da60da815c65871f5413", + "nmdc:e626ec18dba4885613240927cbb99d8b", + "nmdc:f8486e4ee029038a452a3484db10cabc", + "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "nmdc:59807dae5216b11c96df5593a26d9a88", + "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "nmdc:6748020214a3d68ad588e3548107208e" ], - "asm_score": 12.57, - 
"was_informed_by": "gold:Gp0115670", - "ctg_powsum": 33596, - "scaf_max": 211520, - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", - "scaf_powsum": 33865, + "was_informed_by": "gold:Gp0127635", + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", "execution_resource": "NERSC-Cori", - "contigs": 152605, - "name": "Assembly Activity for nmdc:mga0d7pj22", - "ctg_max": 211520, - "gc_std": 0.125, - "contig_bp": 79563543, - "gc_avg": 0.57036, - "started_at_time": "2021-10-11T02:28:43Z", - "scaf_bp": 79566293, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 152330, - "ended_at_time": "2021-10-11T05:55:52+00:00", - "ctg_l50": 492, - "ctg_l90": 290, - "ctg_n50": 35595, - "ctg_n90": 126332, - "scaf_l50": 493, - "scaf_l90": 290, - "scaf_n50": 35340, - "scaf_n90": 126070, - "scaf_l_gt50k": 1744421, - "scaf_n_gt50k": 21, - "scaf_pct_gt50k": 2.192412, - "output_data_objects": [ - { - "name": "Gp0115670_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115670", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_contigs.fna", - "md5_checksum": "975cdb0a18df949be4efb80d1dc4ef0b", - "id": "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", - "file_size_bytes": 85578260 - }, - { - "name": "Gp0115670_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115670", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_scaffolds.fna", - "md5_checksum": "1dfaed4da055c5fd4226abe08bd91db9", - "id": "nmdc:1dfaed4da055c5fd4226abe08bd91db9", - "file_size_bytes": 85115954 - }, - { - "name": "Gp0115670_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_covstats.txt", - "md5_checksum": "8a749340eefc40901a22a0ef603bc803", - "id": "nmdc:8a749340eefc40901a22a0ef603bc803", - 
"file_size_bytes": 12068883 - }, - { - "name": "Gp0115670_Assembled AGP file", - "description": "Assembled AGP file for Gp0115670", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_assembly.agp", - "md5_checksum": "ad027e4c3ca67907154c03feeebbd97b", - "id": "nmdc:ad027e4c3ca67907154c03feeebbd97b", - "file_size_bytes": 11264235 - }, - { - "name": "Gp0115670_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115670", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_pairedMapped_sorted.bam", - "md5_checksum": "c4f2407273babd894282d4d0f20be5d1", - "id": "nmdc:c4f2407273babd894282d4d0f20be5d1", - "file_size_bytes": 3245960211 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:11:48+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9cf" + "$oid": "61e71936833bcf838a6ffdfc" }, "has_input": [ - "nmdc:975cdb0a18df949be4efb80d1dc4ef0b" + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" ], "part_of": [ - "nmdc:mga0d7pj22" + "nmdc:mga0ak4p20" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:21230aff7bb5b266fb544905f9ac5ce2", - "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", - "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", - "nmdc:811910b7d8c300befddd039e833b0453", - "nmdc:9ed55d9535d1592866a66e9d5cd936a2", - "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", - "nmdc:4b56646de8c37278beaaf9797e4ddf2f", - "nmdc:53a0873376e22fef62f2740f6afead21", - "nmdc:36748318682076112ba81283c8bc767a", - 
"nmdc:5dd32385b351847f23ec4eac63eb70ff", - "nmdc:95076052a4d5d57e1ed0c7699e4f5472", - "nmdc:6ae89cc4b2fb7d09614c106d3358be27" + "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "nmdc:ddec46781153da60da815c65871f5413", + "nmdc:e626ec18dba4885613240927cbb99d8b", + "nmdc:f8486e4ee029038a452a3484db10cabc", + "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "nmdc:59807dae5216b11c96df5593a26d9a88", + "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "nmdc:6748020214a3d68ad588e3548107208e" ], - "was_informed_by": "gold:Gp0115670", - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "was_informed_by": "gold:Gp0127635", + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0d7pj22", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:55:52+00:00", - "output_data_objects": [ - { - "name": "Gp0115670_Protein FAA", - "description": "Protein FAA for Gp0115670", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_proteins.faa", - "md5_checksum": "21230aff7bb5b266fb544905f9ac5ce2", - "id": "nmdc:21230aff7bb5b266fb544905f9ac5ce2", - "file_size_bytes": 46061226 - }, - { - "name": "Gp0115670_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115670", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_structural_annotation.gff", - "md5_checksum": "91c5cc265ef61ab83111a5bc9462e8b2", - "id": "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", - "file_size_bytes": 2769 - }, - { - "name": "Gp0115670_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115670", - "data_object_type": "Functional Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_functional_annotation.gff", - "md5_checksum": "0bc4d8b8ef11724c3d7e728b0e8e0ea5", - "id": "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", - "file_size_bytes": 50449176 - }, - { - "name": "Gp0115670_KO TSV file", - "description": "KO TSV file for Gp0115670", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko.tsv", - "md5_checksum": "811910b7d8c300befddd039e833b0453", - "id": "nmdc:811910b7d8c300befddd039e833b0453", - "file_size_bytes": 6653168 - }, - { - "name": "Gp0115670_EC TSV file", - "description": "EC TSV file for Gp0115670", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ec.tsv", - "md5_checksum": "9ed55d9535d1592866a66e9d5cd936a2", - "id": "nmdc:9ed55d9535d1592866a66e9d5cd936a2", - "file_size_bytes": 4232890 - }, - { - "name": "Gp0115670_COG GFF file", - "description": "COG GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cog.gff", - "md5_checksum": "a127efaa423e6dd6d24d7ab67cc2124a", - "id": "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", - "file_size_bytes": 28376544 - }, - { - "name": "Gp0115670_PFAM GFF file", - "description": "PFAM GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_pfam.gff", - "md5_checksum": "4b56646de8c37278beaaf9797e4ddf2f", - "id": "nmdc:4b56646de8c37278beaaf9797e4ddf2f", - "file_size_bytes": 22850790 - }, - { - "name": "Gp0115670_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_tigrfam.gff", - "md5_checksum": "53a0873376e22fef62f2740f6afead21", - "id": "nmdc:53a0873376e22fef62f2740f6afead21", - "file_size_bytes": 3099434 - }, - { - "name": 
"Gp0115670_SMART GFF file", - "description": "SMART GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_smart.gff", - "md5_checksum": "36748318682076112ba81283c8bc767a", - "id": "nmdc:36748318682076112ba81283c8bc767a", - "file_size_bytes": 6433811 - }, - { - "name": "Gp0115670_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_supfam.gff", - "md5_checksum": "5dd32385b351847f23ec4eac63eb70ff", - "id": "nmdc:5dd32385b351847f23ec4eac63eb70ff", - "file_size_bytes": 36427587 - }, - { - "name": "Gp0115670_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cath_funfam.gff", - "md5_checksum": "95076052a4d5d57e1ed0c7699e4f5472", - "id": "nmdc:95076052a4d5d57e1ed0c7699e4f5472", - "file_size_bytes": 28909664 - }, - { - "name": "Gp0115670_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko_ec.gff", - "md5_checksum": "6ae89cc4b2fb7d09614c106d3358be27", - "id": "nmdc:6ae89cc4b2fb7d09614c106d3358be27", - "file_size_bytes": 21214802 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:11:48+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127637_Filtered Reads", + "description": "Filtered Reads for Gp0127637", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", + "md5_checksum": 
"805310f4b1e39a0cc9e5b5787576cb8b", + "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", + "file_size_bytes": 1553219358 + }, + { + "name": "Gp0127637_Filtered Stats", + "description": "Filtered Stats for Gp0127637", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", + "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", + "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", + "file_size_bytes": 289 + }, + { + "name": "Gp0127637_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", + "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", + "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "file_size_bytes": 660 + }, + { + "name": "Gp0127637_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", + "md5_checksum": "37dd1d73ad47979ee5284830d27df535", + "id": "nmdc:37dd1d73ad47979ee5284830d27df535", + "file_size_bytes": 594054 + }, + { + "name": "Gp0127637_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127637", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", + "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", + "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "file_size_bytes": 227750 + }, + { + "name": "Gp0127637_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127637", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", + "md5_checksum": 
"cb3bd5ca5088484cb4e580ad91d736b2", + "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "file_size_bytes": 1457058272 + }, + { + "name": "Gp0127637_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127637", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", + "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", + "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "file_size_bytes": 251867 + }, + { + "name": "Gp0127637_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127637", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", + "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", + "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", + "file_size_bytes": 2325282 + }, + { + "name": "Gp0127637_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127637", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", + "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", + "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "file_size_bytes": 1160106364 + }, + { + "name": "Gp0127637_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127637", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", + "md5_checksum": "9a1826f66ee45187d627076d11dc491f", + "id": "nmdc:9a1826f66ee45187d627076d11dc491f", + "file_size_bytes": 613810 + }, + { + "name": "Gp0127637_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127637", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", + "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", + "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", + "file_size_bytes": 3853908 + }, + { + "name": "Gp0127637_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", + "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", + "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "file_size_bytes": 660 + }, + { + "name": "Gp0127637_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", + "md5_checksum": "37dd1d73ad47979ee5284830d27df535", + "id": "nmdc:37dd1d73ad47979ee5284830d27df535", + "file_size_bytes": 594054 + }, + { + "name": "Gp0127637_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127637", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", + "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", + "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "file_size_bytes": 227750 + }, + { + "name": "Gp0127637_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127637", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", + "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", + "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "file_size_bytes": 1457058272 + }, + { + "name": "Gp0127637_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127637", + "data_object_type": "Centrifuge 
Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", + "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", + "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "file_size_bytes": 251867 + }, + { + "name": "Gp0127637_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127637", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", + "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", + "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", + "file_size_bytes": 2325282 + }, + { + "name": "Gp0127637_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127637", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", + "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", + "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "file_size_bytes": 1160106364 + }, + { + "name": "Gp0127637_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127637", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", + "md5_checksum": "9a1826f66ee45187d627076d11dc491f", + "id": "nmdc:9a1826f66ee45187d627076d11dc491f", + "file_size_bytes": 613810 + }, + { + "name": "Gp0127637_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127637", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", + "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", + "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", + "file_size_bytes": 3853908 + }, + { + "name": 
"Gp0127637_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127637", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_contigs.fna", + "md5_checksum": "aee81646e593045bbb32a0012870b88b", + "id": "nmdc:aee81646e593045bbb32a0012870b88b", + "file_size_bytes": 117200777 + }, + { + "name": "Gp0127637_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127637", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_scaffolds.fna", + "md5_checksum": "f1026db242cad285204c9c3d6307c183", + "id": "nmdc:f1026db242cad285204c9c3d6307c183", + "file_size_bytes": 116554638 + }, + { + "name": "Gp0127637_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_covstats.txt", + "md5_checksum": "b02b0a0145d14e97a31e6a6f7e4b8dc8", + "id": "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", + "file_size_bytes": 17037754 + }, + { + "name": "Gp0127637_Assembled AGP file", + "description": "Assembled AGP file for Gp0127637", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_assembly.agp", + "md5_checksum": "8afcf1e8b7b3f35edaefee7a0c31e19f", + "id": "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", + "file_size_bytes": 15931363 + }, + { + "name": "Gp0127637_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127637", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_pairedMapped_sorted.bam", + "md5_checksum": "dee5fa37f57a24685b65e00380d6e433", + "id": "nmdc:dee5fa37f57a24685b65e00380d6e433", + "file_size_bytes": 1739825120 + }, + { + "name": "Gp0127637_Protein FAA", + "description": 
"Protein FAA for Gp0127637", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_proteins.faa", + "md5_checksum": "69603434971f93dbd79860c18dd5c61a", + "id": "nmdc:69603434971f93dbd79860c18dd5c61a", + "file_size_bytes": 66263123 + }, + { + "name": "Gp0127637_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127637", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_structural_annotation.gff", + "md5_checksum": "bf8f822c6730b4cc73715ced3d25c262", + "id": "nmdc:bf8f822c6730b4cc73715ced3d25c262", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127637_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127637", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_functional_annotation.gff", + "md5_checksum": "b9ec0754ffaa338c899244703bc91386", + "id": "nmdc:b9ec0754ffaa338c899244703bc91386", + "file_size_bytes": 74459552 + }, + { + "name": "Gp0127637_KO TSV file", + "description": "KO TSV file for Gp0127637", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko.tsv", + "md5_checksum": "22402cc61770feb5a0aaa4f760808366", + "id": "nmdc:22402cc61770feb5a0aaa4f760808366", + "file_size_bytes": 8394894 + }, + { + "name": "Gp0127637_EC TSV file", + "description": "EC TSV file for Gp0127637", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ec.tsv", + "md5_checksum": "8c96f7faa38c361acc247b5a107a6b54", + "id": "nmdc:8c96f7faa38c361acc247b5a107a6b54", + "file_size_bytes": 5556852 + }, + { + "name": "Gp0127637_COG GFF file", + "description": "COG GFF file 
for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cog.gff", + "md5_checksum": "7a28d1eafd3a3c181e95f61eb3d18bf1", + "id": "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", + "file_size_bytes": 44328195 + }, + { + "name": "Gp0127637_PFAM GFF file", + "description": "PFAM GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_pfam.gff", + "md5_checksum": "89a8657f659710b3927baab155917fdf", + "id": "nmdc:89a8657f659710b3927baab155917fdf", + "file_size_bytes": 33562431 + }, + { + "name": "Gp0127637_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_tigrfam.gff", + "md5_checksum": "9b9ecf34f2f6ef6865d4864f5debfbb7", + "id": "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", + "file_size_bytes": 3752251 + }, + { + "name": "Gp0127637_SMART GFF file", + "description": "SMART GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_smart.gff", + "md5_checksum": "5cae6736713d02ccbe26543d733875cb", + "id": "nmdc:5cae6736713d02ccbe26543d733875cb", + "file_size_bytes": 9871224 + }, + { + "name": "Gp0127637_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_supfam.gff", + "md5_checksum": "a64350eb947c199cc1fbfb087191c0c7", + "id": "nmdc:a64350eb947c199cc1fbfb087191c0c7", + "file_size_bytes": 55329770 + }, + { + "name": "Gp0127637_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cath_funfam.gff", + "md5_checksum": "b8492828a1ad078d9c3192bab4d9a3fa", + "id": "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", + "file_size_bytes": 42052238 + }, + { + "name": "Gp0127637_KO_EC GFF file", + "description": 
"KO_EC GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko_ec.gff", + "md5_checksum": "2471f27b6cf11b6f93c791c273989731", + "id": "nmdc:2471f27b6cf11b6f93c791c273989731", + "file_size_bytes": 26689447 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127637_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.tooShort.fa", + "md5_checksum": "7968c6b88e49f066bd24982b4d54965b", + "id": "nmdc:7968c6b88e49f066bd24982b4d54965b", + "file_size_bytes": 91577123 + }, + { + "name": "Gp0127637_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.unbinned.fa", + "md5_checksum": "120fbaa7439eb628d9a982de573446a8", + "id": "nmdc:120fbaa7439eb628d9a982de573446a8", + "file_size_bytes": 22556841 + }, + { + "name": "Gp0127637_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127637", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_checkm_qa.out", + "md5_checksum": "347a7ee18b37674e031cca9046e92623", + "id": "nmdc:347a7ee18b37674e031cca9046e92623", + "file_size_bytes": 1092 + }, + { + "name": "Gp0127637_high-quality and medium-quality bins", + 
"description": "high-quality and medium-quality bins for Gp0127637", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_hqmq_bin.zip", + "md5_checksum": "de1da5ea4bfdf3131a6c510b79b145c2", + "id": "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", + "file_size_bytes": 504932 }, + { + "name": "Gp0127637_metabat2 bins", + "description": "metabat2 bins for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_metabat_bin.zip", + "md5_checksum": "382d00338a5e4829285e58a203de153e", + "id": "nmdc:382d00338a5e4829285e58a203de153e", + "file_size_bytes": 432910 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab3472d" + "$oid": "649b0052ec087f6bbab34702" }, "has_input": [ - "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", - "nmdc:c4f2407273babd894282d4d0f20be5d1", - "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5" + "nmdc:aee81646e593045bbb32a0012870b88b", + "nmdc:dee5fa37f57a24685b65e00380d6e433", + "nmdc:b9ec0754ffaa338c899244703bc91386" ], - "too_short_contig_num": 142606, + "too_short_contig_num": 200319, "part_of": [ - "nmdc:mga0d7pj22" + "nmdc:mga0sb9b30" ], - "binned_contig_num": 1261, + "binned_contig_num": 482, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", - "nmdc:e27b736ee699ef2a8468a684811aaabd", - "nmdc:b0866d1a944aa27e34dc7a140aeaf336", - "nmdc:0875e5107d03a40832d15e5cf80adbbc", - "nmdc:9b60c7c905d34e08427781eafbce9b12" + "nmdc:7968c6b88e49f066bd24982b4d54965b", + "nmdc:120fbaa7439eb628d9a982de573446a8", + "nmdc:347a7ee18b37674e031cca9046e92623", + "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", + "nmdc:382d00338a5e4829285e58a203de153e" ], - "was_informed_by": "gold:Gp0115670", - "input_contig_num": 152605, - "id": 
"nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "was_informed_by": "gold:Gp0127637", + "input_contig_num": 214863, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0d7pj22", + "name": "MAGs Analysis Activity for nmdc:mga0sb9b30", "mags_list": [ { - "number_of_contig": 118, - "completeness": 23.28, + "number_of_contig": 59, + "completeness": 8.33, "bin_name": "bins.1", - "gene_count": 572, + "gene_count": 295, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -15902,2095 +15051,2139 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 5 + "num_t_rna": 0 }, { - "number_of_contig": 151, - "completeness": 38.09, + "number_of_contig": 233, + "completeness": 45.87, "bin_name": "bins.2", - "gene_count": 725, + "gene_count": 1342, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 0.0, + "contamination": 1.28, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 9 + "num_t_rna": 18 }, { - "number_of_contig": 100, - "completeness": 99.01, + "number_of_contig": 190, + "completeness": 75.08, "bin_name": "bins.3", - "gene_count": 3233, - "bin_quality": "HQ", - "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", - "num_16s": 1, - "gtdbtk_family": "Sphingomonadaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 1.38, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 2, - "gtdbtk_genus": "Novosphingobium", - "num_t_rna": 47 - }, - { - "number_of_contig": 135, - "completeness": 34.24, - "bin_name": "bins.4", - "gene_count": 689, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.91, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": 
"", - "num_t_rna": 6 - }, - { - "number_of_contig": 652, - "completeness": 57.14, - "bin_name": "bins.5", - "gene_count": 3635, + "gene_count": 1991, "bin_quality": "MQ", "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 1, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 2.6, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.21, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", "num_5s": 1, "num_23s": 1, - "gtdbtk_genus": "Rhizobacter", - "num_t_rna": 27 - }, - { - "number_of_contig": 105, - "completeness": 27.22, - "bin_name": "bins.6", - "gene_count": 509, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.19, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 9 + "num_t_rna": 37 } ], - "unbinned_contig_num": 8738, - "started_at_time": "2021-10-11T02:28:43Z", + "unbinned_contig_num": 14062, + "started_at_time": "2021-10-11T02:24:01Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:55:52+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115670_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115670", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.tooShort.fa", - "md5_checksum": "fd5fe3f1faaaf3cd8a88d9bbfb016827", - "id": "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", - "file_size_bytes": 61828850 - }, - { - "name": "Gp0115670_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.unbinned.fa", - "md5_checksum": "e27b736ee699ef2a8468a684811aaabd", - "id": "nmdc:e27b736ee699ef2a8468a684811aaabd", - "file_size_bytes": 15075820 - }, - { - "name": "Gp0115670_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115670", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_checkm_qa.out", - "md5_checksum": "b0866d1a944aa27e34dc7a140aeaf336", - "id": "nmdc:b0866d1a944aa27e34dc7a140aeaf336", - "file_size_bytes": 1690 - }, - { - "name": "Gp0115670_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115670", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_hqmq_bin.zip", - "md5_checksum": "0875e5107d03a40832d15e5cf80adbbc", - "id": "nmdc:0875e5107d03a40832d15e5cf80adbbc", - "file_size_bytes": 1944800 - }, - { - "name": "Gp0115670_metabat2 bins", - "description": "metabat2 bins for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_metabat_bin.zip", - "md5_checksum": "9b60c7c905d34e08427781eafbce9b12", - "id": "nmdc:9b60c7c905d34e08427781eafbce9b12", - "file_size_bytes": 658258 - } - ] + "ended_at_time": "2021-10-11T03:11:56+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b4b" - }, - "id": "nmdc:omprc-11-1avd3d16", - "name": "Sand microcosm microbial communities from a hyporheic zone 
in Columbia River, Washington, USA - GW-RW T4_21-May-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-5xjtzc47" - ], - "has_output": [ - "jgi:55d7402c0d8785342fcf7e3e" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115674" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c88a" + "$oid": "649b005bbf2caae0415ef9a6" }, "has_input": [ - "nmdc:d94c174a22116c2db7ab8c47619e30aa" + "nmdc:aee81646e593045bbb32a0012870b88b" ], "part_of": [ - "nmdc:mga0cf0450" + "nmdc:mga0sb9b30" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:538fd5695eb3decd48891e72acebb8ce", - "nmdc:dde2b1748e16380e63476430ee27083a" + "nmdc:69603434971f93dbd79860c18dd5c61a", + "nmdc:bf8f822c6730b4cc73715ced3d25c262", + "nmdc:b9ec0754ffaa338c899244703bc91386", + "nmdc:22402cc61770feb5a0aaa4f760808366", + "nmdc:8c96f7faa38c361acc247b5a107a6b54", + "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", + "nmdc:89a8657f659710b3927baab155917fdf", + "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", + "nmdc:5cae6736713d02ccbe26543d733875cb", + "nmdc:a64350eb947c199cc1fbfb087191c0c7", + "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", + "nmdc:2471f27b6cf11b6f93c791c273989731" ], - "was_informed_by": "gold:Gp0115674", - "input_read_count": 26546332, - 
"output_read_bases": 3862169938, - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "was_informed_by": "gold:Gp0127637", + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", "execution_resource": "NERSC-Cori", - "input_read_bases": 4008496132, - "name": "Read QC Activity for nmdc:mga0cf0450", - "output_read_count": 25776010, - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:21:41+00:00", - "output_data_objects": [ - { - "name": "Gp0115674_Filtered Reads", - "description": "Filtered Reads for Gp0115674", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filtered.fastq.gz", - "md5_checksum": "538fd5695eb3decd48891e72acebb8ce", - "id": "nmdc:538fd5695eb3decd48891e72acebb8ce", - "file_size_bytes": 2126353222 - }, - { - "name": "Gp0115674_Filtered Stats", - "description": "Filtered Stats for Gp0115674", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filterStats.txt", - "md5_checksum": "dde2b1748e16380e63476430ee27083a", - "id": "nmdc:dde2b1748e16380e63476430ee27083a", - "file_size_bytes": 288 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0sb9b30", + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:11:56+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf50" + "$oid": "649b005f2ca5ee4adb139f93" }, "has_input": [ - "nmdc:538fd5695eb3decd48891e72acebb8ce" + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" ], + "part_of": [ + "nmdc:mga0sb9b30" + ], + "ctg_logsum": 271617, + "scaf_logsum": 272416, + "gap_pct": 0.00166, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - 
"nmdc:f8740b1fadbc29aef50d32706c955199", - "nmdc:80abfcc9b09476af4083b2af1760834f", - "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" + "nmdc:aee81646e593045bbb32a0012870b88b", + "nmdc:f1026db242cad285204c9c3d6307c183", + "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", + "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", + "nmdc:dee5fa37f57a24685b65e00380d6e433" ], - "was_informed_by": "gold:Gp0115674", - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "asm_score": 5.062, + "was_informed_by": "gold:Gp0127637", + "ctg_powsum": 29885, + "scaf_max": 43650, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "scaf_powsum": 29983, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:21:41+00:00", - "output_data_objects": [ - { - "name": "Gp0115674_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", - "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", - "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "file_size_bytes": 13768 - }, - { - "name": "Gp0115674_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", - "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", - "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "file_size_bytes": 1022858 - }, - { - "name": "Gp0115674_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115674", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", - 
"md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", - "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "file_size_bytes": 269166 - }, - { - "name": "Gp0115674_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115674", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", - "md5_checksum": "f8740b1fadbc29aef50d32706c955199", - "id": "nmdc:f8740b1fadbc29aef50d32706c955199", - "file_size_bytes": 1904303690 - }, - { - "name": "Gp0115674_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115674", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", - "md5_checksum": "80abfcc9b09476af4083b2af1760834f", - "id": "nmdc:80abfcc9b09476af4083b2af1760834f", - "file_size_bytes": 258748 - }, - { - "name": "Gp0115674_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115674", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", - "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", - "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "file_size_bytes": 2335000 - }, - { - "name": "Gp0115674_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115674", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", - "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", - "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "file_size_bytes": 1574286150 - }, - { - "name": "Gp0115674_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115674", - 
"data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", - "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", - "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "file_size_bytes": 671800 - }, - { - "name": "Gp0115674_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115674", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", - "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", - "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", - "file_size_bytes": 4070548 - } + "contigs": 214863, + "name": "Assembly Activity for nmdc:mga0sb9b30", + "ctg_max": 43650, + "gc_std": 0.08814, + "contig_bp": 108739484, + "gc_avg": 0.63266, + "started_at_time": "2021-10-11T02:24:01Z", + "scaf_bp": 108741284, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 214737, + "ended_at_time": "2021-10-11T03:11:56+00:00", + "ctg_l50": 505, + "ctg_l90": 294, + "ctg_n50": 58474, + "ctg_n90": 177521, + "scaf_l50": 505, + "scaf_l90": 294, + "scaf_n50": 58469, + "scaf_n90": 177412 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b48" + }, + "id": "nmdc:omprc-11-c8dzx197", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-kwfbp795" + ], + "has_output": [ + "jgi:574fde647ded5e3df1ee1406" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": 
"2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127637" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e71a31833bcf838a701e57" + "$oid": "649b009d6bdd4fd20273c85f" }, "has_input": [ - "nmdc:538fd5695eb3decd48891e72acebb8ce" + "nmdc:320ac579913ecc4c218607b6b3b915b3" ], "part_of": [ - "nmdc:mga0cf0450" + "nmdc:mga0sb9b30" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "nmdc:f8740b1fadbc29aef50d32706c955199", - "nmdc:80abfcc9b09476af4083b2af1760834f", - "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", + "nmdc:611e67df261e050860b1075c6a6a5ff5" ], - "was_informed_by": "gold:Gp0115674", - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "was_informed_by": "gold:Gp0127637", + "input_read_count": 24239336, + "output_read_bases": 2975652755, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", - "started_at_time": "2021-10-11T02:28:52Z", + "input_read_bases": 3660139736, + "name": "Read QC Activity for nmdc:mga0sb9b30", + "output_read_count": 19917090, + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:11:56+00:00" + } + ], + 
"read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf23" + }, + "has_input": [ + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "nmdc:37dd1d73ad47979ee5284830d27df535", + "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "nmdc:81a6efbd082e07bc2db174a88d64a272", + "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "nmdc:9a1826f66ee45187d627076d11dc491f", + "nmdc:67adb9cc2c75251f556a90b1a959ea72" + ], + "was_informed_by": "gold:Gp0127637", + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", + "started_at_time": "2021-10-11T02:24:01Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:21:41+00:00", - "output_data_objects": [ - { - "name": "Gp0115674_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", - "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", - "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "file_size_bytes": 13768 - }, - { - "name": "Gp0115674_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", - "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", - "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "file_size_bytes": 1022858 - }, - { - "name": "Gp0115674_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115674", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", - 
"md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", - "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "file_size_bytes": 269166 - }, - { - "name": "Gp0115674_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115674", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", - "md5_checksum": "f8740b1fadbc29aef50d32706c955199", - "id": "nmdc:f8740b1fadbc29aef50d32706c955199", - "file_size_bytes": 1904303690 - }, - { - "name": "Gp0115674_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115674", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", - "md5_checksum": "80abfcc9b09476af4083b2af1760834f", - "id": "nmdc:80abfcc9b09476af4083b2af1760834f", - "file_size_bytes": 258748 - }, - { - "name": "Gp0115674_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115674", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", - "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", - "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "file_size_bytes": 2335000 - }, - { - "name": "Gp0115674_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115674", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", - "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", - "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "file_size_bytes": 1574286150 - }, - { - "name": "Gp0115674_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115674", - 
"data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", - "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", - "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "file_size_bytes": 671800 - }, - { - "name": "Gp0115674_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115674", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", - "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", - "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", - "file_size_bytes": 4070548 - } - ] - }, + "ended_at_time": "2021-10-11T03:11:56+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fb6" + "$oid": "61e7191f833bcf838a6ffa50" }, "has_input": [ - "nmdc:538fd5695eb3decd48891e72acebb8ce" + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" ], "part_of": [ - "nmdc:mga0cf0450" + "nmdc:mga0sb9b30" ], - "ctg_logsum": 272042, - "scaf_logsum": 272657, - "gap_pct": 0.00172, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", - "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", - "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", - "nmdc:604ed99b7c622082ddf174bb11d2787f", - "nmdc:a0263d8b11653306a05f598395ca603a" + "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "nmdc:37dd1d73ad47979ee5284830d27df535", + "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "nmdc:81a6efbd082e07bc2db174a88d64a272", + "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "nmdc:9a1826f66ee45187d627076d11dc491f", + 
"nmdc:67adb9cc2c75251f556a90b1a959ea72" ], - "asm_score": 18.19, - "was_informed_by": "gold:Gp0115674", - "ctg_powsum": 36133, - "scaf_max": 176505, - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", - "scaf_powsum": 36239, + "was_informed_by": "gold:Gp0127637", + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", "execution_resource": "NERSC-Cori", - "contigs": 139326, - "name": "Assembly Activity for nmdc:mga0cf0450", - "ctg_max": 176505, - "gc_std": 0.12397, - "contig_bp": 73195425, - "gc_avg": 0.56886, - "started_at_time": "2021-10-11T02:28:52Z", - "scaf_bp": 73196685, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 139236, - "ended_at_time": "2021-10-11T05:21:41+00:00", - "ctg_l50": 481, - "ctg_l90": 290, - "ctg_n50": 30768, - "ctg_n90": 115008, - "scaf_l50": 482, - "scaf_l90": 290, - "scaf_n50": 30582, - "scaf_n90": 114932, - "scaf_l_gt50k": 2506146, - "scaf_n_gt50k": 32, - "scaf_pct_gt50k": 3.4238515, - "output_data_objects": [ - { - "name": "Gp0115674_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115674", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_contigs.fna", - "md5_checksum": "ed2e4b90c8c2947486cc5c3c5828f949", - "id": "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", - "file_size_bytes": 78686505 - }, - { - "name": "Gp0115674_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115674", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_scaffolds.fna", - "md5_checksum": "e8fa9ae5e04a2969d220d81f1fb752f2", - "id": "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", - "file_size_bytes": 78267725 - }, - { - "name": "Gp0115674_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_covstats.txt", - "md5_checksum": 
"5f308ea3cb43a331cda55ac9f91c6a53", - "id": "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", - "file_size_bytes": 10980044 - }, - { - "name": "Gp0115674_Assembled AGP file", - "description": "Assembled AGP file for Gp0115674", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_assembly.agp", - "md5_checksum": "604ed99b7c622082ddf174bb11d2787f", - "id": "nmdc:604ed99b7c622082ddf174bb11d2787f", - "file_size_bytes": 10249514 - }, - { - "name": "Gp0115674_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115674", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_pairedMapped_sorted.bam", - "md5_checksum": "a0263d8b11653306a05f598395ca603a", - "id": "nmdc:a0263d8b11653306a05f598395ca603a", - "file_size_bytes": 2304306876 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:11:56+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127638_Filtered Reads", + "description": "Filtered Reads for Gp0127638", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filtered.fastq.gz", + "md5_checksum": "56ba2416c050decd6c16c618c1e4a752", + "id": "nmdc:56ba2416c050decd6c16c618c1e4a752", + "file_size_bytes": 1649318115 + }, + { + "name": "Gp0127638_Filtered Stats", + "description": "Filtered Stats for Gp0127638", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filterStats.txt", + "md5_checksum": "5c9398042e9ff608befa78e86597bdf0", + "id": 
"nmdc:5c9398042e9ff608befa78e86597bdf0", + "file_size_bytes": 283 + }, + { + "name": "Gp0127638_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", + "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", + "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "file_size_bytes": 2025 + }, + { + "name": "Gp0127638_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", + "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", + "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", + "file_size_bytes": 655633 + }, + { + "name": "Gp0127638_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127638", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", + "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", + "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", + "file_size_bytes": 232133 + }, + { + "name": "Gp0127638_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127638", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", + "md5_checksum": "e9946f36795474182b7759d3d7532b57", + "id": "nmdc:e9946f36795474182b7759d3d7532b57", + "file_size_bytes": 1448205544 + }, + { + "name": "Gp0127638_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127638", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", + "md5_checksum": 
"33ff1d85d17d763afc9e21e481cc10d2", + "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "file_size_bytes": 253872 + }, + { + "name": "Gp0127638_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127638", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", + "md5_checksum": "997a66f49a232750bd7132639f3387e7", + "id": "nmdc:997a66f49a232750bd7132639f3387e7", + "file_size_bytes": 2331772 + }, + { + "name": "Gp0127638_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127638", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", + "md5_checksum": "d3f604a59babf001839d38a617b62931", + "id": "nmdc:d3f604a59babf001839d38a617b62931", + "file_size_bytes": 1157365410 + }, + { + "name": "Gp0127638_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127638", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", + "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", + "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "file_size_bytes": 621484 + }, + { + "name": "Gp0127638_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127638", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", + "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", + "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", + "file_size_bytes": 3896830 + }, + { + "name": "Gp0127638_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127638", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", + "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", + "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "file_size_bytes": 2025 + }, + { + "name": "Gp0127638_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", + "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", + "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", + "file_size_bytes": 655633 + }, + { + "name": "Gp0127638_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127638", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", + "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", + "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", + "file_size_bytes": 232133 + }, + { + "name": "Gp0127638_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127638", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", + "md5_checksum": "e9946f36795474182b7759d3d7532b57", + "id": "nmdc:e9946f36795474182b7759d3d7532b57", + "file_size_bytes": 1448205544 + }, + { + "name": "Gp0127638_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127638", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", + "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", + "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "file_size_bytes": 253872 + }, + { + "name": "Gp0127638_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127638", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", + "md5_checksum": "997a66f49a232750bd7132639f3387e7", + "id": "nmdc:997a66f49a232750bd7132639f3387e7", + "file_size_bytes": 2331772 + }, + { + "name": "Gp0127638_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127638", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", + "md5_checksum": "d3f604a59babf001839d38a617b62931", + "id": "nmdc:d3f604a59babf001839d38a617b62931", + "file_size_bytes": 1157365410 + }, + { + "name": "Gp0127638_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127638", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", + "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", + "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "file_size_bytes": 621484 + }, + { + "name": "Gp0127638_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127638", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", + "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", + "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", + "file_size_bytes": 3896830 + }, + { + "name": "Gp0127638_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127638", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_contigs.fna", + "md5_checksum": "5122503797ac0ed9694a6f4feecab955", + "id": "nmdc:5122503797ac0ed9694a6f4feecab955", + "file_size_bytes": 84307064 + 
}, + { + "name": "Gp0127638_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127638", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_scaffolds.fna", + "md5_checksum": "d7ee4628101b11bc5fb67d961a4e1a0a", + "id": "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", + "file_size_bytes": 83796938 + }, + { + "name": "Gp0127638_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_covstats.txt", + "md5_checksum": "0944f2c0dd70a751117fb10d9a41fddc", + "id": "nmdc:0944f2c0dd70a751117fb10d9a41fddc", + "file_size_bytes": 13413799 + }, + { + "name": "Gp0127638_Assembled AGP file", + "description": "Assembled AGP file for Gp0127638", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_assembly.agp", + "md5_checksum": "1917dcbbe1efcc2a57c511648a7f332e", + "id": "nmdc:1917dcbbe1efcc2a57c511648a7f332e", + "file_size_bytes": 12526116 + }, + { + "name": "Gp0127638_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127638", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_pairedMapped_sorted.bam", + "md5_checksum": "6420476f7e93425a68aa00b8e09cd6e7", + "id": "nmdc:6420476f7e93425a68aa00b8e09cd6e7", + "file_size_bytes": 1810224630 + }, + { + "name": "Gp0127638_Protein FAA", + "description": "Protein FAA for Gp0127638", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_proteins.faa", + "md5_checksum": "f56690d136c4dafdc1eaa64a21fd9210", + "id": "nmdc:f56690d136c4dafdc1eaa64a21fd9210", + "file_size_bytes": 49236514 + }, + { + "name": "Gp0127638_Structural annotation GFF 
file", + "description": "Structural annotation GFF file for Gp0127638", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_structural_annotation.gff", + "md5_checksum": "8be4e8ac2d00bf1d5b4863c36dc3678c", + "id": "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", + "file_size_bytes": 2519 + }, + { + "name": "Gp0127638_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127638", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_functional_annotation.gff", + "md5_checksum": "41453202313c56e06b0cc00b5ee6c375", + "id": "nmdc:41453202313c56e06b0cc00b5ee6c375", + "file_size_bytes": 56761027 + }, + { + "name": "Gp0127638_KO TSV file", + "description": "KO TSV file for Gp0127638", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko.tsv", + "md5_checksum": "e06bd74dce2e5b839b35ac1012d93ba4", + "id": "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", + "file_size_bytes": 6728487 + }, + { + "name": "Gp0127638_EC TSV file", + "description": "EC TSV file for Gp0127638", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ec.tsv", + "md5_checksum": "f2786d1f8a17bedd0104b01ec06ebfce", + "id": "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", + "file_size_bytes": 4522678 + }, + { + "name": "Gp0127638_COG GFF file", + "description": "COG GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cog.gff", + "md5_checksum": "37cb3fb060da091a84f1baa7ef3743fc", + "id": "nmdc:37cb3fb060da091a84f1baa7ef3743fc", + "file_size_bytes": 33992392 + }, + { + "name": "Gp0127638_PFAM GFF file", + "description": "PFAM GFF file for Gp0127638", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_pfam.gff", + "md5_checksum": "34680897818585cefbef6e69109e7de4", + "id": "nmdc:34680897818585cefbef6e69109e7de4", + "file_size_bytes": 25203872 + }, + { + "name": "Gp0127638_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_tigrfam.gff", + "md5_checksum": "a00404838fbe9f846a704e1dbb14f2b2", + "id": "nmdc:a00404838fbe9f846a704e1dbb14f2b2", + "file_size_bytes": 2852587 + }, + { + "name": "Gp0127638_SMART GFF file", + "description": "SMART GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_smart.gff", + "md5_checksum": "700dd121a0ac41e3fa8077d7330adae7", + "id": "nmdc:700dd121a0ac41e3fa8077d7330adae7", + "file_size_bytes": 7723231 + }, + { + "name": "Gp0127638_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_supfam.gff", + "md5_checksum": "e429651ae53a18b07d99880d09a19b26", + "id": "nmdc:e429651ae53a18b07d99880d09a19b26", + "file_size_bytes": 42064836 + }, + { + "name": "Gp0127638_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cath_funfam.gff", + "md5_checksum": "b22aab3cc1b9231102b23c31b418eff4", + "id": "nmdc:b22aab3cc1b9231102b23c31b418eff4", + "file_size_bytes": 32005228 + }, + { + "name": "Gp0127638_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko_ec.gff", + "md5_checksum": "ebb5a6a7ad1f14fd8cf2178ec59969ef", + "id": "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef", + "file_size_bytes": 21405596 + }, + { + "name": "Gp0127638_metabat2 bin checkm quality assessment result", + 
"description": "metabat2 bin checkm quality assessment result for Gp0127638", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_checkm_qa.out", + "md5_checksum": "dcdd7e33e92d3658fe68056f21b57f5d", + "id": "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", + "file_size_bytes": 760 }, + { + "name": "Gp0127638_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127638", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_hqmq_bin.zip", + "md5_checksum": "8ca8e2250dc68643e937163323f2a826", + "id": "nmdc:8ca8e2250dc68643e937163323f2a826", + "file_size_bytes": 508443 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9d4" + "$oid": "649b0052ec087f6bbab3471d" }, "has_input": [ - "nmdc:ed2e4b90c8c2947486cc5c3c5828f949" + "nmdc:5122503797ac0ed9694a6f4feecab955", + "nmdc:6420476f7e93425a68aa00b8e09cd6e7", + "nmdc:41453202313c56e06b0cc00b5ee6c375" ], + "too_short_contig_num": 162130, "part_of": [ - "nmdc:mga0cf0450" + "nmdc:mga0hjgc20" ], + "binned_contig_num": 189, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", - "nmdc:ce90743969776fd717671aeb21d37379", - "nmdc:1a4f5145ccf0838811fe570a93549fdf", - "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", - "nmdc:b5db445feb8edb47022c2a0ee86d828d", - "nmdc:157d24f6f63091fbe9ef98cc3090975d", - "nmdc:afa217feffb94965aa1839041305237e", - "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", - "nmdc:ffcd280a63fab7bcfa5422f34070d87f", - "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", - "nmdc:d5676c01e67f71559a382850f42c3493", - "nmdc:121fab4d5bff0dcbb9d1849738a72347" + "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", + "nmdc:8ca8e2250dc68643e937163323f2a826" ], - "was_informed_by": 
"gold:Gp0115674", - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "was_informed_by": "gold:Gp0127638", + "input_contig_num": 169697, + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0cf0450", - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:21:41+00:00", - "output_data_objects": [ - { - "name": "Gp0115674_Protein FAA", - "description": "Protein FAA for Gp0115674", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_proteins.faa", - "md5_checksum": "9ae7cb8ba4bee2ce9a46c963d00ba6ba", - "id": "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", - "file_size_bytes": 43650605 - }, - { - "name": "Gp0115674_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115674", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_structural_annotation.gff", - "md5_checksum": "ce90743969776fd717671aeb21d37379", - "id": "nmdc:ce90743969776fd717671aeb21d37379", - "file_size_bytes": 2529 - }, - { - "name": "Gp0115674_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115674", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_functional_annotation.gff", - "md5_checksum": "1a4f5145ccf0838811fe570a93549fdf", - "id": "nmdc:1a4f5145ccf0838811fe570a93549fdf", - "file_size_bytes": 47604509 - }, - { - "name": "Gp0115674_KO TSV file", - "description": "KO TSV file for Gp0115674", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko.tsv", - "md5_checksum": "662dae8ba0ea9dda93637c2ea60c1f4e", - "id": 
"nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", - "file_size_bytes": 6436472 - }, - { - "name": "Gp0115674_EC TSV file", - "description": "EC TSV file for Gp0115674", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ec.tsv", - "md5_checksum": "b5db445feb8edb47022c2a0ee86d828d", - "id": "nmdc:b5db445feb8edb47022c2a0ee86d828d", - "file_size_bytes": 4111562 - }, - { - "name": "Gp0115674_COG GFF file", - "description": "COG GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cog.gff", - "md5_checksum": "157d24f6f63091fbe9ef98cc3090975d", - "id": "nmdc:157d24f6f63091fbe9ef98cc3090975d", - "file_size_bytes": 27373015 - }, - { - "name": "Gp0115674_PFAM GFF file", - "description": "PFAM GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_pfam.gff", - "md5_checksum": "afa217feffb94965aa1839041305237e", - "id": "nmdc:afa217feffb94965aa1839041305237e", - "file_size_bytes": 22153817 - }, - { - "name": "Gp0115674_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_tigrfam.gff", - "md5_checksum": "4a00e0c0bc479b8e6f1139c8de3149d5", - "id": "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", - "file_size_bytes": 2995281 - }, - { - "name": "Gp0115674_SMART GFF file", - "description": "SMART GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_smart.gff", - "md5_checksum": "ffcd280a63fab7bcfa5422f34070d87f", - "id": "nmdc:ffcd280a63fab7bcfa5422f34070d87f", - "file_size_bytes": 6393135 - }, - { - "name": "Gp0115674_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_supfam.gff", - "md5_checksum": 
"9fb334fc9409e6db51aaa1f960b08f4b", - "id": "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", - "file_size_bytes": 35023258 - }, - { - "name": "Gp0115674_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cath_funfam.gff", - "md5_checksum": "d5676c01e67f71559a382850f42c3493", - "id": "nmdc:d5676c01e67f71559a382850f42c3493", - "file_size_bytes": 27788764 - }, + "name": "MAGs Analysis Activity for nmdc:mga0hjgc20", + "mags_list": [ { - "name": "Gp0115674_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko_ec.gff", - "md5_checksum": "121fab4d5bff0dcbb9d1849738a72347", - "id": "nmdc:121fab4d5bff0dcbb9d1849738a72347", - "file_size_bytes": 20542466 + "number_of_contig": 189, + "completeness": 73.5, + "bin_name": "bins.1", + "gene_count": 2020, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 37 } - ] - }, + ], + "unbinned_contig_num": 7378, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34732" + "$oid": "649b005bbf2caae0415ef9bc" }, "has_input": [ - "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", - "nmdc:a0263d8b11653306a05f598395ca603a", - "nmdc:1a4f5145ccf0838811fe570a93549fdf" + "nmdc:5122503797ac0ed9694a6f4feecab955" ], - "too_short_contig_num": 131855, "part_of": [ - "nmdc:mga0cf0450" + 
"nmdc:mga0hjgc20" ], - "binned_contig_num": 1119, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:6a03eb0156b154ea68ffff9b473e73a5", - "nmdc:33a477987509b67fcfa5096d20c7c40b", - "nmdc:314c92c3a9458e1aa304e3c474209acf", - "nmdc:a4f9093efaf84855cab58880b262afd5", - "nmdc:1a29af6f30c21f38b25e4553605f50ef" + "nmdc:f56690d136c4dafdc1eaa64a21fd9210", + "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", + "nmdc:41453202313c56e06b0cc00b5ee6c375", + "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", + "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", + "nmdc:37cb3fb060da091a84f1baa7ef3743fc", + "nmdc:34680897818585cefbef6e69109e7de4", + "nmdc:a00404838fbe9f846a704e1dbb14f2b2", + "nmdc:700dd121a0ac41e3fa8077d7330adae7", + "nmdc:e429651ae53a18b07d99880d09a19b26", + "nmdc:b22aab3cc1b9231102b23c31b418eff4", + "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef" ], - "was_informed_by": "gold:Gp0115674", - "input_contig_num": 139324, - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "was_informed_by": "gold:Gp0127638", + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0cf0450", - "mags_list": [ - { - "number_of_contig": 198, - "completeness": 100.0, - "bin_name": "bins.1", - "gene_count": 5608, - "bin_quality": "HQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 1, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 1.29, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "Rhizobacter", - "num_t_rna": 46 - }, - { - "number_of_contig": 353, - "completeness": 88.62, - "bin_name": "bins.2", - "gene_count": 3146, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", - "num_16s": 0, - "gtdbtk_family": "Sphingomonadaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 2.0, - 
"gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Novosphingobium", - "num_t_rna": 40 - }, - { - "number_of_contig": 273, - "completeness": 51.61, - "bin_name": "bins.3", - "gene_count": 1397, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Pseudomonadales", - "num_16s": 0, - "gtdbtk_family": "UBA3067", - "gtdbtk_domain": "Bacteria", - "contamination": 0.8, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA3067", - "num_t_rna": 17 - }, - { - "number_of_contig": 295, - "completeness": 49.14, - "bin_name": "bins.4", - "gene_count": 1695, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 16 - } + "name": "Annotation Activity for nmdc:mga0hjgc20", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fac" + }, + "has_input": [ + "nmdc:56ba2416c050decd6c16c618c1e4a752" ], - "unbinned_contig_num": 6350, - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:21:41+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": 
"Gp0115674_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.tooShort.fa", - "md5_checksum": "6a03eb0156b154ea68ffff9b473e73a5", - "id": "nmdc:6a03eb0156b154ea68ffff9b473e73a5", - "file_size_bytes": 56345518 - }, - { - "name": "Gp0115674_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.unbinned.fa", - "md5_checksum": "33a477987509b67fcfa5096d20c7c40b", - "id": "nmdc:33a477987509b67fcfa5096d20c7c40b", - "file_size_bytes": 10836032 - }, - { - "name": "Gp0115674_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115674", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_checkm_qa.out", - "md5_checksum": "314c92c3a9458e1aa304e3c474209acf", - "id": "nmdc:314c92c3a9458e1aa304e3c474209acf", - "file_size_bytes": 1360 - }, - { - "name": "Gp0115674_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115674", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_hqmq_bin.zip", - "md5_checksum": "a4f9093efaf84855cab58880b262afd5", - "id": "nmdc:a4f9093efaf84855cab58880b262afd5", - "file_size_bytes": 2974639 - }, - { - "name": "Gp0115674_metabat2 bins", - "description": "metabat2 bins for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_metabat_bin.zip", - "md5_checksum": "1a29af6f30c21f38b25e4553605f50ef", - "id": "nmdc:1a29af6f30c21f38b25e4553605f50ef", - "file_size_bytes": 469326 - } + "part_of": [ + "nmdc:mga0hjgc20" + ], + "ctg_logsum": 
141543, + "scaf_logsum": 141966, + "gap_pct": 0.00109, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5122503797ac0ed9694a6f4feecab955", + "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", + "nmdc:0944f2c0dd70a751117fb10d9a41fddc", + "nmdc:1917dcbbe1efcc2a57c511648a7f332e", + "nmdc:6420476f7e93425a68aa00b8e09cd6e7" + ], + "asm_score": 6.89, + "was_informed_by": "gold:Gp0127638", + "ctg_powsum": 15753, + "scaf_max": 48487, + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "scaf_powsum": 15801, + "execution_resource": "NERSC-Cori", + "contigs": 169698, + "name": "Assembly Activity for nmdc:mga0hjgc20", + "ctg_max": 48487, + "gc_std": 0.08917, + "gc_avg": 0.63213, + "contig_bp": 77783768, + "started_at_time": "2021-12-01T21:31:29Z", + "scaf_bp": 77784618, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169622, + "ended_at_time": "2021-12-02T20:49:51+00:00", + "ctg_l50": 433, + "ctg_l90": 289, + "ctg_n50": 51455, + "ctg_n90": 144304, + "scaf_l50": 433, + "scaf_l90": 289, + "scaf_n50": 51437, + "scaf_n90": 144234 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b49" + }, + "id": "nmdc:omprc-11-tgxmb243", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-pq3zmp51" + ], + "has_output": [ + "jgi:574fde837ded5e3df1ee141d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA 
- GW-RW N2_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127638" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b4c" - }, - "id": "nmdc:omprc-11-hk1bje46", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-5h7px351" - ], - "has_output": [ - "jgi:55d817f70d8785342fcf8270" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115673" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c876" + "$oid": "649b009d6bdd4fd20273c873" }, "has_input": [ - "nmdc:3783bc4ce3716b6d299533bc3f6591b6" + "nmdc:56b2d94789953adf1b4ed35f09f0edd4" ], "part_of": [ - "nmdc:mga0kpja70" + "nmdc:mga0hjgc20" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:268918f610926421d2af43f175553680", - "nmdc:4610980cf3558f5a9830797ead97362a" + "nmdc:56ba2416c050decd6c16c618c1e4a752", + "nmdc:5c9398042e9ff608befa78e86597bdf0" ], - "was_informed_by": "gold:Gp0115673", - "input_read_count": 17796788, - "output_read_bases": 2520029380, - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + 
"was_informed_by": "gold:Gp0127638", + "input_read_count": 21721428, + "output_read_bases": 2949961420, + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", "execution_resource": "NERSC-Cori", - "input_read_bases": 2687314988, - "name": "Read QC Activity for nmdc:mga0kpja70", - "output_read_count": 16817496, - "started_at_time": "2021-10-11T02:28:36Z", + "input_read_bases": 3279935628, + "name": "Read QC Activity for nmdc:mga0hjgc20", + "output_read_count": 19723416, + "started_at_time": "2021-12-01T21:31:29Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:32:43+00:00", - "output_data_objects": [ - { - "name": "Gp0115673_Filtered Reads", - "description": "Filtered Reads for Gp0115673", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", - "md5_checksum": "268918f610926421d2af43f175553680", - "id": "nmdc:268918f610926421d2af43f175553680", - "file_size_bytes": 1492820163 - }, - { - "name": "Gp0115673_Filtered Stats", - "description": "Filtered Stats for Gp0115673", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", - "md5_checksum": "4610980cf3558f5a9830797ead97362a", - "id": "nmdc:4610980cf3558f5a9830797ead97362a", - "file_size_bytes": 287 - } - ] - }, + "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf44" + "$oid": "649b009bff710ae353f8cf35" }, "has_input": [ - "nmdc:268918f610926421d2af43f175553680" + "nmdc:56ba2416c050decd6c16c618c1e4a752" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:c7b24571b61a33018cf118b5424b787f", - "nmdc:e185734176505343bf4c83c16a0a9fe2", - "nmdc:7c6b0ef44450c747580826a2e218844b", - "nmdc:5b98c377f424d7609f1a09e350cfb837", - "nmdc:b5f7a68a94b356001014d1be024231af", - 
"nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "nmdc:35bf579641b2ffb3614098d9811a4968", - "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "nmdc:a7030fa8e9622e3396c2b96448e90c3b" + "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "nmdc:b6de56746a284f8226dd86817c8ae04e", + "nmdc:d9572e708af9f0a06e98cfddfb298359", + "nmdc:e9946f36795474182b7759d3d7532b57", + "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "nmdc:997a66f49a232750bd7132639f3387e7", + "nmdc:d3f604a59babf001839d38a617b62931", + "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "nmdc:70c2fc1a2c7c0032528ff91ad1576465" ], - "was_informed_by": "gold:Gp0115673", - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "was_informed_by": "gold:Gp0127638", + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", - "started_at_time": "2021-10-11T02:28:36Z", + "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", + "started_at_time": "2021-12-01T21:31:29Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:32:43+00:00", - "output_data_objects": [ - { - "name": "Gp0115673_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", - "md5_checksum": "c7b24571b61a33018cf118b5424b787f", - "id": "nmdc:c7b24571b61a33018cf118b5424b787f", - "file_size_bytes": 9782 - }, - { - "name": "Gp0115673_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", - "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", - "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", - "file_size_bytes": 856112 - }, - { - "name": "Gp0115673_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115673", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", - "md5_checksum": "7c6b0ef44450c747580826a2e218844b", - "id": "nmdc:7c6b0ef44450c747580826a2e218844b", - "file_size_bytes": 255142 - }, - { - "name": "Gp0115673_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115673", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", - "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", - "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", - "file_size_bytes": 1218364738 - }, - { - "name": "Gp0115673_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115673", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", - "md5_checksum": "b5f7a68a94b356001014d1be024231af", - "id": "nmdc:b5f7a68a94b356001014d1be024231af", - "file_size_bytes": 254923 - }, - { - "name": "Gp0115673_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115673", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", - "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", - "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "file_size_bytes": 2323219 - }, - { - "name": "Gp0115673_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115673", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", - "md5_checksum": "35bf579641b2ffb3614098d9811a4968", - "id": "nmdc:35bf579641b2ffb3614098d9811a4968", - "file_size_bytes": 
1001134031 - }, - { - "name": "Gp0115673_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115673", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", - "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", - "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "file_size_bytes": 640671 - }, - { - "name": "Gp0115673_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115673", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", - "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", - "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", - "file_size_bytes": 3995499 - } - ] - }, + "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "61e719b7833bcf838a7011dc" + "$oid": "61e719b5833bcf838a7010e1" }, "has_input": [ - "nmdc:268918f610926421d2af43f175553680" + "nmdc:56ba2416c050decd6c16c618c1e4a752" ], "part_of": [ - "nmdc:mga0kpja70" + "nmdc:mga0hjgc20" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:c7b24571b61a33018cf118b5424b787f", - "nmdc:e185734176505343bf4c83c16a0a9fe2", - "nmdc:7c6b0ef44450c747580826a2e218844b", - "nmdc:5b98c377f424d7609f1a09e350cfb837", - "nmdc:b5f7a68a94b356001014d1be024231af", - "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "nmdc:35bf579641b2ffb3614098d9811a4968", - "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "nmdc:a7030fa8e9622e3396c2b96448e90c3b" + "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "nmdc:b6de56746a284f8226dd86817c8ae04e", + "nmdc:d9572e708af9f0a06e98cfddfb298359", + 
"nmdc:e9946f36795474182b7759d3d7532b57", + "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "nmdc:997a66f49a232750bd7132639f3387e7", + "nmdc:d3f604a59babf001839d38a617b62931", + "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "nmdc:70c2fc1a2c7c0032528ff91ad1576465" ], - "was_informed_by": "gold:Gp0115673", - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "was_informed_by": "gold:Gp0127638", + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", - "started_at_time": "2021-10-11T02:28:36Z", + "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", + "started_at_time": "2021-12-01T21:31:29Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:32:43+00:00", - "output_data_objects": [ - { - "name": "Gp0115673_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", - "md5_checksum": "c7b24571b61a33018cf118b5424b787f", - "id": "nmdc:c7b24571b61a33018cf118b5424b787f", - "file_size_bytes": 9782 - }, - { - "name": "Gp0115673_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", - "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", - "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", - "file_size_bytes": 856112 - }, - { - "name": "Gp0115673_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115673", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", - "md5_checksum": "7c6b0ef44450c747580826a2e218844b", - "id": "nmdc:7c6b0ef44450c747580826a2e218844b", - "file_size_bytes": 255142 - }, - { - "name": "Gp0115673_Centrifuge classification TSV report", - 
"description": "Centrifuge classification TSV report for Gp0115673", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", - "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", - "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", - "file_size_bytes": 1218364738 - }, - { - "name": "Gp0115673_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115673", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", - "md5_checksum": "b5f7a68a94b356001014d1be024231af", - "id": "nmdc:b5f7a68a94b356001014d1be024231af", - "file_size_bytes": 254923 - }, - { - "name": "Gp0115673_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115673", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", - "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", - "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "file_size_bytes": 2323219 - }, - { - "name": "Gp0115673_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115673", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", - "md5_checksum": "35bf579641b2ffb3614098d9811a4968", - "id": "nmdc:35bf579641b2ffb3614098d9811a4968", - "file_size_bytes": 1001134031 - }, - { - "name": "Gp0115673_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115673", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", - "md5_checksum": 
"801b79f5442e5bfaa0d15f76786cfbc0", - "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "file_size_bytes": 640671 - }, - { - "name": "Gp0115673_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115673", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", - "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", - "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", - "file_size_bytes": 3995499 - } - ] + "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115670_Filtered Reads", + "description": "Filtered Reads for Gp0115670", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", + "md5_checksum": "7f6b353300583c60d2d668880b4134cd", + "id": "nmdc:7f6b353300583c60d2d668880b4134cd", + "file_size_bytes": 3012174785 + }, + { + "name": "Gp0115670_Filtered Stats", + "description": "Filtered Stats for Gp0115670", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", + "md5_checksum": "a4f65d101293fa4345cd865f86597464", + "id": "nmdc:a4f65d101293fa4345cd865f86597464", + "file_size_bytes": 291 + }, + { + "name": "Gp0115670_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", + "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", + "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "file_size_bytes": 13758 + }, + { + "name": "Gp0115670_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115670", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", + "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", + "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "file_size_bytes": 1116084 + }, + { + "name": "Gp0115670_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115670", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", + "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", + "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "file_size_bytes": 268542 + }, + { + "name": "Gp0115670_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115670", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", + "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", + "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "file_size_bytes": 2458475116 + }, + { + "name": "Gp0115670_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115670", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", + "md5_checksum": "c065784bed2b2495d512af93d05967de", + "id": "nmdc:c065784bed2b2495d512af93d05967de", + "file_size_bytes": 261692 + }, + { + "name": "Gp0115670_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115670", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", + "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", + "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "file_size_bytes": 2343355 + }, + { + "name": 
"Gp0115670_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115670", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", + "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", + "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "file_size_bytes": 2019980511 + }, + { + "name": "Gp0115670_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115670", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", + "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", + "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", + "file_size_bytes": 694029 + }, + { + "name": "Gp0115670_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115670", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", + "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", + "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", + "file_size_bytes": 4190653 + }, + { + "name": "Gp0115670_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", + "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", + "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "file_size_bytes": 13758 + }, + { + "name": "Gp0115670_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", + "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", + "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "file_size_bytes": 1116084 + }, + { + "name": 
"Gp0115670_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115670", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", + "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", + "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "file_size_bytes": 268542 + }, + { + "name": "Gp0115670_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115670", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", + "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", + "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "file_size_bytes": 2458475116 + }, + { + "name": "Gp0115670_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115670", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", + "md5_checksum": "c065784bed2b2495d512af93d05967de", + "id": "nmdc:c065784bed2b2495d512af93d05967de", + "file_size_bytes": 261692 + }, + { + "name": "Gp0115670_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115670", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", + "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", + "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "file_size_bytes": 2343355 + }, + { + "name": "Gp0115670_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115670", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", + "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", + "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "file_size_bytes": 2019980511 + }, + { + "name": "Gp0115670_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115670", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", + "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", + "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", + "file_size_bytes": 694029 + }, + { + "name": "Gp0115670_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115670", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", + "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", + "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", + "file_size_bytes": 4190653 + }, + { + "name": "Gp0115670_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115670", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_contigs.fna", + "md5_checksum": "975cdb0a18df949be4efb80d1dc4ef0b", + "id": "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + "file_size_bytes": 85578260 + }, + { + "name": "Gp0115670_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115670", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_scaffolds.fna", + "md5_checksum": "1dfaed4da055c5fd4226abe08bd91db9", + "id": "nmdc:1dfaed4da055c5fd4226abe08bd91db9", + "file_size_bytes": 85115954 + }, + { + "name": "Gp0115670_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115670", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_covstats.txt", + "md5_checksum": "8a749340eefc40901a22a0ef603bc803", + "id": "nmdc:8a749340eefc40901a22a0ef603bc803", + "file_size_bytes": 12068883 + }, + { + "name": "Gp0115670_Assembled AGP file", + "description": "Assembled AGP file for Gp0115670", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_assembly.agp", + "md5_checksum": "ad027e4c3ca67907154c03feeebbd97b", + "id": "nmdc:ad027e4c3ca67907154c03feeebbd97b", + "file_size_bytes": 11264235 + }, + { + "name": "Gp0115670_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115670", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_pairedMapped_sorted.bam", + "md5_checksum": "c4f2407273babd894282d4d0f20be5d1", + "id": "nmdc:c4f2407273babd894282d4d0f20be5d1", + "file_size_bytes": 3245960211 + }, + { + "name": "Gp0115670_Protein FAA", + "description": "Protein FAA for Gp0115670", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_proteins.faa", + "md5_checksum": "21230aff7bb5b266fb544905f9ac5ce2", + "id": "nmdc:21230aff7bb5b266fb544905f9ac5ce2", + "file_size_bytes": 46061226 + }, + { + "name": "Gp0115670_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115670", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_structural_annotation.gff", + "md5_checksum": "91c5cc265ef61ab83111a5bc9462e8b2", + "id": "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", + "file_size_bytes": 2769 + }, + { + "name": "Gp0115670_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115670", + "data_object_type": 
"Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_functional_annotation.gff", + "md5_checksum": "0bc4d8b8ef11724c3d7e728b0e8e0ea5", + "id": "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", + "file_size_bytes": 50449176 + }, + { + "name": "Gp0115670_KO TSV file", + "description": "KO TSV file for Gp0115670", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko.tsv", + "md5_checksum": "811910b7d8c300befddd039e833b0453", + "id": "nmdc:811910b7d8c300befddd039e833b0453", + "file_size_bytes": 6653168 + }, + { + "name": "Gp0115670_EC TSV file", + "description": "EC TSV file for Gp0115670", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ec.tsv", + "md5_checksum": "9ed55d9535d1592866a66e9d5cd936a2", + "id": "nmdc:9ed55d9535d1592866a66e9d5cd936a2", + "file_size_bytes": 4232890 + }, + { + "name": "Gp0115670_COG GFF file", + "description": "COG GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cog.gff", + "md5_checksum": "a127efaa423e6dd6d24d7ab67cc2124a", + "id": "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", + "file_size_bytes": 28376544 + }, + { + "name": "Gp0115670_PFAM GFF file", + "description": "PFAM GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_pfam.gff", + "md5_checksum": "4b56646de8c37278beaaf9797e4ddf2f", + "id": "nmdc:4b56646de8c37278beaaf9797e4ddf2f", + "file_size_bytes": 22850790 + }, + { + "name": "Gp0115670_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_tigrfam.gff", + "md5_checksum": "53a0873376e22fef62f2740f6afead21", + "id": "nmdc:53a0873376e22fef62f2740f6afead21", + 
"file_size_bytes": 3099434 + }, + { + "name": "Gp0115670_SMART GFF file", + "description": "SMART GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_smart.gff", + "md5_checksum": "36748318682076112ba81283c8bc767a", + "id": "nmdc:36748318682076112ba81283c8bc767a", + "file_size_bytes": 6433811 + }, + { + "name": "Gp0115670_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_supfam.gff", + "md5_checksum": "5dd32385b351847f23ec4eac63eb70ff", + "id": "nmdc:5dd32385b351847f23ec4eac63eb70ff", + "file_size_bytes": 36427587 + }, + { + "name": "Gp0115670_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cath_funfam.gff", + "md5_checksum": "95076052a4d5d57e1ed0c7699e4f5472", + "id": "nmdc:95076052a4d5d57e1ed0c7699e4f5472", + "file_size_bytes": 28909664 + }, + { + "name": "Gp0115670_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko_ec.gff", + "md5_checksum": "6ae89cc4b2fb7d09614c106d3358be27", + "id": "nmdc:6ae89cc4b2fb7d09614c106d3358be27", + "file_size_bytes": 21214802 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115670_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115670", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.tooShort.fa", + "md5_checksum": "fd5fe3f1faaaf3cd8a88d9bbfb016827", + "id": "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", + "file_size_bytes": 61828850 + }, + { + "name": "Gp0115670_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.unbinned.fa", + "md5_checksum": "e27b736ee699ef2a8468a684811aaabd", + "id": "nmdc:e27b736ee699ef2a8468a684811aaabd", + "file_size_bytes": 15075820 + }, + { + "name": "Gp0115670_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115670", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_checkm_qa.out", + "md5_checksum": "b0866d1a944aa27e34dc7a140aeaf336", + "id": "nmdc:b0866d1a944aa27e34dc7a140aeaf336", + "file_size_bytes": 1690 + }, + { + "name": "Gp0115670_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115670", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_hqmq_bin.zip", + "md5_checksum": "0875e5107d03a40832d15e5cf80adbbc", + "id": "nmdc:0875e5107d03a40832d15e5cf80adbbc", + "file_size_bytes": 1944800 }, + { + "name": "Gp0115670_metabat2 bins", + "description": "metabat2 bins for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_metabat_bin.zip", + "md5_checksum": "9b60c7c905d34e08427781eafbce9b12", + "id": "nmdc:9b60c7c905d34e08427781eafbce9b12", + "file_size_bytes": 658258 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fa8" + "$oid": "649b0052ec087f6bbab3472d" }, 
"has_input": [ - "nmdc:268918f610926421d2af43f175553680" + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + "nmdc:c4f2407273babd894282d4d0f20be5d1", + "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5" ], + "too_short_contig_num": 142606, "part_of": [ - "nmdc:mga0kpja70" + "nmdc:mga0d7pj22" ], - "ctg_logsum": 160283, - "scaf_logsum": 161291, - "gap_pct": 0.0036, + "binned_contig_num": 1261, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:06d4964c0822abd6f94ca883c122f7ce", - "nmdc:bad916c69afe839097650b0b9526a841", - "nmdc:a187658f262fa495de43707aabcbf480", - "nmdc:c525c04f90889be615025c667908370c", - "nmdc:2e293158750df042be7422826125bef2" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", + "nmdc:e27b736ee699ef2a8468a684811aaabd", + "nmdc:b0866d1a944aa27e34dc7a140aeaf336", + "nmdc:0875e5107d03a40832d15e5cf80adbbc", + "nmdc:9b60c7c905d34e08427781eafbce9b12" ], - "asm_score": 6.419, - "was_informed_by": "gold:Gp0115673", - "ctg_powsum": 18694, - "scaf_max": 39252, - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "scaf_powsum": 18825, + "was_informed_by": "gold:Gp0115670", + "input_contig_num": 152605, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", "execution_resource": "NERSC-Cori", - "contigs": 89808, - "name": "Assembly Activity for nmdc:mga0kpja70", - "ctg_max": 39252, - "gc_std": 0.11246, - "contig_bp": 46120517, - "gc_avg": 0.55483, - "started_at_time": "2021-10-11T02:28:36Z", - "scaf_bp": 46122177, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 89660, - "ended_at_time": "2021-10-11T03:32:43+00:00", - "ctg_l50": 493, - "ctg_l90": 286, - "ctg_n50": 19910, - "ctg_n90": 73487, - "scaf_l50": 494, - "scaf_l90": 286, - "scaf_n50": 19797, - "scaf_n90": 73347, - "output_data_objects": [ + "name": "MAGs Analysis Activity for nmdc:mga0d7pj22", + "mags_list": [ { - "name": "Gp0115673_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115673", - "data_object_type": "Assembly 
Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_contigs.fna", - "md5_checksum": "06d4964c0822abd6f94ca883c122f7ce", - "id": "nmdc:06d4964c0822abd6f94ca883c122f7ce", - "file_size_bytes": 49610158 + "number_of_contig": 118, + "completeness": 23.28, + "bin_name": "bins.1", + "gene_count": 572, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 }, { - "name": "Gp0115673_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115673", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_scaffolds.fna", - "md5_checksum": "bad916c69afe839097650b0b9526a841", - "id": "nmdc:bad916c69afe839097650b0b9526a841", - "file_size_bytes": 49338957 + "number_of_contig": 151, + "completeness": 38.09, + "bin_name": "bins.2", + "gene_count": 725, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 }, { - "name": "Gp0115673_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_covstats.txt", - "md5_checksum": "a187658f262fa495de43707aabcbf480", - "id": "nmdc:a187658f262fa495de43707aabcbf480", - "file_size_bytes": 7048516 + "number_of_contig": 100, + "completeness": 99.01, + "bin_name": "bins.3", + "gene_count": 3233, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 1, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": 
"Bacteria", + "contamination": 1.38, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 2, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 47 }, { - "name": "Gp0115673_Assembled AGP file", - "description": "Assembled AGP file for Gp0115673", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_assembly.agp", - "md5_checksum": "c525c04f90889be615025c667908370c", - "id": "nmdc:c525c04f90889be615025c667908370c", - "file_size_bytes": 6557406 + "number_of_contig": 135, + "completeness": 34.24, + "bin_name": "bins.4", + "gene_count": 689, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.91, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 }, { - "name": "Gp0115673_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115673", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_pairedMapped_sorted.bam", - "md5_checksum": "2e293158750df042be7422826125bef2", - "id": "nmdc:2e293158750df042be7422826125bef2", - "file_size_bytes": 1601507411 - } - ] - }, - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c0" - }, - "has_input": [ - "nmdc:06d4964c0822abd6f94ca883c122f7ce" - ], - "part_of": [ - "nmdc:mga0kpja70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:be3b8decbc48f9588daca36ca4c883ab", - "nmdc:106c834bb14367ec6154d1b04f2a1021", - "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", - "nmdc:84e3913c75d155fc45f04bc04810063a", - "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", - "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", - "nmdc:42173701162f4fdb727bc4eded48c2a1", - "nmdc:89b8851da4dca184654a76128048e09a", - 
"nmdc:e0d0721c6051fb0eebd70635882639c1", - "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", - "nmdc:e627abd2dfaee1fbf695de11211c6971", - "nmdc:a04e32711e814e733114531a666606c6" - ], - "was_informed_by": "gold:Gp0115673", - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0kpja70", - "started_at_time": "2021-10-11T02:28:36Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:32:43+00:00", - "output_data_objects": [ - { - "name": "Gp0115673_Protein FAA", - "description": "Protein FAA for Gp0115673", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_proteins.faa", - "md5_checksum": "be3b8decbc48f9588daca36ca4c883ab", - "id": "nmdc:be3b8decbc48f9588daca36ca4c883ab", - "file_size_bytes": 27487621 - }, - { - "name": "Gp0115673_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115673", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_structural_annotation.gff", - "md5_checksum": "106c834bb14367ec6154d1b04f2a1021", - "id": "nmdc:106c834bb14367ec6154d1b04f2a1021", - "file_size_bytes": 2505 - }, - { - "name": "Gp0115673_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115673", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_functional_annotation.gff", - "md5_checksum": "dfe3eed1eee6d6764ae22a2c6b0209e5", - "id": "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", - "file_size_bytes": 30665845 - }, - { - "name": "Gp0115673_KO TSV file", - "description": "KO TSV file for Gp0115673", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko.tsv", - 
"md5_checksum": "84e3913c75d155fc45f04bc04810063a", - "id": "nmdc:84e3913c75d155fc45f04bc04810063a", - "file_size_bytes": 4142989 - }, - { - "name": "Gp0115673_EC TSV file", - "description": "EC TSV file for Gp0115673", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ec.tsv", - "md5_checksum": "418e74fcbe4b97b8d74cb697a3b3feb4", - "id": "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", - "file_size_bytes": 2665975 - }, - { - "name": "Gp0115673_COG GFF file", - "description": "COG GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cog.gff", - "md5_checksum": "2d57dd06178c83c1f9c4bfaecf34b8b4", - "id": "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", - "file_size_bytes": 17716812 - }, - { - "name": "Gp0115673_PFAM GFF file", - "description": "PFAM GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_pfam.gff", - "md5_checksum": "42173701162f4fdb727bc4eded48c2a1", - "id": "nmdc:42173701162f4fdb727bc4eded48c2a1", - "file_size_bytes": 14043787 - }, - { - "name": "Gp0115673_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_tigrfam.gff", - "md5_checksum": "89b8851da4dca184654a76128048e09a", - "id": "nmdc:89b8851da4dca184654a76128048e09a", - "file_size_bytes": 2009579 - }, - { - "name": "Gp0115673_SMART GFF file", - "description": "SMART GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_smart.gff", - "md5_checksum": "e0d0721c6051fb0eebd70635882639c1", - "id": "nmdc:e0d0721c6051fb0eebd70635882639c1", - "file_size_bytes": 3834400 - }, - { - "name": "Gp0115673_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115673", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_supfam.gff", - "md5_checksum": "e9b0a3709e78dd9dfdba4eff7103c425", - "id": "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", - "file_size_bytes": 22131290 - }, - { - "name": "Gp0115673_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cath_funfam.gff", - "md5_checksum": "e627abd2dfaee1fbf695de11211c6971", - "id": "nmdc:e627abd2dfaee1fbf695de11211c6971", - "file_size_bytes": 17702997 - }, - { - "name": "Gp0115673_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko_ec.gff", - "md5_checksum": "a04e32711e814e733114531a666606c6", - "id": "nmdc:a04e32711e814e733114531a666606c6", - "file_size_bytes": 13225993 - } - ] - }, - { - "_id": { - "$oid": "649b0052ec087f6bbab3471f" - }, - "has_input": [ - "nmdc:06d4964c0822abd6f94ca883c122f7ce", - "nmdc:2e293158750df042be7422826125bef2", - "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5" - ], - "too_short_contig_num": 83787, - "part_of": [ - "nmdc:mga0kpja70" - ], - "binned_contig_num": 890, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:c907101a9eb50d1e522d1fc11b4d3164", - "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", - "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", - "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", - "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea" - ], - "was_informed_by": "gold:Gp0115673", - "input_contig_num": 89806, - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0kpja70", - "mags_list": [ - { - "number_of_contig": 67, - "completeness": 12.5, - "bin_name": "bins.1", - "gene_count": 318, - "bin_quality": "LQ", + "number_of_contig": 652, + "completeness": 57.14, + "bin_name": 
"bins.5", + "gene_count": 3635, + "bin_quality": "MQ", "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.6, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 27 }, { - "number_of_contig": 823, - "completeness": 97.81, - "bin_name": "bins.2", - "gene_count": 5828, + "number_of_contig": 105, + "completeness": 27.22, + "bin_name": "bins.6", + "gene_count": 509, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", - "num_16s": 1, + "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 66.19, + "contamination": 0.19, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 63 + "num_t_rna": 9 } ], - "unbinned_contig_num": 5129, - "started_at_time": "2021-10-11T02:28:36Z", + "unbinned_contig_num": 8738, + "started_at_time": "2021-10-11T02:28:43Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:32:43+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115673_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115673", 
- "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.tooShort.fa", - "md5_checksum": "c907101a9eb50d1e522d1fc11b4d3164", - "id": "nmdc:c907101a9eb50d1e522d1fc11b4d3164", - "file_size_bytes": 35344893 - }, - { - "name": "Gp0115673_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.unbinned.fa", - "md5_checksum": "f80fbdbf31ee0ac76353d59e64b789bc", - "id": "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", - "file_size_bytes": 8810307 - }, - { - "name": "Gp0115673_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115673", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_checkm_qa.out", - "md5_checksum": "af15089c0cb19ec9bd65f98e59dc94f1", - "id": "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", - "file_size_bytes": 942 - }, - { - "name": "Gp0115673_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115673", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_hqmq_bin.zip", - "md5_checksum": "70d3f2afd9f32a2bdaa81a6fc547f6fb", - "id": "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", - "file_size_bytes": 182 - }, - { - "name": "Gp0115673_metabat2 bins", - "description": "metabat2 bins for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_metabat_bin.zip", - "md5_checksum": "f40d84a4fc0c87d76c144777f9e8a8ea", - "id": "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea", - "file_size_bytes": 1658458 - } - ] + "ended_at_time": "2021-10-11T05:55:52+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b4d" - }, - "id": "nmdc:omprc-11-qtje8r57", - "name": "Sand microcosm microbial communities from a hyporheic zone in 
Columbia River, Washington, USA - GW-RW T4_12-Aug-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-wzdqhh45" - ], - "has_output": [ - "jgi:55d817fa0d8785342fcf8272" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115671" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c891" + "$oid": "649b005bbf2caae0415ef9cf" }, "has_input": [ - "nmdc:57d2e9b1a32e13f859c8b6e450ac3402" + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b" ], "part_of": [ - "nmdc:mga0rw1351" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", - "nmdc:24440b4c5534da30eee650b68eccda84" - ], - "was_informed_by": "gold:Gp0115671", - "input_read_count": 22298982, - "output_read_bases": 3062549086, - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3367146282, - "name": "Read QC Activity for nmdc:mga0rw1351", - "output_read_count": 20445042, - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:39:05+00:00", - "output_data_objects": [ - { - "name": "Gp0115671_Filtered Reads", - "description": "Filtered Reads for Gp0115671", - "data_object_type": "Filtered Sequencing Reads", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filtered.fastq.gz", - "md5_checksum": "445f37bc3019e9fe3b29a2ac5bcbfc9c", - "id": "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", - "file_size_bytes": 1806996776 - }, - { - "name": "Gp0115671_Filtered Stats", - "description": "Filtered Stats for Gp0115671", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filterStats.txt", - "md5_checksum": "24440b4c5534da30eee650b68eccda84", - "id": "nmdc:24440b4c5534da30eee650b68eccda84", - "file_size_bytes": 289 - } - ] - }, - { - "_id": { - "$oid": "649b009bff710ae353f8cf56" - }, - "has_input": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + "nmdc:mga0d7pj22" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:358559c32b69eff51758db66ac01021b", - "nmdc:befbd648249c2871bd27999120e50bf7", - "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "nmdc:e0736ff520260ba2097c02b9e767362c", - "nmdc:a00960655f9e80726fdb0fade1bec958", - "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" + "nmdc:21230aff7bb5b266fb544905f9ac5ce2", + "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", + "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", + "nmdc:811910b7d8c300befddd039e833b0453", + "nmdc:9ed55d9535d1592866a66e9d5cd936a2", + "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", + "nmdc:4b56646de8c37278beaaf9797e4ddf2f", + "nmdc:53a0873376e22fef62f2740f6afead21", + "nmdc:36748318682076112ba81283c8bc767a", + "nmdc:5dd32385b351847f23ec4eac63eb70ff", + "nmdc:95076052a4d5d57e1ed0c7699e4f5472", + "nmdc:6ae89cc4b2fb7d09614c106d3358be27" ], - "was_informed_by": "gold:Gp0115671", - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "was_informed_by": "gold:Gp0115670", + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for 
nmdc:mga0rw1351", - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:39:05+00:00", - "output_data_objects": [ - { - "name": "Gp0115671_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", - "md5_checksum": "358559c32b69eff51758db66ac01021b", - "id": "nmdc:358559c32b69eff51758db66ac01021b", - "file_size_bytes": 11833 - }, - { - "name": "Gp0115671_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", - "md5_checksum": "befbd648249c2871bd27999120e50bf7", - "id": "nmdc:befbd648249c2871bd27999120e50bf7", - "file_size_bytes": 888177 - }, - { - "name": "Gp0115671_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115671", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", - "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", - "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "file_size_bytes": 261703 - }, - { - "name": "Gp0115671_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115671", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", - "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", - "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "file_size_bytes": 1474970402 - }, - { - "name": "Gp0115671_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115671", - "data_object_type": "Centrifuge Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", - "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", - "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "file_size_bytes": 255777 - }, - { - "name": "Gp0115671_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115671", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", - "md5_checksum": "e0736ff520260ba2097c02b9e767362c", - "id": "nmdc:e0736ff520260ba2097c02b9e767362c", - "file_size_bytes": 2329875 - }, - { - "name": "Gp0115671_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115671", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", - "md5_checksum": "a00960655f9e80726fdb0fade1bec958", - "id": "nmdc:a00960655f9e80726fdb0fade1bec958", - "file_size_bytes": 1213240496 - }, - { - "name": "Gp0115671_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115671", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", - "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", - "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "file_size_bytes": 659715 - }, - { - "name": "Gp0115671_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115671", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", - "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", - "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", - "file_size_bytes": 4010701 - } - ] - }, + "name": "Annotation Activity for 
nmdc:mga0d7pj22", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:55:52+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "61e71a4c833bcf838a702155" + "$oid": "649b005f2ca5ee4adb139fc0" }, "has_input": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + "nmdc:7f6b353300583c60d2d668880b4134cd" ], "part_of": [ - "nmdc:mga0rw1351" + "nmdc:mga0d7pj22" ], + "ctg_logsum": 272574, + "scaf_logsum": 274450, + "gap_pct": 0.00346, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:358559c32b69eff51758db66ac01021b", - "nmdc:befbd648249c2871bd27999120e50bf7", - "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "nmdc:e0736ff520260ba2097c02b9e767362c", - "nmdc:a00960655f9e80726fdb0fade1bec958", - "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + "nmdc:1dfaed4da055c5fd4226abe08bd91db9", + "nmdc:8a749340eefc40901a22a0ef603bc803", + "nmdc:ad027e4c3ca67907154c03feeebbd97b", + "nmdc:c4f2407273babd894282d4d0f20be5d1" ], - "was_informed_by": "gold:Gp0115671", - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "asm_score": 12.57, + "was_informed_by": "gold:Gp0115670", + "ctg_powsum": 33596, + "scaf_max": 211520, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "scaf_powsum": 33865, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:39:05+00:00", - "output_data_objects": [ - { - "name": "Gp0115671_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", - "md5_checksum": "358559c32b69eff51758db66ac01021b", - 
"id": "nmdc:358559c32b69eff51758db66ac01021b", - "file_size_bytes": 11833 - }, - { - "name": "Gp0115671_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", - "md5_checksum": "befbd648249c2871bd27999120e50bf7", - "id": "nmdc:befbd648249c2871bd27999120e50bf7", - "file_size_bytes": 888177 - }, - { - "name": "Gp0115671_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115671", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", - "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", - "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "file_size_bytes": 261703 - }, - { - "name": "Gp0115671_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115671", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", - "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", - "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "file_size_bytes": 1474970402 - }, - { - "name": "Gp0115671_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115671", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", - "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", - "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "file_size_bytes": 255777 - }, - { - "name": "Gp0115671_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115671", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", - "md5_checksum": "e0736ff520260ba2097c02b9e767362c", - "id": "nmdc:e0736ff520260ba2097c02b9e767362c", - "file_size_bytes": 2329875 - }, - { - "name": "Gp0115671_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115671", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", - "md5_checksum": "a00960655f9e80726fdb0fade1bec958", - "id": "nmdc:a00960655f9e80726fdb0fade1bec958", - "file_size_bytes": 1213240496 - }, - { - "name": "Gp0115671_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115671", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", - "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", - "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "file_size_bytes": 659715 - }, - { - "name": "Gp0115671_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115671", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", - "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", - "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", - "file_size_bytes": 4010701 - } + "contigs": 152605, + "name": "Assembly Activity for nmdc:mga0d7pj22", + "ctg_max": 211520, + "gc_std": 0.125, + "contig_bp": 79563543, + "gc_avg": 0.57036, + "started_at_time": "2021-10-11T02:28:43Z", + "scaf_bp": 79566293, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 152330, + "ended_at_time": "2021-10-11T05:55:52+00:00", + "ctg_l50": 492, + "ctg_l90": 290, + "ctg_n50": 35595, + "ctg_n90": 126332, + "scaf_l50": 493, + "scaf_l90": 290, + "scaf_n50": 35340, + 
"scaf_n90": 126070, + "scaf_l_gt50k": 1744421, + "scaf_n_gt50k": 21, + "scaf_pct_gt50k": 2.192412 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4a" + }, + "id": "nmdc:omprc-11-t0xjjc50", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-vg9vy382" + ], + "has_output": [ + "jgi:55d7402a0d8785342fcf7e3b" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115670" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fc1" + "$oid": "649b009d6bdd4fd20273c888" }, "has_input": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + "nmdc:aa477a857eb9da284635b774477f3f54" ], "part_of": [ - "nmdc:mga0rw1351" + "nmdc:mga0d7pj22" ], - "ctg_logsum": 111611, - "scaf_logsum": 112140, - "gap_pct": 0.00155, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:0a1ebd847e3bb8f928ef491497f8355b", - "nmdc:be4cab04a701bce0ed99605109bd5d6f", - "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", - "nmdc:473ca208ab97399a644c8e5326e765e5", - "nmdc:69371e513bebd1069a0ed26cc2c914cb" + "nmdc:7f6b353300583c60d2d668880b4134cd", + 
"nmdc:a4f65d101293fa4345cd865f86597464" ], - "asm_score": 3.588, - "was_informed_by": "gold:Gp0115671", - "ctg_powsum": 12152, - "scaf_max": 16504, - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "scaf_powsum": 12215, + "was_informed_by": "gold:Gp0115670", + "input_read_count": 36554212, + "output_read_bases": 5044444014, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", "execution_resource": "NERSC-Cori", - "contigs": 120326, - "name": "Assembly Activity for nmdc:mga0rw1351", - "ctg_max": 16504, - "gc_std": 0.11331, - "contig_bp": 54171370, - "gc_avg": 0.54451, - "started_at_time": "2021-10-11T02:27:50Z", - "scaf_bp": 54172210, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 120242, - "ended_at_time": "2021-10-11T03:39:05+00:00", - "ctg_l50": 421, - "ctg_l90": 285, - "ctg_n50": 34725, - "ctg_n90": 101428, - "scaf_l50": 421, - "scaf_l90": 285, - "scaf_n50": 34687, - "scaf_n90": 101345, - "output_data_objects": [ - { - "name": "Gp0115671_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115671", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_contigs.fna", - "md5_checksum": "0a1ebd847e3bb8f928ef491497f8355b", - "id": "nmdc:0a1ebd847e3bb8f928ef491497f8355b", - "file_size_bytes": 58744710 - }, - { - "name": "Gp0115671_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115671", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_scaffolds.fna", - "md5_checksum": "be4cab04a701bce0ed99605109bd5d6f", - "id": "nmdc:be4cab04a701bce0ed99605109bd5d6f", - "file_size_bytes": 58382380 - }, - { - "name": "Gp0115671_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_covstats.txt", - "md5_checksum": 
"cc4d3160618a82f81518bdc97ce1f5e2", - "id": "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", - "file_size_bytes": 9464710 - }, - { - "name": "Gp0115671_Assembled AGP file", - "description": "Assembled AGP file for Gp0115671", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_assembly.agp", - "md5_checksum": "473ca208ab97399a644c8e5326e765e5", - "id": "nmdc:473ca208ab97399a644c8e5326e765e5", - "file_size_bytes": 8820452 - }, - { - "name": "Gp0115671_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115671", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_pairedMapped_sorted.bam", - "md5_checksum": "69371e513bebd1069a0ed26cc2c914cb", - "id": "nmdc:69371e513bebd1069a0ed26cc2c914cb", - "file_size_bytes": 1938214126 - } - ] - }, + "input_read_bases": 5519686012, + "name": "Read QC Activity for nmdc:mga0d7pj22", + "output_read_count": 33663942, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:55:52+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9d3" + "$oid": "649b009bff710ae353f8cf4d" }, "has_input": [ - "nmdc:0a1ebd847e3bb8f928ef491497f8355b" - ], - "part_of": [ - "nmdc:mga0rw1351" + "nmdc:7f6b353300583c60d2d668880b4134cd" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:147b97234576ba123a9f3c63eb249ecf", - "nmdc:3e037f5f744c9f8e4aa355222cc620ae", - "nmdc:10d19849864ecdb722335200d0607bbe", - "nmdc:0ce9fa5958b6445f7be463538e89e9b1", - "nmdc:a3bc059d9350034f835be4e754486c73", - "nmdc:da9866461051130a44f0982b1a65c061", - "nmdc:676fff23fb641ee8af8a2b948fc5b46e", - "nmdc:a4aa56158a292b63078eb029ed1d90a9", - "nmdc:6a28f85e8b5addccb429cc7f8964e496", - "nmdc:d5b21cce7406ab46611c49dc1ab658ed", - 
"nmdc:8ead1ab881fd48527d853b0d0601b4bc", - "nmdc:ad206c1031a6f0a7805034dee03ff889" + "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "nmdc:c065784bed2b2495d512af93d05967de", + "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "nmdc:8a26d8496a70f4777be0e1237092e44c", + "nmdc:694b83f0b6f599948d4248dd48dd9ba9" ], - "was_informed_by": "gold:Gp0115671", - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "was_informed_by": "gold:Gp0115670", + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0rw1351", - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:39:05+00:00", - "output_data_objects": [ - { - "name": "Gp0115671_Protein FAA", - "description": "Protein FAA for Gp0115671", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_proteins.faa", - "md5_checksum": "147b97234576ba123a9f3c63eb249ecf", - "id": "nmdc:147b97234576ba123a9f3c63eb249ecf", - "file_size_bytes": 32911597 - }, - { - "name": "Gp0115671_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115671", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_structural_annotation.gff", - "md5_checksum": "3e037f5f744c9f8e4aa355222cc620ae", - "id": "nmdc:3e037f5f744c9f8e4aa355222cc620ae", - "file_size_bytes": 2516 - }, - { - "name": "Gp0115671_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115671", - "data_object_type": "Functional Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_functional_annotation.gff", - "md5_checksum": "10d19849864ecdb722335200d0607bbe", - "id": "nmdc:10d19849864ecdb722335200d0607bbe", - "file_size_bytes": 38009425 - }, - { - "name": "Gp0115671_KO TSV file", - "description": "KO TSV file for Gp0115671", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko.tsv", - "md5_checksum": "0ce9fa5958b6445f7be463538e89e9b1", - "id": "nmdc:0ce9fa5958b6445f7be463538e89e9b1", - "file_size_bytes": 4994549 - }, - { - "name": "Gp0115671_EC TSV file", - "description": "EC TSV file for Gp0115671", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ec.tsv", - "md5_checksum": "a3bc059d9350034f835be4e754486c73", - "id": "nmdc:a3bc059d9350034f835be4e754486c73", - "file_size_bytes": 3207987 - }, - { - "name": "Gp0115671_COG GFF file", - "description": "COG GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cog.gff", - "md5_checksum": "da9866461051130a44f0982b1a65c061", - "id": "nmdc:da9866461051130a44f0982b1a65c061", - "file_size_bytes": 21138081 - }, - { - "name": "Gp0115671_PFAM GFF file", - "description": "PFAM GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_pfam.gff", - "md5_checksum": "676fff23fb641ee8af8a2b948fc5b46e", - "id": "nmdc:676fff23fb641ee8af8a2b948fc5b46e", - "file_size_bytes": 16269399 - }, - { - "name": "Gp0115671_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_tigrfam.gff", - "md5_checksum": "a4aa56158a292b63078eb029ed1d90a9", - "id": "nmdc:a4aa56158a292b63078eb029ed1d90a9", - "file_size_bytes": 2189740 - }, - { - "name": 
"Gp0115671_SMART GFF file", - "description": "SMART GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_smart.gff", - "md5_checksum": "6a28f85e8b5addccb429cc7f8964e496", - "id": "nmdc:6a28f85e8b5addccb429cc7f8964e496", - "file_size_bytes": 4669463 - }, - { - "name": "Gp0115671_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_supfam.gff", - "md5_checksum": "d5b21cce7406ab46611c49dc1ab658ed", - "id": "nmdc:d5b21cce7406ab46611c49dc1ab658ed", - "file_size_bytes": 26589549 - }, - { - "name": "Gp0115671_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cath_funfam.gff", - "md5_checksum": "8ead1ab881fd48527d853b0d0601b4bc", - "id": "nmdc:8ead1ab881fd48527d853b0d0601b4bc", - "file_size_bytes": 20889965 - }, - { - "name": "Gp0115671_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko_ec.gff", - "md5_checksum": "ad206c1031a6f0a7805034dee03ff889", - "id": "nmdc:ad206c1031a6f0a7805034dee03ff889", - "file_size_bytes": 15914575 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:55:52+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34733" + "$oid": "61e71a12833bcf838a701ba9" }, "has_input": [ - "nmdc:0a1ebd847e3bb8f928ef491497f8355b", - "nmdc:69371e513bebd1069a0ed26cc2c914cb", - "nmdc:10d19849864ecdb722335200d0607bbe" + 
"nmdc:7f6b353300583c60d2d668880b4134cd" ], - "too_short_contig_num": 114372, "part_of": [ - "nmdc:mga0rw1351" + "nmdc:mga0d7pj22" ], - "binned_contig_num": 328, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", - "nmdc:43a900225e93216944b4eec3a01f7db7", - "nmdc:cad0e18a4d2c4067a2724f41e449cb86", - "nmdc:55577aa26faf185b3b3f4c78711e7715", - "nmdc:c484ee1e530a0c9b47069c0288110e47" + "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "nmdc:c065784bed2b2495d512af93d05967de", + "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "nmdc:8a26d8496a70f4777be0e1237092e44c", + "nmdc:694b83f0b6f599948d4248dd48dd9ba9" ], - "was_informed_by": "gold:Gp0115671", - "input_contig_num": 120326, - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "was_informed_by": "gold:Gp0115670", + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0rw1351", - "mags_list": [ - { - "number_of_contig": 173, - "completeness": 26.29, - "bin_name": "bins.1", - "gene_count": 875, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.18, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 14 + "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:55:52+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": 
"Gp0115674_Filtered Reads", + "description": "Filtered Reads for Gp0115674", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filtered.fastq.gz", + "md5_checksum": "538fd5695eb3decd48891e72acebb8ce", + "id": "nmdc:538fd5695eb3decd48891e72acebb8ce", + "file_size_bytes": 2126353222 + }, + { + "name": "Gp0115674_Filtered Stats", + "description": "Filtered Stats for Gp0115674", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filterStats.txt", + "md5_checksum": "dde2b1748e16380e63476430ee27083a", + "id": "nmdc:dde2b1748e16380e63476430ee27083a", + "file_size_bytes": 288 + }, + { + "name": "Gp0115674_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", + "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", + "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "file_size_bytes": 13768 + }, + { + "name": "Gp0115674_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", + "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", + "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "file_size_bytes": 1022858 + }, + { + "name": "Gp0115674_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115674", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", + "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", + "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "file_size_bytes": 269166 + }, + { + "name": "Gp0115674_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for 
Gp0115674", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", + "md5_checksum": "f8740b1fadbc29aef50d32706c955199", + "id": "nmdc:f8740b1fadbc29aef50d32706c955199", + "file_size_bytes": 1904303690 + }, + { + "name": "Gp0115674_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115674", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", + "md5_checksum": "80abfcc9b09476af4083b2af1760834f", + "id": "nmdc:80abfcc9b09476af4083b2af1760834f", + "file_size_bytes": 258748 + }, + { + "name": "Gp0115674_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115674", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", + "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", + "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "file_size_bytes": 2335000 + }, + { + "name": "Gp0115674_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115674", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", + "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", + "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "file_size_bytes": 1574286150 + }, + { + "name": "Gp0115674_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115674", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", + "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", + "id": 
"nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "file_size_bytes": 671800 + }, + { + "name": "Gp0115674_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115674", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", + "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", + "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", + "file_size_bytes": 4070548 + }, + { + "name": "Gp0115674_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", + "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", + "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "file_size_bytes": 13768 + }, + { + "name": "Gp0115674_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", + "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", + "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "file_size_bytes": 1022858 + }, + { + "name": "Gp0115674_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115674", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", + "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", + "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "file_size_bytes": 269166 + }, + { + "name": "Gp0115674_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115674", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", + "md5_checksum": 
"f8740b1fadbc29aef50d32706c955199", + "id": "nmdc:f8740b1fadbc29aef50d32706c955199", + "file_size_bytes": 1904303690 + }, + { + "name": "Gp0115674_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115674", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", + "md5_checksum": "80abfcc9b09476af4083b2af1760834f", + "id": "nmdc:80abfcc9b09476af4083b2af1760834f", + "file_size_bytes": 258748 + }, + { + "name": "Gp0115674_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115674", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", + "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", + "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "file_size_bytes": 2335000 + }, + { + "name": "Gp0115674_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115674", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", + "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", + "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "file_size_bytes": 1574286150 + }, + { + "name": "Gp0115674_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115674", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", + "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", + "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "file_size_bytes": 671800 + }, + { + "name": "Gp0115674_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115674", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", + "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", + "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", + "file_size_bytes": 4070548 + }, + { + "name": "Gp0115674_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115674", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_contigs.fna", + "md5_checksum": "ed2e4b90c8c2947486cc5c3c5828f949", + "id": "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "file_size_bytes": 78686505 + }, + { + "name": "Gp0115674_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115674", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_scaffolds.fna", + "md5_checksum": "e8fa9ae5e04a2969d220d81f1fb752f2", + "id": "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", + "file_size_bytes": 78267725 + }, + { + "name": "Gp0115674_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_covstats.txt", + "md5_checksum": "5f308ea3cb43a331cda55ac9f91c6a53", + "id": "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", + "file_size_bytes": 10980044 + }, + { + "name": "Gp0115674_Assembled AGP file", + "description": "Assembled AGP file for Gp0115674", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_assembly.agp", + "md5_checksum": "604ed99b7c622082ddf174bb11d2787f", + "id": "nmdc:604ed99b7c622082ddf174bb11d2787f", + "file_size_bytes": 10249514 + }, + { + "name": "Gp0115674_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115674", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_pairedMapped_sorted.bam", + "md5_checksum": "a0263d8b11653306a05f598395ca603a", + "id": "nmdc:a0263d8b11653306a05f598395ca603a", + "file_size_bytes": 2304306876 + }, + { + "name": "Gp0115674_Protein FAA", + "description": "Protein FAA for Gp0115674", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_proteins.faa", + "md5_checksum": "9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "id": "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "file_size_bytes": 43650605 + }, + { + "name": "Gp0115674_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115674", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_structural_annotation.gff", + "md5_checksum": "ce90743969776fd717671aeb21d37379", + "id": "nmdc:ce90743969776fd717671aeb21d37379", + "file_size_bytes": 2529 + }, + { + "name": "Gp0115674_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115674", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_functional_annotation.gff", + "md5_checksum": "1a4f5145ccf0838811fe570a93549fdf", + "id": "nmdc:1a4f5145ccf0838811fe570a93549fdf", + "file_size_bytes": 47604509 + }, + { + "name": "Gp0115674_KO TSV file", + "description": "KO TSV file for Gp0115674", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko.tsv", + "md5_checksum": "662dae8ba0ea9dda93637c2ea60c1f4e", + "id": "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", + "file_size_bytes": 6436472 + }, + { + "name": "Gp0115674_EC TSV file", + "description": "EC TSV file for Gp0115674", + "data_object_type": "Annotation Enzyme Commission", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ec.tsv", + "md5_checksum": "b5db445feb8edb47022c2a0ee86d828d", + "id": "nmdc:b5db445feb8edb47022c2a0ee86d828d", + "file_size_bytes": 4111562 + }, + { + "name": "Gp0115674_COG GFF file", + "description": "COG GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cog.gff", + "md5_checksum": "157d24f6f63091fbe9ef98cc3090975d", + "id": "nmdc:157d24f6f63091fbe9ef98cc3090975d", + "file_size_bytes": 27373015 + }, + { + "name": "Gp0115674_PFAM GFF file", + "description": "PFAM GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_pfam.gff", + "md5_checksum": "afa217feffb94965aa1839041305237e", + "id": "nmdc:afa217feffb94965aa1839041305237e", + "file_size_bytes": 22153817 + }, + { + "name": "Gp0115674_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_tigrfam.gff", + "md5_checksum": "4a00e0c0bc479b8e6f1139c8de3149d5", + "id": "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", + "file_size_bytes": 2995281 + }, + { + "name": "Gp0115674_SMART GFF file", + "description": "SMART GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_smart.gff", + "md5_checksum": "ffcd280a63fab7bcfa5422f34070d87f", + "id": "nmdc:ffcd280a63fab7bcfa5422f34070d87f", + "file_size_bytes": 6393135 + }, + { + "name": "Gp0115674_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_supfam.gff", + "md5_checksum": "9fb334fc9409e6db51aaa1f960b08f4b", + "id": "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", + "file_size_bytes": 35023258 + }, + { + "name": "Gp0115674_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115674", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cath_funfam.gff", + "md5_checksum": "d5676c01e67f71559a382850f42c3493", + "id": "nmdc:d5676c01e67f71559a382850f42c3493", + "file_size_bytes": 27788764 + }, + { + "name": "Gp0115674_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko_ec.gff", + "md5_checksum": "121fab4d5bff0dcbb9d1849738a72347", + "id": "nmdc:121fab4d5bff0dcbb9d1849738a72347", + "file_size_bytes": 20542466 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115674_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.tooShort.fa", + "md5_checksum": "6a03eb0156b154ea68ffff9b473e73a5", + "id": "nmdc:6a03eb0156b154ea68ffff9b473e73a5", + "file_size_bytes": 56345518 + }, + { + "name": "Gp0115674_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.unbinned.fa", + "md5_checksum": "33a477987509b67fcfa5096d20c7c40b", + "id": "nmdc:33a477987509b67fcfa5096d20c7c40b", + "file_size_bytes": 10836032 + }, + { + "name": "Gp0115674_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115674", + "data_object_type": "CheckM Statistics", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_checkm_qa.out", + "md5_checksum": "314c92c3a9458e1aa304e3c474209acf", + "id": "nmdc:314c92c3a9458e1aa304e3c474209acf", + "file_size_bytes": 1360 + }, + { + "name": "Gp0115674_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115674", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_hqmq_bin.zip", + "md5_checksum": "a4f9093efaf84855cab58880b262afd5", + "id": "nmdc:a4f9093efaf84855cab58880b262afd5", + "file_size_bytes": 2974639 + }, + { + "name": "Gp0115674_metabat2 bins", + "description": "metabat2 bins for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_metabat_bin.zip", + "md5_checksum": "1a29af6f30c21f38b25e4553605f50ef", + "id": "nmdc:1a29af6f30c21f38b25e4553605f50ef", + "file_size_bytes": 469326 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34732" + }, + "has_input": [ + "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "nmdc:a0263d8b11653306a05f598395ca603a", + "nmdc:1a4f5145ccf0838811fe570a93549fdf" + ], + "too_short_contig_num": 131855, + "part_of": [ + "nmdc:mga0cf0450" + ], + "binned_contig_num": 1119, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:6a03eb0156b154ea68ffff9b473e73a5", + "nmdc:33a477987509b67fcfa5096d20c7c40b", + "nmdc:314c92c3a9458e1aa304e3c474209acf", + "nmdc:a4f9093efaf84855cab58880b262afd5", + "nmdc:1a29af6f30c21f38b25e4553605f50ef" + ], + "was_informed_by": "gold:Gp0115674", + "input_contig_num": 139324, + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0cf0450", + "mags_list": [ + { + 
"number_of_contig": 198, + "completeness": 100.0, + "bin_name": "bins.1", + "gene_count": 5608, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.29, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 46 }, { - "number_of_contig": 155, - "completeness": 24.1, + "number_of_contig": 353, + "completeness": 88.62, "bin_name": "bins.2", - "gene_count": 806, + "gene_count": 3146, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.0, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 40 + }, + { + "number_of_contig": 273, + "completeness": 51.61, + "bin_name": "bins.3", + "gene_count": 1397, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 0.8, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 17 + }, + { + "number_of_contig": 295, + "completeness": 49.14, + "bin_name": "bins.4", + "gene_count": 1695, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -18003,671 +17196,660 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 9 + "num_t_rna": 16 } ], - "unbinned_contig_num": 5626, - "started_at_time": "2021-10-11T02:27:50Z", + "unbinned_contig_num": 6350, + "started_at_time": "2021-10-11T02:28:52Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:39:05+00:00", - "output_data_objects": [ - { - "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115671_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.tooShort.fa", - "md5_checksum": "57fd559aaca7b976f3b38bb1a3ce362b", - "id": "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", - "file_size_bytes": 48167943 - }, - { - "name": "Gp0115671_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.unbinned.fa", - "md5_checksum": "43a900225e93216944b4eec3a01f7db7", - "id": "nmdc:43a900225e93216944b4eec3a01f7db7", - "file_size_bytes": 9124730 - }, - { - "name": "Gp0115671_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115671", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_checkm_qa.out", - "md5_checksum": "cad0e18a4d2c4067a2724f41e449cb86", - "id": "nmdc:cad0e18a4d2c4067a2724f41e449cb86", - "file_size_bytes": 1014 - }, - { - "name": "Gp0115671_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115671", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_hqmq_bin.zip", - "md5_checksum": "55577aa26faf185b3b3f4c78711e7715", - "id": 
"nmdc:55577aa26faf185b3b3f4c78711e7715", - "file_size_bytes": 182 - }, - { - "name": "Gp0115671_metabat2 bins", - "description": "metabat2 bins for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_metabat_bin.zip", - "md5_checksum": "c484ee1e530a0c9b47069c0288110e47", - "id": "nmdc:c484ee1e530a0c9b47069c0288110e47", - "file_size_bytes": 444082 - } - ] + "ended_at_time": "2021-10-11T05:21:41+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b4e" - }, - "id": "nmdc:omprc-11-7ey2jr63", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-pkgtg048" - ], - "has_output": [ - "jgi:55d740220d8785342fcf7e35" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115676" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c883" + "$oid": "649b005bbf2caae0415ef9d4" }, "has_input": [ - "nmdc:5672111f6f33b8aff5f65e69ebb41c5e" + "nmdc:ed2e4b90c8c2947486cc5c3c5828f949" ], "part_of": [ - "nmdc:mga0w3a067" + "nmdc:mga0cf0450" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", - 
"nmdc:79815495339053b7935b55dbde02b2ff" + "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "nmdc:ce90743969776fd717671aeb21d37379", + "nmdc:1a4f5145ccf0838811fe570a93549fdf", + "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", + "nmdc:b5db445feb8edb47022c2a0ee86d828d", + "nmdc:157d24f6f63091fbe9ef98cc3090975d", + "nmdc:afa217feffb94965aa1839041305237e", + "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", + "nmdc:ffcd280a63fab7bcfa5422f34070d87f", + "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", + "nmdc:d5676c01e67f71559a382850f42c3493", + "nmdc:121fab4d5bff0dcbb9d1849738a72347" ], - "was_informed_by": "gold:Gp0115676", - "input_read_count": 39069214, - "output_read_bases": 5550744725, - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "was_informed_by": "gold:Gp0115674", + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", "execution_resource": "NERSC-Cori", - "input_read_bases": 5899451314, - "name": "Read QC Activity for nmdc:mga0w3a067", - "output_read_count": 37037822, - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:40:05+00:00", - "output_data_objects": [ - { - "name": "Gp0115676_Filtered Reads", - "description": "Filtered Reads for Gp0115676", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", - "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", - "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", - "file_size_bytes": 3113249122 - }, - { - "name": "Gp0115676_Filtered Stats", - "description": "Filtered Stats for Gp0115676", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", - "md5_checksum": "79815495339053b7935b55dbde02b2ff", - "id": "nmdc:79815495339053b7935b55dbde02b2ff", - "file_size_bytes": 292 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0cf0450", + "started_at_time": "2021-10-11T02:28:52Z", + "type": 
"nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:21:41+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf4c" + "$oid": "649b005f2ca5ee4adb139fb6" }, "has_input": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" + "nmdc:538fd5695eb3decd48891e72acebb8ce" + ], + "part_of": [ + "nmdc:mga0cf0450" ], + "ctg_logsum": 272042, + "scaf_logsum": 272657, + "gap_pct": 0.00172, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:13343b2533892633bcc3655a1ebe788f", - "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "nmdc:95a2de8be672fd50bf542215194dc4d4", - "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "nmdc:6e1e28773094884d35c04072309e285a", - "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" + "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", + "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", + "nmdc:604ed99b7c622082ddf174bb11d2787f", + "nmdc:a0263d8b11653306a05f598395ca603a" ], - "was_informed_by": "gold:Gp0115676", - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "asm_score": 18.19, + "was_informed_by": "gold:Gp0115674", + "ctg_powsum": 36133, + "scaf_max": 176505, + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "scaf_powsum": 36239, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:40:05+00:00", - "output_data_objects": [ - { - "name": "Gp0115676_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", - "md5_checksum": "13343b2533892633bcc3655a1ebe788f", - "id": "nmdc:13343b2533892633bcc3655a1ebe788f", - "file_size_bytes": 13659 - }, - { - 
"name": "Gp0115676_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", - "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", - "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "file_size_bytes": 1168924 - }, - { - "name": "Gp0115676_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115676", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", - "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", - "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", - "file_size_bytes": 267660 - }, - { - "name": "Gp0115676_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115676", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", - "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", - "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "file_size_bytes": 2721808152 - }, - { - "name": "Gp0115676_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115676", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", - "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", - "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "file_size_bytes": 263207 - }, - { - "name": "Gp0115676_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115676", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", - "md5_checksum": 
"6e1e28773094884d35c04072309e285a", - "id": "nmdc:6e1e28773094884d35c04072309e285a", - "file_size_bytes": 2347912 - }, - { - "name": "Gp0115676_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115676", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", - "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", - "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "file_size_bytes": 2224468607 - }, - { - "name": "Gp0115676_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115676", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", - "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", - "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "file_size_bytes": 701128 - }, - { - "name": "Gp0115676_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115676", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", - "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", - "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", - "file_size_bytes": 4217185 - } + "contigs": 139326, + "name": "Assembly Activity for nmdc:mga0cf0450", + "ctg_max": 176505, + "gc_std": 0.12397, + "contig_bp": 73195425, + "gc_avg": 0.56886, + "started_at_time": "2021-10-11T02:28:52Z", + "scaf_bp": 73196685, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 139236, + "ended_at_time": "2021-10-11T05:21:41+00:00", + "ctg_l50": 481, + "ctg_l90": 290, + "ctg_n50": 30768, + "ctg_n90": 115008, + "scaf_l50": 482, + "scaf_l90": 290, + "scaf_n50": 30582, + "scaf_n90": 114932, + "scaf_l_gt50k": 2506146, + "scaf_n_gt50k": 32, + "scaf_pct_gt50k": 3.4238515 + } + ], + 
"metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4b" + }, + "id": "nmdc:omprc-11-1avd3d16", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-5xjtzc47" + ], + "has_output": [ + "jgi:55d7402c0d8785342fcf7e3e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115674" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e71a12833bcf838a701b3a" + "$oid": "649b009d6bdd4fd20273c88a" }, "has_input": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" + "nmdc:d94c174a22116c2db7ab8c47619e30aa" ], "part_of": [ - "nmdc:mga0w3a067" + "nmdc:mga0cf0450" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:13343b2533892633bcc3655a1ebe788f", - "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "nmdc:95a2de8be672fd50bf542215194dc4d4", - "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "nmdc:6e1e28773094884d35c04072309e285a", - "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" + "nmdc:538fd5695eb3decd48891e72acebb8ce", + 
"nmdc:dde2b1748e16380e63476430ee27083a" ], - "was_informed_by": "gold:Gp0115676", - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "was_informed_by": "gold:Gp0115674", + "input_read_count": 26546332, + "output_read_bases": 3862169938, + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:40:05+00:00", - "output_data_objects": [ - { - "name": "Gp0115676_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", - "md5_checksum": "13343b2533892633bcc3655a1ebe788f", - "id": "nmdc:13343b2533892633bcc3655a1ebe788f", - "file_size_bytes": 13659 - }, - { - "name": "Gp0115676_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", - "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", - "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "file_size_bytes": 1168924 - }, - { - "name": "Gp0115676_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115676", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", - "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", - "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", - "file_size_bytes": 267660 - }, - { - "name": "Gp0115676_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115676", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", - "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", - "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "file_size_bytes": 2721808152 - }, - { - "name": "Gp0115676_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115676", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", - "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", - "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "file_size_bytes": 263207 - }, - { - "name": "Gp0115676_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115676", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", - "md5_checksum": "6e1e28773094884d35c04072309e285a", - "id": "nmdc:6e1e28773094884d35c04072309e285a", - "file_size_bytes": 2347912 - }, - { - "name": "Gp0115676_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115676", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", - "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", - "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "file_size_bytes": 2224468607 - }, - { - "name": "Gp0115676_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115676", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", - "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", - "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "file_size_bytes": 701128 - }, - { - "name": "Gp0115676_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115676", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", - "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", - "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", - "file_size_bytes": 4217185 - } - ] - }, + "input_read_bases": 4008496132, + "name": "Read QC Activity for nmdc:mga0cf0450", + "output_read_count": 25776010, + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:21:41+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fbe" + "$oid": "649b009bff710ae353f8cf50" }, "has_input": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" - ], - "part_of": [ - "nmdc:mga0w3a067" + "nmdc:538fd5695eb3decd48891e72acebb8ce" ], - "ctg_logsum": 335229, - "scaf_logsum": 337025, - "gap_pct": 0.00236, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:19987e32391f846db382edabf14ba43e", - "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", - "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", - "nmdc:1b665fb0fbbf40a13122100c927b398b", - "nmdc:7c1232ff8d861d2e2c111a1dc4a70480" + "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "nmdc:f8740b1fadbc29aef50d32706c955199", + "nmdc:80abfcc9b09476af4083b2af1760834f", + "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" ], - "asm_score": 10.939, - "was_informed_by": "gold:Gp0115676", - "ctg_powsum": 40696, - "scaf_max": 163197, - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", - "scaf_powsum": 40973, + "was_informed_by": "gold:Gp0115674", + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", "execution_resource": "NERSC-Cori", - 
"contigs": 187125, - "name": "Assembly Activity for nmdc:mga0w3a067", - "ctg_max": 163197, - "gc_std": 0.10616, - "contig_bp": 97611209, - "gc_avg": 0.5929, - "started_at_time": "2021-10-11T02:26:37Z", - "scaf_bp": 97613509, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 186895, - "ended_at_time": "2021-10-11T05:40:05+00:00", - "ctg_l50": 499, - "ctg_l90": 288, - "ctg_n50": 42676, - "ctg_n90": 155670, - "scaf_l50": 499, - "scaf_l90": 288, - "scaf_n50": 42593, - "scaf_n90": 155449, - "scaf_l_gt50k": 743033, - "scaf_n_gt50k": 11, - "scaf_pct_gt50k": 0.7611989, - "output_data_objects": [ - { - "name": "Gp0115676_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115676", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_contigs.fna", - "md5_checksum": "19987e32391f846db382edabf14ba43e", - "id": "nmdc:19987e32391f846db382edabf14ba43e", - "file_size_bytes": 105010680 - }, - { - "name": "Gp0115676_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115676", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_scaffolds.fna", - "md5_checksum": "1a4c5ace6c1b54e057d282031e8bc2c6", - "id": "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", - "file_size_bytes": 104445982 - }, - { - "name": "Gp0115676_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_covstats.txt", - "md5_checksum": "af7a38646011c9e6d0ad2b1ebd7f47c9", - "id": "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", - "file_size_bytes": 14811778 - }, - { - "name": "Gp0115676_Assembled AGP file", - "description": "Assembled AGP file for Gp0115676", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_assembly.agp", - 
"md5_checksum": "1b665fb0fbbf40a13122100c927b398b", - "id": "nmdc:1b665fb0fbbf40a13122100c927b398b", - "file_size_bytes": 13854137 - }, - { - "name": "Gp0115676_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115676", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_pairedMapped_sorted.bam", - "md5_checksum": "7c1232ff8d861d2e2c111a1dc4a70480", - "id": "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", - "file_size_bytes": 3366223347 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:21:41+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9cb" + "$oid": "61e71a31833bcf838a701e57" }, "has_input": [ - "nmdc:19987e32391f846db382edabf14ba43e" + "nmdc:538fd5695eb3decd48891e72acebb8ce" ], "part_of": [ - "nmdc:mga0w3a067" + "nmdc:mga0cf0450" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:35adf26b13c97c40147af2f067e0c9be", - "nmdc:3de29d8dede94769e7753f0aaee86691", - "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", - "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", - "nmdc:98b9ea6588dc9ff918298c4a7c567edf", - "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", - "nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", - "nmdc:4d0469ae5b27dd4045d637d2493ccba9", - "nmdc:a893783f6886e31b6bca5b6baede9f66", - "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", - "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", - "nmdc:83647c3e1ed96fda36f7c119a3e98182" + "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "nmdc:f8740b1fadbc29aef50d32706c955199", + 
"nmdc:80abfcc9b09476af4083b2af1760834f", + "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" ], - "was_informed_by": "gold:Gp0115676", - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "was_informed_by": "gold:Gp0115674", + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0w3a067", - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:40:05+00:00", - "output_data_objects": [ - { - "name": "Gp0115676_Protein FAA", - "description": "Protein FAA for Gp0115676", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_proteins.faa", - "md5_checksum": "35adf26b13c97c40147af2f067e0c9be", - "id": "nmdc:35adf26b13c97c40147af2f067e0c9be", - "file_size_bytes": 59120149 - }, - { - "name": "Gp0115676_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115676", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_structural_annotation.gff", - "md5_checksum": "3de29d8dede94769e7753f0aaee86691", - "id": "nmdc:3de29d8dede94769e7753f0aaee86691", - "file_size_bytes": 2524 - }, - { - "name": "Gp0115676_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115676", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_functional_annotation.gff", - "md5_checksum": "6fa3d1e5fae636b4199ff57b4776a51c", - "id": "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", - "file_size_bytes": 65284624 - }, - { - "name": "Gp0115676_KO TSV file", - "description": "KO TSV file for Gp0115676", - "data_object_type": 
"Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko.tsv", - "md5_checksum": "b865dcd9976c90dbc8459ec7ccc72d45", - "id": "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", - "file_size_bytes": 9219020 - }, - { - "name": "Gp0115676_EC TSV file", - "description": "EC TSV file for Gp0115676", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ec.tsv", - "md5_checksum": "98b9ea6588dc9ff918298c4a7c567edf", - "id": "nmdc:98b9ea6588dc9ff918298c4a7c567edf", - "file_size_bytes": 5972063 - }, - { - "name": "Gp0115676_COG GFF file", - "description": "COG GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cog.gff", - "md5_checksum": "d8fbe8d24c00eee2ef163e3bb428b718", - "id": "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", - "file_size_bytes": 39290017 - }, - { - "name": "Gp0115676_PFAM GFF file", - "description": "PFAM GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_pfam.gff", - "md5_checksum": "ed68f1e7fd4873f1ea756d0c58a9c550", - "id": "nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", - "file_size_bytes": 31343624 - }, - { - "name": "Gp0115676_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_tigrfam.gff", - "md5_checksum": "4d0469ae5b27dd4045d637d2493ccba9", - "id": "nmdc:4d0469ae5b27dd4045d637d2493ccba9", - "file_size_bytes": 4260344 - }, - { - "name": "Gp0115676_SMART GFF file", - "description": "SMART GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_smart.gff", - "md5_checksum": "a893783f6886e31b6bca5b6baede9f66", - "id": "nmdc:a893783f6886e31b6bca5b6baede9f66", - "file_size_bytes": 8240017 - }, - { - "name": "Gp0115676_SuperFam GFF 
file", - "description": "SuperFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_supfam.gff", - "md5_checksum": "2225c723ccf0fd5ea309cfb5ca90d536", - "id": "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", - "file_size_bytes": 48186264 - }, - { - "name": "Gp0115676_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cath_funfam.gff", - "md5_checksum": "1abd69f8096f98174d95d9a3a13c2a3b", - "id": "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", - "file_size_bytes": 38259823 - }, - { - "name": "Gp0115676_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko_ec.gff", - "md5_checksum": "83647c3e1ed96fda36f7c119a3e98182", - "id": "nmdc:83647c3e1ed96fda36f7c119a3e98182", - "file_size_bytes": 29337291 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:21:41+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115673_Filtered Reads", + "description": "Filtered Reads for Gp0115673", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", + "md5_checksum": "268918f610926421d2af43f175553680", + "id": "nmdc:268918f610926421d2af43f175553680", + "file_size_bytes": 1492820163 + }, + { + "name": "Gp0115673_Filtered Stats", + "description": "Filtered Stats for Gp0115673", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", + 
"md5_checksum": "4610980cf3558f5a9830797ead97362a", + "id": "nmdc:4610980cf3558f5a9830797ead97362a", + "file_size_bytes": 287 + }, + { + "name": "Gp0115673_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", + "md5_checksum": "c7b24571b61a33018cf118b5424b787f", + "id": "nmdc:c7b24571b61a33018cf118b5424b787f", + "file_size_bytes": 9782 + }, + { + "name": "Gp0115673_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", + "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", + "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", + "file_size_bytes": 856112 + }, + { + "name": "Gp0115673_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115673", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", + "md5_checksum": "7c6b0ef44450c747580826a2e218844b", + "id": "nmdc:7c6b0ef44450c747580826a2e218844b", + "file_size_bytes": 255142 + }, + { + "name": "Gp0115673_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115673", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", + "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", + "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", + "file_size_bytes": 1218364738 + }, + { + "name": "Gp0115673_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115673", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", + "md5_checksum": "b5f7a68a94b356001014d1be024231af", + "id": "nmdc:b5f7a68a94b356001014d1be024231af", + "file_size_bytes": 254923 + }, + { + "name": "Gp0115673_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115673", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", + "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", + "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "file_size_bytes": 2323219 + }, + { + "name": "Gp0115673_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115673", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", + "md5_checksum": "35bf579641b2ffb3614098d9811a4968", + "id": "nmdc:35bf579641b2ffb3614098d9811a4968", + "file_size_bytes": 1001134031 + }, + { + "name": "Gp0115673_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115673", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", + "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", + "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "file_size_bytes": 640671 + }, + { + "name": "Gp0115673_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115673", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", + "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", + "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", + "file_size_bytes": 3995499 + }, + { + "name": "Gp0115673_Gottcha2 TSV report", + 
"description": "Gottcha2 TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", + "md5_checksum": "c7b24571b61a33018cf118b5424b787f", + "id": "nmdc:c7b24571b61a33018cf118b5424b787f", + "file_size_bytes": 9782 + }, + { + "name": "Gp0115673_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", + "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", + "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", + "file_size_bytes": 856112 + }, + { + "name": "Gp0115673_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115673", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", + "md5_checksum": "7c6b0ef44450c747580826a2e218844b", + "id": "nmdc:7c6b0ef44450c747580826a2e218844b", + "file_size_bytes": 255142 + }, + { + "name": "Gp0115673_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115673", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", + "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", + "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", + "file_size_bytes": 1218364738 + }, + { + "name": "Gp0115673_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115673", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", + "md5_checksum": "b5f7a68a94b356001014d1be024231af", + "id": "nmdc:b5f7a68a94b356001014d1be024231af", + "file_size_bytes": 254923 + }, + { + "name": 
"Gp0115673_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115673", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", + "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", + "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "file_size_bytes": 2323219 + }, + { + "name": "Gp0115673_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115673", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", + "md5_checksum": "35bf579641b2ffb3614098d9811a4968", + "id": "nmdc:35bf579641b2ffb3614098d9811a4968", + "file_size_bytes": 1001134031 + }, + { + "name": "Gp0115673_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115673", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", + "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", + "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "file_size_bytes": 640671 + }, + { + "name": "Gp0115673_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115673", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", + "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", + "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", + "file_size_bytes": 3995499 + }, + { + "name": "Gp0115673_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115673", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_contigs.fna", + "md5_checksum": "06d4964c0822abd6f94ca883c122f7ce", + "id": 
"nmdc:06d4964c0822abd6f94ca883c122f7ce", + "file_size_bytes": 49610158 + }, + { + "name": "Gp0115673_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115673", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_scaffolds.fna", + "md5_checksum": "bad916c69afe839097650b0b9526a841", + "id": "nmdc:bad916c69afe839097650b0b9526a841", + "file_size_bytes": 49338957 + }, + { + "name": "Gp0115673_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_covstats.txt", + "md5_checksum": "a187658f262fa495de43707aabcbf480", + "id": "nmdc:a187658f262fa495de43707aabcbf480", + "file_size_bytes": 7048516 + }, + { + "name": "Gp0115673_Assembled AGP file", + "description": "Assembled AGP file for Gp0115673", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_assembly.agp", + "md5_checksum": "c525c04f90889be615025c667908370c", + "id": "nmdc:c525c04f90889be615025c667908370c", + "file_size_bytes": 6557406 + }, + { + "name": "Gp0115673_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115673", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_pairedMapped_sorted.bam", + "md5_checksum": "2e293158750df042be7422826125bef2", + "id": "nmdc:2e293158750df042be7422826125bef2", + "file_size_bytes": 1601507411 + }, + { + "name": "Gp0115673_Protein FAA", + "description": "Protein FAA for Gp0115673", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_proteins.faa", + "md5_checksum": "be3b8decbc48f9588daca36ca4c883ab", + "id": "nmdc:be3b8decbc48f9588daca36ca4c883ab", + 
"file_size_bytes": 27487621 + }, + { + "name": "Gp0115673_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115673", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_structural_annotation.gff", + "md5_checksum": "106c834bb14367ec6154d1b04f2a1021", + "id": "nmdc:106c834bb14367ec6154d1b04f2a1021", + "file_size_bytes": 2505 + }, + { + "name": "Gp0115673_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115673", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_functional_annotation.gff", + "md5_checksum": "dfe3eed1eee6d6764ae22a2c6b0209e5", + "id": "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", + "file_size_bytes": 30665845 + }, + { + "name": "Gp0115673_KO TSV file", + "description": "KO TSV file for Gp0115673", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko.tsv", + "md5_checksum": "84e3913c75d155fc45f04bc04810063a", + "id": "nmdc:84e3913c75d155fc45f04bc04810063a", + "file_size_bytes": 4142989 + }, + { + "name": "Gp0115673_EC TSV file", + "description": "EC TSV file for Gp0115673", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ec.tsv", + "md5_checksum": "418e74fcbe4b97b8d74cb697a3b3feb4", + "id": "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", + "file_size_bytes": 2665975 + }, + { + "name": "Gp0115673_COG GFF file", + "description": "COG GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cog.gff", + "md5_checksum": "2d57dd06178c83c1f9c4bfaecf34b8b4", + "id": "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", + "file_size_bytes": 17716812 + }, + { + "name": "Gp0115673_PFAM GFF 
file", + "description": "PFAM GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_pfam.gff", + "md5_checksum": "42173701162f4fdb727bc4eded48c2a1", + "id": "nmdc:42173701162f4fdb727bc4eded48c2a1", + "file_size_bytes": 14043787 + }, + { + "name": "Gp0115673_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_tigrfam.gff", + "md5_checksum": "89b8851da4dca184654a76128048e09a", + "id": "nmdc:89b8851da4dca184654a76128048e09a", + "file_size_bytes": 2009579 + }, + { + "name": "Gp0115673_SMART GFF file", + "description": "SMART GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_smart.gff", + "md5_checksum": "e0d0721c6051fb0eebd70635882639c1", + "id": "nmdc:e0d0721c6051fb0eebd70635882639c1", + "file_size_bytes": 3834400 + }, + { + "name": "Gp0115673_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_supfam.gff", + "md5_checksum": "e9b0a3709e78dd9dfdba4eff7103c425", + "id": "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", + "file_size_bytes": 22131290 + }, + { + "name": "Gp0115673_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cath_funfam.gff", + "md5_checksum": "e627abd2dfaee1fbf695de11211c6971", + "id": "nmdc:e627abd2dfaee1fbf695de11211c6971", + "file_size_bytes": 17702997 + }, + { + "name": "Gp0115673_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko_ec.gff", + "md5_checksum": "a04e32711e814e733114531a666606c6", + "id": "nmdc:a04e32711e814e733114531a666606c6", + "file_size_bytes": 13225993 + }, + { + "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115673_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.tooShort.fa", + "md5_checksum": "c907101a9eb50d1e522d1fc11b4d3164", + "id": "nmdc:c907101a9eb50d1e522d1fc11b4d3164", + "file_size_bytes": 35344893 + }, + { + "name": "Gp0115673_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.unbinned.fa", + "md5_checksum": "f80fbdbf31ee0ac76353d59e64b789bc", + "id": "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", + "file_size_bytes": 8810307 + }, + { + "name": "Gp0115673_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115673", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_checkm_qa.out", + "md5_checksum": "af15089c0cb19ec9bd65f98e59dc94f1", + "id": "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", + "file_size_bytes": 942 + }, + { + "name": "Gp0115673_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115673", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_hqmq_bin.zip", + "md5_checksum": "70d3f2afd9f32a2bdaa81a6fc547f6fb", + "id": 
"nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", + "file_size_bytes": 182 }, + { + "name": "Gp0115673_metabat2 bins", + "description": "metabat2 bins for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_metabat_bin.zip", + "md5_checksum": "f40d84a4fc0c87d76c144777f9e8a8ea", + "id": "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea", + "file_size_bytes": 1658458 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab3472c" + "$oid": "649b0052ec087f6bbab3471f" }, "has_input": [ - "nmdc:19987e32391f846db382edabf14ba43e", - "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", - "nmdc:6fa3d1e5fae636b4199ff57b4776a51c" + "nmdc:06d4964c0822abd6f94ca883c122f7ce", + "nmdc:2e293158750df042be7422826125bef2", + "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5" ], - "too_short_contig_num": 175121, + "too_short_contig_num": 83787, "part_of": [ - "nmdc:mga0w3a067" + "nmdc:mga0kpja70" ], - "binned_contig_num": 1550, + "binned_contig_num": 890, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:71667f3b8ee0cb5acadc541fa6914022", - "nmdc:0141a64077e0f18adc42cb1915a00fa2", - "nmdc:982b47616dde63a388400fcc57d7c5b0", - "nmdc:313eb61bc7577e272eca6332e923f9c4", - "nmdc:763eb40a8905e9b0d459c45222f1b05e" + "nmdc:c907101a9eb50d1e522d1fc11b4d3164", + "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", + "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", + "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", + "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea" ], - "was_informed_by": "gold:Gp0115676", - "input_contig_num": 187123, - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "was_informed_by": "gold:Gp0115673", + "input_contig_num": 89806, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0w3a067", + "name": "MAGs Analysis Activity for 
nmdc:mga0kpja70", "mags_list": [ { - "number_of_contig": 457, - "completeness": 95.14, + "number_of_contig": 67, + "completeness": 12.5, "bin_name": "bins.1", - "gene_count": 6260, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 2, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 76.52, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 85 - }, - { - "number_of_contig": 24, - "completeness": 4.17, - "bin_name": "bins.2", - "gene_count": 246, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 5 - }, - { - "number_of_contig": 175, - "completeness": 36.21, - "bin_name": "bins.3", - "gene_count": 937, + "gene_count": 318, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -18680,1459 +17862,1377 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 12 - }, - { - "number_of_contig": 485, - "completeness": 43.26, - "bin_name": "bins.4", - "gene_count": 2590, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.55, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 29 - }, - { - "number_of_contig": 339, - "completeness": 79.0, - "bin_name": "bins.5", - "gene_count": 2464, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 0, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 3.71, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 32 + "num_t_rna": 4 }, { - "number_of_contig": 70, - 
"completeness": 0.0, - "bin_name": "bins.6", - "gene_count": 298, + "number_of_contig": 823, + "completeness": 97.81, + "bin_name": "bins.2", + "gene_count": 5828, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", - "num_16s": 0, + "num_16s": 1, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 0.0, + "contamination": 66.19, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 1 + "num_t_rna": 63 } ], - "unbinned_contig_num": 10452, - "started_at_time": "2021-10-11T02:26:37Z", + "unbinned_contig_num": 5129, + "started_at_time": "2021-10-11T02:28:36Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:40:05+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115676_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.tooShort.fa", - "md5_checksum": "71667f3b8ee0cb5acadc541fa6914022", - "id": "nmdc:71667f3b8ee0cb5acadc541fa6914022", - "file_size_bytes": 75793492 - }, - { - "name": "Gp0115676_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.unbinned.fa", - "md5_checksum": "0141a64077e0f18adc42cb1915a00fa2", - "id": "nmdc:0141a64077e0f18adc42cb1915a00fa2", - "file_size_bytes": 17366889 - }, - { - "name": 
"Gp0115676_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115676", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_checkm_qa.out", - "md5_checksum": "982b47616dde63a388400fcc57d7c5b0", - "id": "nmdc:982b47616dde63a388400fcc57d7c5b0", - "file_size_bytes": 1700 - }, - { - "name": "Gp0115676_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115676", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_hqmq_bin.zip", - "md5_checksum": "313eb61bc7577e272eca6332e923f9c4", - "id": "nmdc:313eb61bc7577e272eca6332e923f9c4", - "file_size_bytes": 677741 - }, - { - "name": "Gp0115676_metabat2 bins", - "description": "metabat2 bins for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_metabat_bin.zip", - "md5_checksum": "763eb40a8905e9b0d459c45222f1b05e", - "id": "nmdc:763eb40a8905e9b0d459c45222f1b05e", - "file_size_bytes": 2885722 - } - ] + "ended_at_time": "2021-10-11T03:32:43+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b4f" - }, - "id": "nmdc:omprc-11-qngh7497", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-8362vs44" - ], - "has_output": [ - "jgi:55a9caff0d87852b2150891e" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James 
Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115677" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c87a" + "$oid": "649b005bbf2caae0415ef9c0" }, "has_input": [ - "nmdc:80ca2cf2e3edcac29eb62b43f62e25c3" + "nmdc:06d4964c0822abd6f94ca883c122f7ce" ], "part_of": [ - "nmdc:mga0zb0766" + "nmdc:mga0kpja70" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:63c857b3011dec61a08044d518291f23", - "nmdc:2a79d7978caecf9b08fb2029fa42c9b3" + "nmdc:be3b8decbc48f9588daca36ca4c883ab", + "nmdc:106c834bb14367ec6154d1b04f2a1021", + "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", + "nmdc:84e3913c75d155fc45f04bc04810063a", + "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", + "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", + "nmdc:42173701162f4fdb727bc4eded48c2a1", + "nmdc:89b8851da4dca184654a76128048e09a", + "nmdc:e0d0721c6051fb0eebd70635882639c1", + "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", + "nmdc:e627abd2dfaee1fbf695de11211c6971", + "nmdc:a04e32711e814e733114531a666606c6" ], - "was_informed_by": "gold:Gp0115677", - "input_read_count": 65434428, - "output_read_bases": 9483843059, - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "was_informed_by": "gold:Gp0115673", + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", "execution_resource": "NERSC-Cori", - "input_read_bases": 9880598628, - "name": "Read QC Activity for nmdc:mga0zb0766", - "output_read_count": 64887080, - "started_at_time": "2021-10-11T02:24:49Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T06:26:42+00:00", - "output_data_objects": [ - { - "name": "Gp0115677_Filtered Reads", - "description": "Filtered Reads for Gp0115677", - "data_object_type": "Filtered Sequencing Reads", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filtered.fastq.gz", - "md5_checksum": "63c857b3011dec61a08044d518291f23", - "id": "nmdc:63c857b3011dec61a08044d518291f23", - "file_size_bytes": 5307348388 - }, - { - "name": "Gp0115677_Filtered Stats", - "description": "Filtered Stats for Gp0115677", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filterStats.txt", - "md5_checksum": "2a79d7978caecf9b08fb2029fa42c9b3", - "id": "nmdc:2a79d7978caecf9b08fb2029fa42c9b3", - "file_size_bytes": 279 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0kpja70", + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:32:43+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf3c" + "$oid": "649b005f2ca5ee4adb139fa8" }, "has_input": [ - "nmdc:63c857b3011dec61a08044d518291f23" + "nmdc:268918f610926421d2af43f175553680" + ], + "part_of": [ + "nmdc:mga0kpja70" ], + "ctg_logsum": 160283, + "scaf_logsum": 161291, + "gap_pct": 0.0036, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "nmdc:c1730daf5e6017219fd9fc079e42c132", - "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "nmdc:229017cdb1832bb718d22dc27db44125", - "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "nmdc:bdd701b44e67929ec8bbe279697da937", - "nmdc:d35583a5ed45df5a58bf084fc67bf988" + "nmdc:06d4964c0822abd6f94ca883c122f7ce", + "nmdc:bad916c69afe839097650b0b9526a841", + "nmdc:a187658f262fa495de43707aabcbf480", + "nmdc:c525c04f90889be615025c667908370c", + "nmdc:2e293158750df042be7422826125bef2" ], - "was_informed_by": "gold:Gp0115677", - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "asm_score": 6.419, + "was_informed_by": "gold:Gp0115673", + "ctg_powsum": 18694, + 
"scaf_max": 39252, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "scaf_powsum": 18825, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", - "started_at_time": "2021-10-11T02:24:49Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:26:42+00:00", - "output_data_objects": [ - { - "name": "Gp0115677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", - "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", - "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "file_size_bytes": 17895 - }, - { - "name": "Gp0115677_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", - "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", - "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", - "file_size_bytes": 1182538 - }, - { - "name": "Gp0115677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", - "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", - "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "file_size_bytes": 276802 - }, - { - "name": "Gp0115677_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115677", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", - "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", - "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "file_size_bytes": 4716470614 - }, - { - "name": "Gp0115677_Centrifuge TSV 
report", - "description": "Centrifuge TSV report for Gp0115677", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", - "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", - "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "file_size_bytes": 267231 - }, - { - "name": "Gp0115677_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115677", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", - "md5_checksum": "229017cdb1832bb718d22dc27db44125", - "id": "nmdc:229017cdb1832bb718d22dc27db44125", - "file_size_bytes": 2356003 - }, - { - "name": "Gp0115677_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115677", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", - "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", - "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "file_size_bytes": 3857487871 - }, - { - "name": "Gp0115677_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115677", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", - "md5_checksum": "bdd701b44e67929ec8bbe279697da937", - "id": "nmdc:bdd701b44e67929ec8bbe279697da937", - "file_size_bytes": 708598 - }, - { - "name": "Gp0115677_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115677", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", - "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", - "id": 
"nmdc:d35583a5ed45df5a58bf084fc67bf988", - "file_size_bytes": 4250180 - } - ] - }, + "contigs": 89808, + "name": "Assembly Activity for nmdc:mga0kpja70", + "ctg_max": 39252, + "gc_std": 0.11246, + "contig_bp": 46120517, + "gc_avg": 0.55483, + "started_at_time": "2021-10-11T02:28:36Z", + "scaf_bp": 46122177, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 89660, + "ended_at_time": "2021-10-11T03:32:43+00:00", + "ctg_l50": 493, + "ctg_l90": 286, + "ctg_n50": 19910, + "ctg_n90": 73487, + "scaf_l50": 494, + "scaf_l90": 286, + "scaf_n50": 19797, + "scaf_n90": 73347 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e719d5833bcf838a70143c" + "$oid": "649b009773e8249959349b4c" }, + "id": "nmdc:omprc-11-hk1bje46", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", "has_input": [ - "nmdc:63c857b3011dec61a08044d518291f23" - ], - "part_of": [ - "nmdc:mga0zb0766" + "nmdc:bsm-11-5h7px351" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "nmdc:c1730daf5e6017219fd9fc079e42c132", - "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "nmdc:229017cdb1832bb718d22dc27db44125", - "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "nmdc:bdd701b44e67929ec8bbe279697da937", - "nmdc:d35583a5ed45df5a58bf084fc67bf988" + "jgi:55d817f70d8785342fcf8270" ], - "was_informed_by": "gold:Gp0115677", - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", - "started_at_time": "2021-10-11T02:24:49Z", 
- "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:26:42+00:00", - "output_data_objects": [ - { - "name": "Gp0115677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", - "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", - "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "file_size_bytes": 17895 - }, - { - "name": "Gp0115677_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", - "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", - "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", - "file_size_bytes": 1182538 - }, - { - "name": "Gp0115677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", - "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", - "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "file_size_bytes": 276802 - }, - { - "name": "Gp0115677_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115677", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", - "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", - "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "file_size_bytes": 4716470614 - }, - { - "name": "Gp0115677_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115677", - "data_object_type": "Centrifuge Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", - "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", - "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "file_size_bytes": 267231 - }, - { - "name": "Gp0115677_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115677", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", - "md5_checksum": "229017cdb1832bb718d22dc27db44125", - "id": "nmdc:229017cdb1832bb718d22dc27db44125", - "file_size_bytes": 2356003 - }, - { - "name": "Gp0115677_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115677", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", - "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", - "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "file_size_bytes": 3857487871 - }, - { - "name": "Gp0115677_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115677", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", - "md5_checksum": "bdd701b44e67929ec8bbe279697da937", - "id": "nmdc:bdd701b44e67929ec8bbe279697da937", - "file_size_bytes": 708598 - }, - { - "name": "Gp0115677_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115677", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", - "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", - "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", - "file_size_bytes": 4250180 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + 
"add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115673" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fad" + "$oid": "649b009d6bdd4fd20273c876" }, "has_input": [ - "nmdc:63c857b3011dec61a08044d518291f23" + "nmdc:3783bc4ce3716b6d299533bc3f6591b6" ], "part_of": [ - "nmdc:mga0zb0766" + "nmdc:mga0kpja70" ], - "ctg_logsum": 407938, - "scaf_logsum": 442802, - "gap_pct": 0.02562, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:3d9e14d6f7a854042a7d71def080409b", - "nmdc:26d0d64ca7c850f0e04a4c33690bd178", - "nmdc:8f8a0622cfe39054bd20f11116c78402", - "nmdc:623aa370c44897cf30844647c2f5bd94", - "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5" + "nmdc:268918f610926421d2af43f175553680", + "nmdc:4610980cf3558f5a9830797ead97362a" ], - "asm_score": 13.853, - "was_informed_by": "gold:Gp0115677", - "ctg_powsum": 50872, - "scaf_max": 582605, - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "scaf_powsum": 55815, + "was_informed_by": "gold:Gp0115673", + "input_read_count": 17796788, + "output_read_bases": 2520029380, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", "execution_resource": "NERSC-Cori", - "contigs": 548764, - "name": "Assembly Activity for nmdc:mga0zb0766", - "ctg_max": 464697, - "gc_std": 0.11035, - "contig_bp": 229799767, - "gc_avg": 0.55184, - "started_at_time": "2021-10-11T02:24:49Z", - "scaf_bp": 229858665, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 543003, - "ended_at_time": "2021-10-11T06:26:42+00:00", - "ctg_l50": 375, - "ctg_l90": 283, - "ctg_n50": 
171281, - "ctg_n90": 471697, - "scaf_l50": 378, - "scaf_l90": 283, - "scaf_n50": 164840, - "scaf_n90": 466121, - "scaf_l_gt50k": 2790937, - "scaf_n_gt50k": 23, - "scaf_pct_gt50k": 1.2141969, - "output_data_objects": [ - { - "name": "Gp0115677_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115677", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_contigs.fna", - "md5_checksum": "3d9e14d6f7a854042a7d71def080409b", - "id": "nmdc:3d9e14d6f7a854042a7d71def080409b", - "file_size_bytes": 250747283 - }, - { - "name": "Gp0115677_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115677", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_scaffolds.fna", - "md5_checksum": "26d0d64ca7c850f0e04a4c33690bd178", - "id": "nmdc:26d0d64ca7c850f0e04a4c33690bd178", - "file_size_bytes": 249006954 - }, - { - "name": "Gp0115677_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_covstats.txt", - "md5_checksum": "8f8a0622cfe39054bd20f11116c78402", - "id": "nmdc:8f8a0622cfe39054bd20f11116c78402", - "file_size_bytes": 43716675 - }, - { - "name": "Gp0115677_Assembled AGP file", - "description": "Assembled AGP file for Gp0115677", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_assembly.agp", - "md5_checksum": "623aa370c44897cf30844647c2f5bd94", - "id": "nmdc:623aa370c44897cf30844647c2f5bd94", - "file_size_bytes": 41409581 - }, - { - "name": "Gp0115677_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115677", - "data_object_type": "Assembly Coverage BAM", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_pairedMapped_sorted.bam", - "md5_checksum": "f4a1cf24281f14a666a1bfc9afc0aab5", - "id": "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", - "file_size_bytes": 5828772757 - } - ] - }, + "input_read_bases": 2687314988, + "name": "Read QC Activity for nmdc:mga0kpja70", + "output_read_count": 16817496, + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:32:43+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9c1" + "$oid": "649b009bff710ae353f8cf44" }, "has_input": [ - "nmdc:3d9e14d6f7a854042a7d71def080409b" - ], - "part_of": [ - "nmdc:mga0zb0766" + "nmdc:268918f610926421d2af43f175553680" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:4f9d82516561ee307b1ab4841255aff0", - "nmdc:a658e9045fde900cdc78d0578446b960", - "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", - "nmdc:9a338a51c6ca2ec4e0da4e15903be407", - "nmdc:0f9e627ace8d9b8420e957bcd033244a", - "nmdc:144a997b22098f5fe748d2fa069cdc71", - "nmdc:82dc44c196f4b6b5552e8360f21f93a0", - "nmdc:9238a5420065e1da9da31c270c90268a", - "nmdc:ce31f29ff8fed6d0a973d61157af7220", - "nmdc:016cbd549e03d896ed746ab91771b21a", - "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", - "nmdc:c935ce264779684a01c9a7777e506d02" - ], - "was_informed_by": "gold:Gp0115677", - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "nmdc:c7b24571b61a33018cf118b5424b787f", + "nmdc:e185734176505343bf4c83c16a0a9fe2", + "nmdc:7c6b0ef44450c747580826a2e218844b", + "nmdc:5b98c377f424d7609f1a09e350cfb837", + "nmdc:b5f7a68a94b356001014d1be024231af", + "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "nmdc:35bf579641b2ffb3614098d9811a4968", + "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "nmdc:a7030fa8e9622e3396c2b96448e90c3b" + ], + "was_informed_by": "gold:Gp0115673", + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", "execution_resource": 
"NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0zb0766", - "started_at_time": "2021-10-11T02:24:49Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T06:26:42+00:00", - "output_data_objects": [ - { - "name": "Gp0115677_Protein FAA", - "description": "Protein FAA for Gp0115677", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_proteins.faa", - "md5_checksum": "4f9d82516561ee307b1ab4841255aff0", - "id": "nmdc:4f9d82516561ee307b1ab4841255aff0", - "file_size_bytes": 144603933 - }, - { - "name": "Gp0115677_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115677", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_structural_annotation.gff", - "md5_checksum": "a658e9045fde900cdc78d0578446b960", - "id": "nmdc:a658e9045fde900cdc78d0578446b960", - "file_size_bytes": 2546 - }, - { - "name": "Gp0115677_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115677", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_functional_annotation.gff", - "md5_checksum": "075c3477b8874aa8d6c4dbc1360a2b38", - "id": "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", - "file_size_bytes": 167984752 - }, - { - "name": "Gp0115677_KO TSV file", - "description": "KO TSV file for Gp0115677", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko.tsv", - "md5_checksum": "9a338a51c6ca2ec4e0da4e15903be407", - "id": "nmdc:9a338a51c6ca2ec4e0da4e15903be407", - "file_size_bytes": 19341535 - }, - { - "name": "Gp0115677_EC TSV file", - "description": "EC TSV file for Gp0115677", - "data_object_type": "Annotation Enzyme Commission", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ec.tsv", - "md5_checksum": "0f9e627ace8d9b8420e957bcd033244a", - "id": "nmdc:0f9e627ace8d9b8420e957bcd033244a", - "file_size_bytes": 12533246 - }, - { - "name": "Gp0115677_COG GFF file", - "description": "COG GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cog.gff", - "md5_checksum": "144a997b22098f5fe748d2fa069cdc71", - "id": "nmdc:144a997b22098f5fe748d2fa069cdc71", - "file_size_bytes": 85841510 - }, - { - "name": "Gp0115677_PFAM GFF file", - "description": "PFAM GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_pfam.gff", - "md5_checksum": "82dc44c196f4b6b5552e8360f21f93a0", - "id": "nmdc:82dc44c196f4b6b5552e8360f21f93a0", - "file_size_bytes": 64139943 - }, - { - "name": "Gp0115677_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_tigrfam.gff", - "md5_checksum": "9238a5420065e1da9da31c270c90268a", - "id": "nmdc:9238a5420065e1da9da31c270c90268a", - "file_size_bytes": 7585101 - }, - { - "name": "Gp0115677_SMART GFF file", - "description": "SMART GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_smart.gff", - "md5_checksum": "ce31f29ff8fed6d0a973d61157af7220", - "id": "nmdc:ce31f29ff8fed6d0a973d61157af7220", - "file_size_bytes": 18353478 - }, - { - "name": "Gp0115677_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_supfam.gff", - "md5_checksum": "016cbd549e03d896ed746ab91771b21a", - "id": "nmdc:016cbd549e03d896ed746ab91771b21a", - "file_size_bytes": 107179327 - }, - { - "name": "Gp0115677_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115677", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cath_funfam.gff", - "md5_checksum": "7ef0abcd7fba705f6e9e26dcb8b1da8d", - "id": "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", - "file_size_bytes": 85056001 - }, - { - "name": "Gp0115677_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko_ec.gff", - "md5_checksum": "c935ce264779684a01c9a7777e506d02", - "id": "nmdc:c935ce264779684a01c9a7777e506d02", - "file_size_bytes": 61547317 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:32:43+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e719b7833bcf838a7011dc" + }, + "has_input": [ + "nmdc:268918f610926421d2af43f175553680" + ], + "part_of": [ + "nmdc:mga0kpja70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c7b24571b61a33018cf118b5424b787f", + "nmdc:e185734176505343bf4c83c16a0a9fe2", + "nmdc:7c6b0ef44450c747580826a2e218844b", + "nmdc:5b98c377f424d7609f1a09e350cfb837", + "nmdc:b5f7a68a94b356001014d1be024231af", + "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "nmdc:35bf579641b2ffb3614098d9811a4968", + "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "nmdc:a7030fa8e9622e3396c2b96448e90c3b" + ], + "was_informed_by": "gold:Gp0115673", + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:32:43+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + 
"library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115671_Filtered Reads", + "description": "Filtered Reads for Gp0115671", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filtered.fastq.gz", + "md5_checksum": "445f37bc3019e9fe3b29a2ac5bcbfc9c", + "id": "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", + "file_size_bytes": 1806996776 + }, + { + "name": "Gp0115671_Filtered Stats", + "description": "Filtered Stats for Gp0115671", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filterStats.txt", + "md5_checksum": "24440b4c5534da30eee650b68eccda84", + "id": "nmdc:24440b4c5534da30eee650b68eccda84", + "file_size_bytes": 289 + }, + { + "name": "Gp0115671_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", + "md5_checksum": "358559c32b69eff51758db66ac01021b", + "id": "nmdc:358559c32b69eff51758db66ac01021b", + "file_size_bytes": 11833 + }, + { + "name": "Gp0115671_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", + "md5_checksum": "befbd648249c2871bd27999120e50bf7", + "id": "nmdc:befbd648249c2871bd27999120e50bf7", + "file_size_bytes": 888177 + }, + { + "name": "Gp0115671_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115671", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", + "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", + "id": 
"nmdc:cacb8f623a808d0cae094d46f2801dd3", + "file_size_bytes": 261703 + }, + { + "name": "Gp0115671_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115671", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", + "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", + "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "file_size_bytes": 1474970402 + }, + { + "name": "Gp0115671_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115671", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", + "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", + "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + "file_size_bytes": 255777 + }, + { + "name": "Gp0115671_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115671", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", + "md5_checksum": "e0736ff520260ba2097c02b9e767362c", + "id": "nmdc:e0736ff520260ba2097c02b9e767362c", + "file_size_bytes": 2329875 + }, + { + "name": "Gp0115671_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115671", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", + "md5_checksum": "a00960655f9e80726fdb0fade1bec958", + "id": "nmdc:a00960655f9e80726fdb0fade1bec958", + "file_size_bytes": 1213240496 + }, + { + "name": "Gp0115671_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115671", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", + "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", + "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "file_size_bytes": 659715 + }, + { + "name": "Gp0115671_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115671", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", + "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", + "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", + "file_size_bytes": 4010701 + }, + { + "name": "Gp0115671_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", + "md5_checksum": "358559c32b69eff51758db66ac01021b", + "id": "nmdc:358559c32b69eff51758db66ac01021b", + "file_size_bytes": 11833 + }, + { + "name": "Gp0115671_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", + "md5_checksum": "befbd648249c2871bd27999120e50bf7", + "id": "nmdc:befbd648249c2871bd27999120e50bf7", + "file_size_bytes": 888177 + }, + { + "name": "Gp0115671_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115671", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", + "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", + "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "file_size_bytes": 261703 + }, + { + "name": "Gp0115671_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115671", + "data_object_type": "Centrifuge Taxonomic Classification", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", + "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", + "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "file_size_bytes": 1474970402 + }, + { + "name": "Gp0115671_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115671", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", + "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", + "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + "file_size_bytes": 255777 + }, + { + "name": "Gp0115671_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115671", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", + "md5_checksum": "e0736ff520260ba2097c02b9e767362c", + "id": "nmdc:e0736ff520260ba2097c02b9e767362c", + "file_size_bytes": 2329875 + }, + { + "name": "Gp0115671_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115671", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", + "md5_checksum": "a00960655f9e80726fdb0fade1bec958", + "id": "nmdc:a00960655f9e80726fdb0fade1bec958", + "file_size_bytes": 1213240496 + }, + { + "name": "Gp0115671_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115671", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", + "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", + "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "file_size_bytes": 659715 + }, + { + "name": 
"Gp0115671_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115671", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", + "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", + "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", + "file_size_bytes": 4010701 + }, + { + "name": "Gp0115671_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115671", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_contigs.fna", + "md5_checksum": "0a1ebd847e3bb8f928ef491497f8355b", + "id": "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "file_size_bytes": 58744710 + }, + { + "name": "Gp0115671_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115671", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_scaffolds.fna", + "md5_checksum": "be4cab04a701bce0ed99605109bd5d6f", + "id": "nmdc:be4cab04a701bce0ed99605109bd5d6f", + "file_size_bytes": 58382380 + }, + { + "name": "Gp0115671_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_covstats.txt", + "md5_checksum": "cc4d3160618a82f81518bdc97ce1f5e2", + "id": "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", + "file_size_bytes": 9464710 + }, + { + "name": "Gp0115671_Assembled AGP file", + "description": "Assembled AGP file for Gp0115671", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_assembly.agp", + "md5_checksum": "473ca208ab97399a644c8e5326e765e5", + "id": "nmdc:473ca208ab97399a644c8e5326e765e5", + "file_size_bytes": 8820452 + }, + { + "name": "Gp0115671_Metagenome Alignment BAM file", + 
"description": "Metagenome Alignment BAM file for Gp0115671", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_pairedMapped_sorted.bam", + "md5_checksum": "69371e513bebd1069a0ed26cc2c914cb", + "id": "nmdc:69371e513bebd1069a0ed26cc2c914cb", + "file_size_bytes": 1938214126 }, + { + "name": "Gp0115671_Protein FAA", + "description": "Protein FAA for Gp0115671", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_proteins.faa", + "md5_checksum": "147b97234576ba123a9f3c63eb249ecf", + "id": "nmdc:147b97234576ba123a9f3c63eb249ecf", + "file_size_bytes": 32911597 + }, + { + "name": "Gp0115671_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115671", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_structural_annotation.gff", + "md5_checksum": "3e037f5f744c9f8e4aa355222cc620ae", + "id": "nmdc:3e037f5f744c9f8e4aa355222cc620ae", + "file_size_bytes": 2516 + }, + { + "name": "Gp0115671_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115671", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_functional_annotation.gff", + "md5_checksum": "10d19849864ecdb722335200d0607bbe", + "id": "nmdc:10d19849864ecdb722335200d0607bbe", + "file_size_bytes": 38009425 + }, + { + "name": "Gp0115671_KO TSV file", + "description": "KO TSV file for Gp0115671", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko.tsv", + "md5_checksum": "0ce9fa5958b6445f7be463538e89e9b1", + "id": "nmdc:0ce9fa5958b6445f7be463538e89e9b1", + "file_size_bytes": 4994549 + }, + { + "name": "Gp0115671_EC 
TSV file", + "description": "EC TSV file for Gp0115671", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ec.tsv", + "md5_checksum": "a3bc059d9350034f835be4e754486c73", + "id": "nmdc:a3bc059d9350034f835be4e754486c73", + "file_size_bytes": 3207987 + }, + { + "name": "Gp0115671_COG GFF file", + "description": "COG GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cog.gff", + "md5_checksum": "da9866461051130a44f0982b1a65c061", + "id": "nmdc:da9866461051130a44f0982b1a65c061", + "file_size_bytes": 21138081 + }, + { + "name": "Gp0115671_PFAM GFF file", + "description": "PFAM GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_pfam.gff", + "md5_checksum": "676fff23fb641ee8af8a2b948fc5b46e", + "id": "nmdc:676fff23fb641ee8af8a2b948fc5b46e", + "file_size_bytes": 16269399 + }, + { + "name": "Gp0115671_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_tigrfam.gff", + "md5_checksum": "a4aa56158a292b63078eb029ed1d90a9", + "id": "nmdc:a4aa56158a292b63078eb029ed1d90a9", + "file_size_bytes": 2189740 + }, + { + "name": "Gp0115671_SMART GFF file", + "description": "SMART GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_smart.gff", + "md5_checksum": "6a28f85e8b5addccb429cc7f8964e496", + "id": "nmdc:6a28f85e8b5addccb429cc7f8964e496", + "file_size_bytes": 4669463 + }, + { + "name": "Gp0115671_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_supfam.gff", + "md5_checksum": "d5b21cce7406ab46611c49dc1ab658ed", + "id": "nmdc:d5b21cce7406ab46611c49dc1ab658ed", + "file_size_bytes": 26589549 
+ }, + { + "name": "Gp0115671_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cath_funfam.gff", + "md5_checksum": "8ead1ab881fd48527d853b0d0601b4bc", + "id": "nmdc:8ead1ab881fd48527d853b0d0601b4bc", + "file_size_bytes": 20889965 + }, + { + "name": "Gp0115671_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko_ec.gff", + "md5_checksum": "ad206c1031a6f0a7805034dee03ff889", + "id": "nmdc:ad206c1031a6f0a7805034dee03ff889", + "file_size_bytes": 15914575 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115671_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.tooShort.fa", + "md5_checksum": "57fd559aaca7b976f3b38bb1a3ce362b", + "id": "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", + "file_size_bytes": 48167943 + }, + { + "name": "Gp0115671_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.unbinned.fa", + "md5_checksum": "43a900225e93216944b4eec3a01f7db7", + "id": "nmdc:43a900225e93216944b4eec3a01f7db7", + "file_size_bytes": 9124730 + }, + { + "name": "Gp0115671_metabat2 bin checkm quality assessment result", + "description": 
"metabat2 bin checkm quality assessment result for Gp0115671", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_checkm_qa.out", + "md5_checksum": "cad0e18a4d2c4067a2724f41e449cb86", + "id": "nmdc:cad0e18a4d2c4067a2724f41e449cb86", + "file_size_bytes": 1014 + }, + { + "name": "Gp0115671_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115671", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_hqmq_bin.zip", + "md5_checksum": "55577aa26faf185b3b3f4c78711e7715", + "id": "nmdc:55577aa26faf185b3b3f4c78711e7715", + "file_size_bytes": 182 + }, + { + "name": "Gp0115671_metabat2 bins", + "description": "metabat2 bins for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_metabat_bin.zip", + "md5_checksum": "c484ee1e530a0c9b47069c0288110e47", + "id": "nmdc:c484ee1e530a0c9b47069c0288110e47", + "file_size_bytes": 444082 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34724" + "$oid": "649b0052ec087f6bbab34733" }, "has_input": [ - "nmdc:3d9e14d6f7a854042a7d71def080409b", - "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", - "nmdc:075c3477b8874aa8d6c4dbc1360a2b38" + "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "nmdc:69371e513bebd1069a0ed26cc2c914cb", + "nmdc:10d19849864ecdb722335200d0607bbe" ], - "too_short_contig_num": 532333, + "too_short_contig_num": 114372, "part_of": [ - "nmdc:mga0zb0766" + "nmdc:mga0rw1351" ], - "binned_contig_num": 969, + "binned_contig_num": 328, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:603009bd6294d2318d929a57b5d3e5d3", - "nmdc:c5334a4e305f78c294c304c3c0526826", - "nmdc:6adacc1ba06e5e451f3636c394c71ae8", 
- "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", - "nmdc:42c3fb9a3906f6b413f99e3276bb7550" + "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", + "nmdc:43a900225e93216944b4eec3a01f7db7", + "nmdc:cad0e18a4d2c4067a2724f41e449cb86", + "nmdc:55577aa26faf185b3b3f4c78711e7715", + "nmdc:c484ee1e530a0c9b47069c0288110e47" ], - "was_informed_by": "gold:Gp0115677", - "input_contig_num": 548756, - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "was_informed_by": "gold:Gp0115671", + "input_contig_num": 120326, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0zb0766", + "name": "MAGs Analysis Activity for nmdc:mga0rw1351", "mags_list": [ { - "number_of_contig": 68, - "completeness": 3.17, + "number_of_contig": 173, + "completeness": 26.29, "bin_name": "bins.1", - "gene_count": 329, + "gene_count": 875, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 0.0, + "contamination": 0.18, "gtdbtk_class": "", "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, + "num_5s": 1, + "num_23s": 1, "gtdbtk_genus": "", - "num_t_rna": 2 + "num_t_rna": 14 }, { - "number_of_contig": 282, - "completeness": 59.56, + "number_of_contig": 155, + "completeness": 24.1, "bin_name": "bins.2", - "gene_count": 1735, - "bin_quality": "MQ", - "gtdbtk_species": "UBA5335 sp002862435", - "gtdbtk_order": "UBA5335", + "gene_count": 806, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", "num_16s": 0, - "gtdbtk_family": "UBA5335", - "gtdbtk_domain": "Bacteria", + "gtdbtk_family": "", + "gtdbtk_domain": "", "contamination": 0.0, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA5335", - "num_t_rna": 26 - }, - { - "number_of_contig": 3, - "completeness": 54.6, - "bin_name": "bins.3", - "gene_count": 751, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": 
"UBA9983_A", - "num_16s": 1, - "gtdbtk_family": "UBA2163", - "gtdbtk_domain": "Bacteria", - "contamination": 1.72, - "gtdbtk_class": "Paceibacteria", - "gtdbtk_phylum": "Patescibacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "1-14-0-10-47-16", - "num_t_rna": 22 - }, - { - "number_of_contig": 90, - "completeness": 98.7, - "bin_name": "bins.4", - "gene_count": 3042, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 0, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 0.0, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 2, - "num_23s": 2, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 46 - }, - { - "number_of_contig": 325, - "completeness": 73.34, - "bin_name": "bins.5", - "gene_count": 2576, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 1, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 0.91, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 35 - }, - { - "number_of_contig": 199, - "completeness": 49.14, - "bin_name": "bins.6", - "gene_count": 1046, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 2, - "num_23s": 2, - "gtdbtk_genus": "", - "num_t_rna": 21 - }, - { - "number_of_contig": 2, - "completeness": 24.32, - "bin_name": "bins.7", - "gene_count": 329, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", + "gtdbtk_class": "", + "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 16 + "num_t_rna": 9 } ], - "unbinned_contig_num": 
15454, - "started_at_time": "2021-10-11T02:24:49Z", + "unbinned_contig_num": 5626, + "started_at_time": "2021-10-11T02:27:50Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T06:26:42+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115677_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.tooShort.fa", - "md5_checksum": "603009bd6294d2318d929a57b5d3e5d3", - "id": "nmdc:603009bd6294d2318d929a57b5d3e5d3", - "file_size_bytes": 215021876 - }, - { - "name": "Gp0115677_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.unbinned.fa", - "md5_checksum": "c5334a4e305f78c294c304c3c0526826", - "id": "nmdc:c5334a4e305f78c294c304c3c0526826", - "file_size_bytes": 26658018 - }, - { - "name": "Gp0115677_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115677", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_checkm_qa.out", - "md5_checksum": "6adacc1ba06e5e451f3636c394c71ae8", - "id": "nmdc:6adacc1ba06e5e451f3636c394c71ae8", - "file_size_bytes": 1859 - }, - { - "name": "Gp0115677_high-quality and medium-quality bins", - "description": "high-quality and 
medium-quality bins for Gp0115677", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_hqmq_bin.zip", - "md5_checksum": "77d4e2a7f358b9ac1d53b69d7e8c45e1", - "id": "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", - "file_size_bytes": 2309404 - }, - { - "name": "Gp0115677_metabat2 bins", - "description": "metabat2 bins for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_metabat_bin.zip", - "md5_checksum": "42c3fb9a3906f6b413f99e3276bb7550", - "id": "nmdc:42c3fb9a3906f6b413f99e3276bb7550", - "file_size_bytes": 450699 - } - ] + "ended_at_time": "2021-10-11T03:39:05+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b50" - }, - "id": "nmdc:omprc-11-jk7zjz92", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-a5d23e19" - ], - "has_output": [ - "jgi:55d817f30d8785342fcf826d" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115675" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c87d" + "$oid": "649b005bbf2caae0415ef9d3" }, "has_input": [ - "nmdc:4a9a0183b794a98c57e5b5ce959a3f65" + 
"nmdc:0a1ebd847e3bb8f928ef491497f8355b" ], "part_of": [ - "nmdc:mga0vf2h47" + "nmdc:mga0rw1351" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", - "nmdc:2507e3f107100ce0c72c57191d450818" + "nmdc:147b97234576ba123a9f3c63eb249ecf", + "nmdc:3e037f5f744c9f8e4aa355222cc620ae", + "nmdc:10d19849864ecdb722335200d0607bbe", + "nmdc:0ce9fa5958b6445f7be463538e89e9b1", + "nmdc:a3bc059d9350034f835be4e754486c73", + "nmdc:da9866461051130a44f0982b1a65c061", + "nmdc:676fff23fb641ee8af8a2b948fc5b46e", + "nmdc:a4aa56158a292b63078eb029ed1d90a9", + "nmdc:6a28f85e8b5addccb429cc7f8964e496", + "nmdc:d5b21cce7406ab46611c49dc1ab658ed", + "nmdc:8ead1ab881fd48527d853b0d0601b4bc", + "nmdc:ad206c1031a6f0a7805034dee03ff889" ], - "was_informed_by": "gold:Gp0115675", - "input_read_count": 18827380, - "output_read_bases": 2508839784, - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "was_informed_by": "gold:Gp0115671", + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", "execution_resource": "NERSC-Cori", - "input_read_bases": 2842934380, - "name": "Read QC Activity for nmdc:mga0vf2h47", - "output_read_count": 16749572, - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:25:21+00:00", - "output_data_objects": [ - { - "name": "Gp0115675_Filtered Reads", - "description": "Filtered Reads for Gp0115675", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", - "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", - "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", - "file_size_bytes": 1533239347 - }, - { - "name": "Gp0115675_Filtered Stats", - "description": "Filtered Stats for Gp0115675", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", - "md5_checksum": 
"2507e3f107100ce0c72c57191d450818", - "id": "nmdc:2507e3f107100ce0c72c57191d450818", - "file_size_bytes": 287 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0rw1351", + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:39:05+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf41" + "$oid": "649b005f2ca5ee4adb139fc1" }, "has_input": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + ], + "part_of": [ + "nmdc:mga0rw1351" ], + "ctg_logsum": 111611, + "scaf_logsum": 112140, + "gap_pct": 0.00155, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:60d673988c4f4447feb5985e8501e914", - "nmdc:a8f93ed13033eb949109b4e83980a893", - "nmdc:31dd6eb616f1e9815778453ab1601195", - "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "nmdc:0aaac507db0e29827e1c87df47324932", - "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "nmdc:1ec0247d86889fcef13f39a58a92b066", - "nmdc:242a1c60f6cb14ba8430375171fda436" + "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "nmdc:be4cab04a701bce0ed99605109bd5d6f", + "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", + "nmdc:473ca208ab97399a644c8e5326e765e5", + "nmdc:69371e513bebd1069a0ed26cc2c914cb" ], - "was_informed_by": "gold:Gp0115675", - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "asm_score": 3.588, + "was_informed_by": "gold:Gp0115671", + "ctg_powsum": 12152, + "scaf_max": 16504, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "scaf_powsum": 12215, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:25:21+00:00", - "output_data_objects": [ - { - "name": "Gp0115675_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115675", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", - "md5_checksum": "60d673988c4f4447feb5985e8501e914", - "id": "nmdc:60d673988c4f4447feb5985e8501e914", - "file_size_bytes": 8921 - }, - { - "name": "Gp0115675_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", - "md5_checksum": "a8f93ed13033eb949109b4e83980a893", - "id": "nmdc:a8f93ed13033eb949109b4e83980a893", - "file_size_bytes": 871109 - }, - { - "name": "Gp0115675_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115675", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", - "md5_checksum": "31dd6eb616f1e9815778453ab1601195", - "id": "nmdc:31dd6eb616f1e9815778453ab1601195", - "file_size_bytes": 252578 - }, - { - "name": "Gp0115675_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115675", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", - "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", - "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "file_size_bytes": 1218767711 - }, - { - "name": "Gp0115675_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115675", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", - "md5_checksum": "0aaac507db0e29827e1c87df47324932", - "id": "nmdc:0aaac507db0e29827e1c87df47324932", - "file_size_bytes": 254260 - }, - { - "name": "Gp0115675_Centrifuge Krona HTML report", - "description": "Centrifuge 
Krona HTML report for Gp0115675", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", - "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", - "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "file_size_bytes": 2324387 - }, - { - "name": "Gp0115675_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115675", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", - "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", - "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "file_size_bytes": 1001846607 - }, - { - "name": "Gp0115675_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115675", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", - "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", - "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", - "file_size_bytes": 635541 - }, - { - "name": "Gp0115675_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115675", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", - "md5_checksum": "242a1c60f6cb14ba8430375171fda436", - "id": "nmdc:242a1c60f6cb14ba8430375171fda436", - "file_size_bytes": 3968420 - } + "contigs": 120326, + "name": "Assembly Activity for nmdc:mga0rw1351", + "ctg_max": 16504, + "gc_std": 0.11331, + "contig_bp": 54171370, + "gc_avg": 0.54451, + "started_at_time": "2021-10-11T02:27:50Z", + "scaf_bp": 54172210, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 120242, + "ended_at_time": "2021-10-11T03:39:05+00:00", + "ctg_l50": 421, + "ctg_l90": 285, + "ctg_n50": 34725, 
+ "ctg_n90": 101428, + "scaf_l50": 421, + "scaf_l90": 285, + "scaf_n50": 34687, + "scaf_n90": 101345 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4d" + }, + "id": "nmdc:omprc-11-qtje8r57", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-wzdqhh45" + ], + "has_output": [ + "jgi:55d817fa0d8785342fcf8272" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115671" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e719dc833bcf838a7014d6" + "$oid": "649b009d6bdd4fd20273c891" }, "has_input": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + "nmdc:57d2e9b1a32e13f859c8b6e450ac3402" ], "part_of": [ - "nmdc:mga0vf2h47" + "nmdc:mga0rw1351" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:60d673988c4f4447feb5985e8501e914", - "nmdc:a8f93ed13033eb949109b4e83980a893", - "nmdc:31dd6eb616f1e9815778453ab1601195", - "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "nmdc:0aaac507db0e29827e1c87df47324932", - "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "nmdc:1ec0247d86889fcef13f39a58a92b066", - 
"nmdc:242a1c60f6cb14ba8430375171fda436" + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", + "nmdc:24440b4c5534da30eee650b68eccda84" ], - "was_informed_by": "gold:Gp0115675", - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "was_informed_by": "gold:Gp0115671", + "input_read_count": 22298982, + "output_read_bases": 3062549086, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:25:21+00:00", - "output_data_objects": [ - { - "name": "Gp0115675_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", - "md5_checksum": "60d673988c4f4447feb5985e8501e914", - "id": "nmdc:60d673988c4f4447feb5985e8501e914", - "file_size_bytes": 8921 - }, - { - "name": "Gp0115675_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", - "md5_checksum": "a8f93ed13033eb949109b4e83980a893", - "id": "nmdc:a8f93ed13033eb949109b4e83980a893", - "file_size_bytes": 871109 - }, - { - "name": "Gp0115675_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115675", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", - "md5_checksum": "31dd6eb616f1e9815778453ab1601195", - "id": "nmdc:31dd6eb616f1e9815778453ab1601195", - "file_size_bytes": 252578 - }, - { - "name": "Gp0115675_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115675", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", - "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", - "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "file_size_bytes": 1218767711 - }, - { - "name": "Gp0115675_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115675", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", - "md5_checksum": "0aaac507db0e29827e1c87df47324932", - "id": "nmdc:0aaac507db0e29827e1c87df47324932", - "file_size_bytes": 254260 - }, - { - "name": "Gp0115675_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115675", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", - "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", - "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "file_size_bytes": 2324387 - }, - { - "name": "Gp0115675_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115675", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", - "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", - "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "file_size_bytes": 1001846607 - }, - { - "name": "Gp0115675_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115675", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", - "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", - "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", - "file_size_bytes": 635541 - }, - { - "name": "Gp0115675_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115675", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", - "md5_checksum": "242a1c60f6cb14ba8430375171fda436", - "id": "nmdc:242a1c60f6cb14ba8430375171fda436", - "file_size_bytes": 3968420 - } - ] - }, + "input_read_bases": 3367146282, + "name": "Read QC Activity for nmdc:mga0rw1351", + "output_read_count": 20445042, + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:39:05+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139faf" + "$oid": "649b009bff710ae353f8cf56" }, "has_input": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" - ], - "part_of": [ - "nmdc:mga0vf2h47" + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" ], - "ctg_logsum": 115425, - "scaf_logsum": 116377, - "gap_pct": 0.00425, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:dd5cad9348fc41cb18ac989185fed0b5", - "nmdc:6d02084941141ac9a1876c621a50aef0", - "nmdc:cc8faed3494579d793c08ede54cb5b3a", - "nmdc:8891e46c9766f2b84d45fd6e46078a64", - "nmdc:80470769e7531b46c709d12c65487ffe" + "nmdc:358559c32b69eff51758db66ac01021b", + "nmdc:befbd648249c2871bd27999120e50bf7", + "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + "nmdc:e0736ff520260ba2097c02b9e767362c", + "nmdc:a00960655f9e80726fdb0fade1bec958", + "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" ], - "asm_score": 4.718, - "was_informed_by": "gold:Gp0115675", - "ctg_powsum": 13174, - "scaf_max": 25635, - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", - "scaf_powsum": 13311, + "was_informed_by": "gold:Gp0115671", + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", "execution_resource": "NERSC-Cori", - 
"contigs": 80858, - "name": "Assembly Activity for nmdc:mga0vf2h47", - "ctg_max": 25635, - "gc_std": 0.10716, - "contig_bp": 38571486, - "gc_avg": 0.56103, - "started_at_time": "2021-10-11T02:28:05Z", - "scaf_bp": 38573126, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 80703, - "ended_at_time": "2021-10-11T03:25:21+00:00", - "ctg_l50": 435, - "ctg_l90": 284, - "ctg_n50": 19932, - "ctg_n90": 68422, - "scaf_l50": 436, - "scaf_l90": 284, - "scaf_n50": 19754, - "scaf_n90": 68272, - "output_data_objects": [ - { - "name": "Gp0115675_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115675", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_contigs.fna", - "md5_checksum": "dd5cad9348fc41cb18ac989185fed0b5", - "id": "nmdc:dd5cad9348fc41cb18ac989185fed0b5", - "file_size_bytes": 41662357 - }, - { - "name": "Gp0115675_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115675", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_scaffolds.fna", - "md5_checksum": "6d02084941141ac9a1876c621a50aef0", - "id": "nmdc:6d02084941141ac9a1876c621a50aef0", - "file_size_bytes": 41417652 - }, - { - "name": "Gp0115675_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_covstats.txt", - "md5_checksum": "cc8faed3494579d793c08ede54cb5b3a", - "id": "nmdc:cc8faed3494579d793c08ede54cb5b3a", - "file_size_bytes": 6338871 - }, - { - "name": "Gp0115675_Assembled AGP file", - "description": "Assembled AGP file for Gp0115675", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_assembly.agp", - "md5_checksum": "8891e46c9766f2b84d45fd6e46078a64", - "id": 
"nmdc:8891e46c9766f2b84d45fd6e46078a64", - "file_size_bytes": 5901316 - }, - { - "name": "Gp0115675_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115675", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_pairedMapped_sorted.bam", - "md5_checksum": "80470769e7531b46c709d12c65487ffe", - "id": "nmdc:80470769e7531b46c709d12c65487ffe", - "file_size_bytes": 1635169657 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:39:05+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9c2" + "$oid": "61e71a4c833bcf838a702155" }, "has_input": [ - "nmdc:dd5cad9348fc41cb18ac989185fed0b5" + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" ], "part_of": [ - "nmdc:mga0vf2h47" + "nmdc:mga0rw1351" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:93ea50ce57263b498b781240c04dbf46", - "nmdc:71195b9bc697bf29cd865718a689eb1b", - "nmdc:d8cccd9c5cd237c238e5ba443c477db5", - "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", - "nmdc:17e386be26f52833c463a89733ef2e34", - "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", - "nmdc:b11e36753299e36fa92670cf75165698", - "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", - "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", - "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", - "nmdc:b080e9d168c0c1330fda64814afe335b", - "nmdc:4ea799de0bc051409b7231801eea0129" + "nmdc:358559c32b69eff51758db66ac01021b", + "nmdc:befbd648249c2871bd27999120e50bf7", + "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + 
"nmdc:e0736ff520260ba2097c02b9e767362c", + "nmdc:a00960655f9e80726fdb0fade1bec958", + "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" ], - "was_informed_by": "gold:Gp0115675", - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "was_informed_by": "gold:Gp0115671", + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0vf2h47", - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:25:21+00:00", - "output_data_objects": [ - { - "name": "Gp0115675_Protein FAA", - "description": "Protein FAA for Gp0115675", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_proteins.faa", - "md5_checksum": "93ea50ce57263b498b781240c04dbf46", - "id": "nmdc:93ea50ce57263b498b781240c04dbf46", - "file_size_bytes": 23383485 - }, - { - "name": "Gp0115675_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115675", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_structural_annotation.gff", - "md5_checksum": "71195b9bc697bf29cd865718a689eb1b", - "id": "nmdc:71195b9bc697bf29cd865718a689eb1b", - "file_size_bytes": 2508 - }, - { - "name": "Gp0115675_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115675", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_functional_annotation.gff", - "md5_checksum": "d8cccd9c5cd237c238e5ba443c477db5", - "id": "nmdc:d8cccd9c5cd237c238e5ba443c477db5", - "file_size_bytes": 26575202 - }, - { - "name": "Gp0115675_KO TSV file", - "description": "KO TSV file for Gp0115675", - "data_object_type": "Annotation KEGG Orthology", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko.tsv", - "md5_checksum": "1cb17c4c7681345f53a7f4ef5c319fba", - "id": "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", - "file_size_bytes": 3577030 - }, - { - "name": "Gp0115675_EC TSV file", - "description": "EC TSV file for Gp0115675", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ec.tsv", - "md5_checksum": "17e386be26f52833c463a89733ef2e34", - "id": "nmdc:17e386be26f52833c463a89733ef2e34", - "file_size_bytes": 2294485 - }, - { - "name": "Gp0115675_COG GFF file", - "description": "COG GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cog.gff", - "md5_checksum": "3e9b2fd11f2f5c16f9f25560e3b6fc55", - "id": "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", - "file_size_bytes": 15181628 - }, - { - "name": "Gp0115675_PFAM GFF file", - "description": "PFAM GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_pfam.gff", - "md5_checksum": "b11e36753299e36fa92670cf75165698", - "id": "nmdc:b11e36753299e36fa92670cf75165698", - "file_size_bytes": 11905020 - }, - { - "name": "Gp0115675_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_tigrfam.gff", - "md5_checksum": "70ac1de5fbc6cc835d5a0d1855f7a28a", - "id": "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", - "file_size_bytes": 1629352 - }, - { - "name": "Gp0115675_SMART GFF file", - "description": "SMART GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_smart.gff", - "md5_checksum": "b9e3eb74fa7fee0fac886f8a436b9ecf", - "id": "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", - "file_size_bytes": 3360419 - }, - { - "name": "Gp0115675_SuperFam GFF file", - "description": "SuperFam GFF 
file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_supfam.gff", - "md5_checksum": "faa27c2be6dc56e66f739dbffcbb6bef", - "id": "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", - "file_size_bytes": 19134944 - }, - { - "name": "Gp0115675_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cath_funfam.gff", - "md5_checksum": "b080e9d168c0c1330fda64814afe335b", - "id": "nmdc:b080e9d168c0c1330fda64814afe335b", - "file_size_bytes": 15037016 - }, - { - "name": "Gp0115675_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko_ec.gff", - "md5_checksum": "4ea799de0bc051409b7231801eea0129", - "id": "nmdc:4ea799de0bc051409b7231801eea0129", - "file_size_bytes": 11398449 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:39:05+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115676_Filtered Reads", + "description": "Filtered Reads for Gp0115676", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", + "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", + "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", + "file_size_bytes": 3113249122 + }, + { + "name": "Gp0115676_Filtered Stats", + "description": "Filtered Stats for Gp0115676", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", + "md5_checksum": 
"79815495339053b7935b55dbde02b2ff", + "id": "nmdc:79815495339053b7935b55dbde02b2ff", + "file_size_bytes": 292 + }, + { + "name": "Gp0115676_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", + "md5_checksum": "13343b2533892633bcc3655a1ebe788f", + "id": "nmdc:13343b2533892633bcc3655a1ebe788f", + "file_size_bytes": 13659 + }, + { + "name": "Gp0115676_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", + "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", + "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "file_size_bytes": 1168924 + }, + { + "name": "Gp0115676_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115676", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", + "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", + "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", + "file_size_bytes": 267660 + }, + { + "name": "Gp0115676_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115676", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", + "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", + "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", + "file_size_bytes": 2721808152 + }, + { + "name": "Gp0115676_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115676", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", + "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", + "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "file_size_bytes": 263207 + }, + { + "name": "Gp0115676_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115676", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", + "md5_checksum": "6e1e28773094884d35c04072309e285a", + "id": "nmdc:6e1e28773094884d35c04072309e285a", + "file_size_bytes": 2347912 + }, + { + "name": "Gp0115676_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115676", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", + "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", + "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "file_size_bytes": 2224468607 + }, + { + "name": "Gp0115676_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115676", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", + "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", + "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "file_size_bytes": 701128 + }, + { + "name": "Gp0115676_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115676", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", + "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", + "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", + "file_size_bytes": 4217185 + }, + { + "name": "Gp0115676_Gottcha2 TSV report", + 
"description": "Gottcha2 TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", + "md5_checksum": "13343b2533892633bcc3655a1ebe788f", + "id": "nmdc:13343b2533892633bcc3655a1ebe788f", + "file_size_bytes": 13659 + }, + { + "name": "Gp0115676_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", + "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", + "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "file_size_bytes": 1168924 + }, + { + "name": "Gp0115676_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115676", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", + "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", + "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", + "file_size_bytes": 267660 + }, + { + "name": "Gp0115676_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115676", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", + "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", + "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", + "file_size_bytes": 2721808152 + }, + { + "name": "Gp0115676_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115676", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", + "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", + "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "file_size_bytes": 263207 + }, + { + "name": 
"Gp0115676_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115676", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", + "md5_checksum": "6e1e28773094884d35c04072309e285a", + "id": "nmdc:6e1e28773094884d35c04072309e285a", + "file_size_bytes": 2347912 + }, + { + "name": "Gp0115676_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115676", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", + "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", + "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "file_size_bytes": 2224468607 + }, + { + "name": "Gp0115676_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115676", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", + "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", + "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "file_size_bytes": 701128 + }, + { + "name": "Gp0115676_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115676", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", + "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", + "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", + "file_size_bytes": 4217185 + }, + { + "name": "Gp0115676_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115676", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_contigs.fna", + "md5_checksum": "19987e32391f846db382edabf14ba43e", + "id": 
"nmdc:19987e32391f846db382edabf14ba43e", + "file_size_bytes": 105010680 + }, + { + "name": "Gp0115676_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115676", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_scaffolds.fna", + "md5_checksum": "1a4c5ace6c1b54e057d282031e8bc2c6", + "id": "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", + "file_size_bytes": 104445982 + }, + { + "name": "Gp0115676_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_covstats.txt", + "md5_checksum": "af7a38646011c9e6d0ad2b1ebd7f47c9", + "id": "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", + "file_size_bytes": 14811778 + }, + { + "name": "Gp0115676_Assembled AGP file", + "description": "Assembled AGP file for Gp0115676", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_assembly.agp", + "md5_checksum": "1b665fb0fbbf40a13122100c927b398b", + "id": "nmdc:1b665fb0fbbf40a13122100c927b398b", + "file_size_bytes": 13854137 + }, + { + "name": "Gp0115676_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115676", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_pairedMapped_sorted.bam", + "md5_checksum": "7c1232ff8d861d2e2c111a1dc4a70480", + "id": "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", + "file_size_bytes": 3366223347 + }, + { + "name": "Gp0115676_Protein FAA", + "description": "Protein FAA for Gp0115676", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_proteins.faa", + "md5_checksum": "35adf26b13c97c40147af2f067e0c9be", + "id": "nmdc:35adf26b13c97c40147af2f067e0c9be", + 
"file_size_bytes": 59120149 + }, + { + "name": "Gp0115676_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115676", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_structural_annotation.gff", + "md5_checksum": "3de29d8dede94769e7753f0aaee86691", + "id": "nmdc:3de29d8dede94769e7753f0aaee86691", + "file_size_bytes": 2524 + }, + { + "name": "Gp0115676_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115676", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_functional_annotation.gff", + "md5_checksum": "6fa3d1e5fae636b4199ff57b4776a51c", + "id": "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", + "file_size_bytes": 65284624 + }, + { + "name": "Gp0115676_KO TSV file", + "description": "KO TSV file for Gp0115676", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko.tsv", + "md5_checksum": "b865dcd9976c90dbc8459ec7ccc72d45", + "id": "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", + "file_size_bytes": 9219020 + }, + { + "name": "Gp0115676_EC TSV file", + "description": "EC TSV file for Gp0115676", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ec.tsv", + "md5_checksum": "98b9ea6588dc9ff918298c4a7c567edf", + "id": "nmdc:98b9ea6588dc9ff918298c4a7c567edf", + "file_size_bytes": 5972063 + }, + { + "name": "Gp0115676_COG GFF file", + "description": "COG GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cog.gff", + "md5_checksum": "d8fbe8d24c00eee2ef163e3bb428b718", + "id": "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", + "file_size_bytes": 39290017 + }, + { + "name": "Gp0115676_PFAM GFF 
file", + "description": "PFAM GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_pfam.gff", + "md5_checksum": "ed68f1e7fd4873f1ea756d0c58a9c550", + "id": "nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", + "file_size_bytes": 31343624 + }, + { + "name": "Gp0115676_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_tigrfam.gff", + "md5_checksum": "4d0469ae5b27dd4045d637d2493ccba9", + "id": "nmdc:4d0469ae5b27dd4045d637d2493ccba9", + "file_size_bytes": 4260344 + }, + { + "name": "Gp0115676_SMART GFF file", + "description": "SMART GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_smart.gff", + "md5_checksum": "a893783f6886e31b6bca5b6baede9f66", + "id": "nmdc:a893783f6886e31b6bca5b6baede9f66", + "file_size_bytes": 8240017 + }, + { + "name": "Gp0115676_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_supfam.gff", + "md5_checksum": "2225c723ccf0fd5ea309cfb5ca90d536", + "id": "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", + "file_size_bytes": 48186264 + }, + { + "name": "Gp0115676_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cath_funfam.gff", + "md5_checksum": "1abd69f8096f98174d95d9a3a13c2a3b", + "id": "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", + "file_size_bytes": 38259823 + }, + { + "name": "Gp0115676_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko_ec.gff", + "md5_checksum": "83647c3e1ed96fda36f7c119a3e98182", + "id": "nmdc:83647c3e1ed96fda36f7c119a3e98182", + "file_size_bytes": 29337291 + }, + { + "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115676_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.tooShort.fa", + "md5_checksum": "71667f3b8ee0cb5acadc541fa6914022", + "id": "nmdc:71667f3b8ee0cb5acadc541fa6914022", + "file_size_bytes": 75793492 + }, + { + "name": "Gp0115676_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.unbinned.fa", + "md5_checksum": "0141a64077e0f18adc42cb1915a00fa2", + "id": "nmdc:0141a64077e0f18adc42cb1915a00fa2", + "file_size_bytes": 17366889 + }, + { + "name": "Gp0115676_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115676", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_checkm_qa.out", + "md5_checksum": "982b47616dde63a388400fcc57d7c5b0", + "id": "nmdc:982b47616dde63a388400fcc57d7c5b0", + "file_size_bytes": 1700 + }, + { + "name": "Gp0115676_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115676", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_hqmq_bin.zip", + "md5_checksum": "313eb61bc7577e272eca6332e923f9c4", + "id": 
"nmdc:313eb61bc7577e272eca6332e923f9c4", + "file_size_bytes": 677741 }, + { + "name": "Gp0115676_metabat2 bins", + "description": "metabat2 bins for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_metabat_bin.zip", + "md5_checksum": "763eb40a8905e9b0d459c45222f1b05e", + "id": "nmdc:763eb40a8905e9b0d459c45222f1b05e", + "file_size_bytes": 2885722 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34722" + "$oid": "649b0052ec087f6bbab3472c" }, "has_input": [ - "nmdc:dd5cad9348fc41cb18ac989185fed0b5", - "nmdc:80470769e7531b46c709d12c65487ffe", - "nmdc:d8cccd9c5cd237c238e5ba443c477db5" + "nmdc:19987e32391f846db382edabf14ba43e", + "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", + "nmdc:6fa3d1e5fae636b4199ff57b4776a51c" ], - "too_short_contig_num": 76352, + "too_short_contig_num": 175121, "part_of": [ - "nmdc:mga0vf2h47" + "nmdc:mga0w3a067" ], - "binned_contig_num": 846, + "binned_contig_num": 1550, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:826503b4204b77c319c0bb353d69818e", - "nmdc:9a02c2954014bb8dcd62800609dd3ec5", - "nmdc:d15ed915946e095d045d73f4b4de019d", - "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", - "nmdc:55f66520d821205e80dcd303cc2793bc" + "nmdc:71667f3b8ee0cb5acadc541fa6914022", + "nmdc:0141a64077e0f18adc42cb1915a00fa2", + "nmdc:982b47616dde63a388400fcc57d7c5b0", + "nmdc:313eb61bc7577e272eca6332e923f9c4", + "nmdc:763eb40a8905e9b0d459c45222f1b05e" ], - "was_informed_by": "gold:Gp0115675", - "input_contig_num": 80857, - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "was_informed_by": "gold:Gp0115676", + "input_contig_num": 187123, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0vf2h47", + "name": "MAGs Analysis Activity for 
nmdc:mga0w3a067", "mags_list": [ { - "number_of_contig": 579, - "completeness": 73.87, + "number_of_contig": 457, + "completeness": 95.14, "bin_name": "bins.1", - "gene_count": 3274, + "gene_count": 6260, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", - "num_16s": 1, + "num_16s": 2, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 25.78, + "contamination": 76.52, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 1, "num_23s": 1, "gtdbtk_genus": "", - "num_t_rna": 37 + "num_t_rna": 85 }, { - "number_of_contig": 199, - "completeness": 36.21, + "number_of_contig": 24, + "completeness": 4.17, "bin_name": "bins.2", - "gene_count": 1070, + "gene_count": 246, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -20142,709 +19242,739 @@ "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, + "num_5s": 1, + "num_23s": 1, "gtdbtk_genus": "", - "num_t_rna": 16 + "num_t_rna": 5 }, { - "number_of_contig": 68, - "completeness": 4.17, + "number_of_contig": 175, + "completeness": 36.21, "bin_name": "bins.3", - "gene_count": 480, + "gene_count": 937, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 4.17, + "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 5 - } - ], - "unbinned_contig_num": 3659, - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:25:21+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115675_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.tooShort.fa", - "md5_checksum": "826503b4204b77c319c0bb353d69818e", - "id": "nmdc:826503b4204b77c319c0bb353d69818e", - "file_size_bytes": 31246547 - }, - { - "name": "Gp0115675_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.unbinned.fa", - "md5_checksum": "9a02c2954014bb8dcd62800609dd3ec5", - "id": "nmdc:9a02c2954014bb8dcd62800609dd3ec5", - "file_size_bytes": 6258719 + "num_t_rna": 12 }, { - "name": "Gp0115675_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115675", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_checkm_qa.out", - "md5_checksum": "d15ed915946e095d045d73f4b4de019d", - "id": "nmdc:d15ed915946e095d045d73f4b4de019d", - "file_size_bytes": 1092 + "number_of_contig": 485, + "completeness": 43.26, + "bin_name": "bins.4", + "gene_count": 2590, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.55, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 29 }, { - "name": "Gp0115675_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115675", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_hqmq_bin.zip", - "md5_checksum": "8de4404b1a6601bae7d7d5fd51bd131a", - 
"id": "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", - "file_size_bytes": 182 + "number_of_contig": 339, + "completeness": 79.0, + "bin_name": "bins.5", + "gene_count": 2464, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 3.71, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 32 }, { - "name": "Gp0115675_metabat2 bins", - "description": "metabat2 bins for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_metabat_bin.zip", - "md5_checksum": "55f66520d821205e80dcd303cc2793bc", - "id": "nmdc:55f66520d821205e80dcd303cc2793bc", - "file_size_bytes": 1259160 + "number_of_contig": 70, + "completeness": 0.0, + "bin_name": "bins.6", + "gene_count": 298, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 } - ] + ], + "unbinned_contig_num": 10452, + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b51" - }, - "id": "nmdc:omprc-11-2jt0jk84", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", - "has_input": [ - "nmdc:bsm-11-qjtgh002" - ], - "has_output": [ - "jgi:55f23d820d8785306f964980" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in 
Columbia River, Washington, USA - GW-RW T2_30-Apr-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115665" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c88e" + "$oid": "649b005bbf2caae0415ef9cb" }, "has_input": [ - "nmdc:0d39aafcd16496457fbb3be0f785b67f" + "nmdc:19987e32391f846db382edabf14ba43e" ], "part_of": [ - "nmdc:mga06n7k74" + "nmdc:mga0w3a067" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530", - "nmdc:f0e1b9004b0e9aafb06c444444a522c7" + "nmdc:35adf26b13c97c40147af2f067e0c9be", + "nmdc:3de29d8dede94769e7753f0aaee86691", + "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", + "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", + "nmdc:98b9ea6588dc9ff918298c4a7c567edf", + "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", + "nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", + "nmdc:4d0469ae5b27dd4045d637d2493ccba9", + "nmdc:a893783f6886e31b6bca5b6baede9f66", + "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", + "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", + "nmdc:83647c3e1ed96fda36f7c119a3e98182" ], - "was_informed_by": "gold:Gp0115665", - "input_read_count": 50719572, - "output_read_bases": 7175148255, - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "was_informed_by": "gold:Gp0115676", + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", "execution_resource": "NERSC-Cori", - "input_read_bases": 7658655372, - "name": "Read QC Activity for nmdc:mga06n7k74", - "output_read_count": 47896142, - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T06:19:29+00:00", - 
"output_data_objects": [ - { - "name": "Gp0115665_Filtered Reads", - "description": "Filtered Reads for Gp0115665", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filtered.fastq.gz", - "md5_checksum": "b0462e18cf9dafc9d2207a58bf085530", - "id": "nmdc:b0462e18cf9dafc9d2207a58bf085530", - "file_size_bytes": 4096192298 - }, - { - "name": "Gp0115665_Filtered Stats", - "description": "Filtered Stats for Gp0115665", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filterStats.txt", - "md5_checksum": "f0e1b9004b0e9aafb06c444444a522c7", - "id": "nmdc:f0e1b9004b0e9aafb06c444444a522c7", - "file_size_bytes": 291 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0w3a067", + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf57" + "$oid": "649b005f2ca5ee4adb139fbe" }, "has_input": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530" + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" + ], + "part_of": [ + "nmdc:mga0w3a067" ], + "ctg_logsum": 335229, + "scaf_logsum": 337025, + "gap_pct": 0.00236, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:432fedddcbacb4e69c0350354ab44080", - "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "nmdc:e3d7339ba5c7677be13854f391462474", - "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" + "nmdc:19987e32391f846db382edabf14ba43e", + "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", + "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", + "nmdc:1b665fb0fbbf40a13122100c927b398b", + "nmdc:7c1232ff8d861d2e2c111a1dc4a70480" 
], - "was_informed_by": "gold:Gp0115665", - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "asm_score": 10.939, + "was_informed_by": "gold:Gp0115676", + "ctg_powsum": 40696, + "scaf_max": 163197, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "scaf_powsum": 40973, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:19:29+00:00", - "output_data_objects": [ - { - "name": "Gp0115665_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", - "md5_checksum": "432fedddcbacb4e69c0350354ab44080", - "id": "nmdc:432fedddcbacb4e69c0350354ab44080", - "file_size_bytes": 18015 - }, - { - "name": "Gp0115665_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", - "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", - "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "file_size_bytes": 1283220 - }, - { - "name": "Gp0115665_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115665", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", - "md5_checksum": "e3d7339ba5c7677be13854f391462474", - "id": "nmdc:e3d7339ba5c7677be13854f391462474", - "file_size_bytes": 281366 - }, - { - "name": "Gp0115665_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115665", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", - 
"md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", - "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "file_size_bytes": 3481369185 - }, - { - "name": "Gp0115665_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115665", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", - "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", - "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "file_size_bytes": 263480 - }, - { - "name": "Gp0115665_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115665", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", - "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", - "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "file_size_bytes": 2347079 - }, - { - "name": "Gp0115665_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115665", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", - "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", - "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "file_size_bytes": 2866138771 - }, - { - "name": "Gp0115665_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115665", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", - "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", - "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "file_size_bytes": 728030 - }, - { - "name": "Gp0115665_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115665", - "data_object_type": "Kraken2 Krona Plot", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", - "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", - "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", - "file_size_bytes": 4374689 - } - ] - }, + "contigs": 187125, + "name": "Assembly Activity for nmdc:mga0w3a067", + "ctg_max": 163197, + "gc_std": 0.10616, + "contig_bp": 97611209, + "gc_avg": 0.5929, + "started_at_time": "2021-10-11T02:26:37Z", + "scaf_bp": 97613509, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 186895, + "ended_at_time": "2021-10-11T05:40:05+00:00", + "ctg_l50": 499, + "ctg_l90": 288, + "ctg_n50": 42676, + "ctg_n90": 155670, + "scaf_l50": 499, + "scaf_l90": 288, + "scaf_n50": 42593, + "scaf_n90": 155449, + "scaf_l_gt50k": 743033, + "scaf_n_gt50k": 11, + "scaf_pct_gt50k": 0.7611989 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e71a36833bcf838a702021" + "$oid": "649b009773e8249959349b4e" }, + "id": "nmdc:omprc-11-7ey2jr63", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", "has_input": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530" + "nmdc:bsm-11-pkgtg048" + ], + "has_output": [ + "jgi:55d740220d8785342fcf7e35" ], "part_of": [ - "nmdc:mga06n7k74" + "nmdc:sty-11-aygzgv51" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:432fedddcbacb4e69c0350354ab44080", - "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "nmdc:e3d7339ba5c7677be13854f391462474", - "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - 
"nmdc:64459bec7843953a70f8ea2b09a7e9de", - "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" - ], - "was_informed_by": "gold:Gp0115665", - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:19:29+00:00", - "output_data_objects": [ - { - "name": "Gp0115665_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", - "md5_checksum": "432fedddcbacb4e69c0350354ab44080", - "id": "nmdc:432fedddcbacb4e69c0350354ab44080", - "file_size_bytes": 18015 - }, - { - "name": "Gp0115665_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", - "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", - "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "file_size_bytes": 1283220 - }, - { - "name": "Gp0115665_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115665", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", - "md5_checksum": "e3d7339ba5c7677be13854f391462474", - "id": "nmdc:e3d7339ba5c7677be13854f391462474", - "file_size_bytes": 281366 - }, - { - "name": "Gp0115665_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115665", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", - "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", - "id": 
"nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "file_size_bytes": 3481369185 - }, - { - "name": "Gp0115665_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115665", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", - "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", - "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "file_size_bytes": 263480 - }, - { - "name": "Gp0115665_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115665", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", - "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", - "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "file_size_bytes": 2347079 - }, - { - "name": "Gp0115665_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115665", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", - "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", - "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "file_size_bytes": 2866138771 - }, - { - "name": "Gp0115665_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115665", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", - "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", - "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "file_size_bytes": 728030 - }, - { - "name": "Gp0115665_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115665", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", - "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", - "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", - "file_size_bytes": 4374689 - } + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115676" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fc2" + "$oid": "649b009d6bdd4fd20273c883" }, "has_input": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530" + "nmdc:5672111f6f33b8aff5f65e69ebb41c5e" ], "part_of": [ - "nmdc:mga06n7k74" + "nmdc:mga0w3a067" ], - "ctg_logsum": 427633, - "scaf_logsum": 429769, - "gap_pct": 0.00206, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9704e757dc537a7f06c6f83fc633cf64", - "nmdc:2674db4e7e6171864fa47f0b3b5a9603", - "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", - "nmdc:5a1240fa0a6bf92c95e852c0352e5839", - "nmdc:e28c85b50e0b654626e655755165aff5" + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", + "nmdc:79815495339053b7935b55dbde02b2ff" ], - "asm_score": 5.768, - "was_informed_by": "gold:Gp0115665", - "ctg_powsum": 48025, - "scaf_max": 44931, - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "scaf_powsum": 48321, + "was_informed_by": "gold:Gp0115676", + "input_read_count": 39069214, + "output_read_bases": 5550744725, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", "execution_resource": "NERSC-Cori", - "contigs": 352055, - "name": "Assembly Activity for nmdc:mga06n7k74", - "ctg_max": 44931, - "gc_std": 0.13027, - "contig_bp": 172051088, - 
"gc_avg": 0.51918, - "started_at_time": "2021-10-11T02:28:54Z", - "scaf_bp": 172054628, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 351728, - "ended_at_time": "2021-10-11T06:19:29+00:00", - "ctg_l50": 468, - "ctg_l90": 289, - "ctg_n50": 95561, - "ctg_n90": 294969, - "scaf_l50": 468, - "scaf_l90": 289, - "scaf_n50": 95446, - "scaf_n90": 294658, - "output_data_objects": [ - { - "name": "Gp0115665_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115665", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_contigs.fna", - "md5_checksum": "9704e757dc537a7f06c6f83fc633cf64", - "id": "nmdc:9704e757dc537a7f06c6f83fc633cf64", - "file_size_bytes": 185880663 - }, - { - "name": "Gp0115665_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115665", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_scaffolds.fna", - "md5_checksum": "2674db4e7e6171864fa47f0b3b5a9603", - "id": "nmdc:2674db4e7e6171864fa47f0b3b5a9603", - "file_size_bytes": 184819604 - }, - { - "name": "Gp0115665_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_covstats.txt", - "md5_checksum": "ab6c496a5e3ab895fee3812fd992e1e7", - "id": "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", - "file_size_bytes": 27961807 - }, - { - "name": "Gp0115665_Assembled AGP file", - "description": "Assembled AGP file for Gp0115665", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_assembly.agp", - "md5_checksum": "5a1240fa0a6bf92c95e852c0352e5839", - "id": "nmdc:5a1240fa0a6bf92c95e852c0352e5839", - "file_size_bytes": 26248242 - }, - { - "name": "Gp0115665_Metagenome Alignment BAM file", - "description": 
"Metagenome Alignment BAM file for Gp0115665", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_pairedMapped_sorted.bam", - "md5_checksum": "e28c85b50e0b654626e655755165aff5", - "id": "nmdc:e28c85b50e0b654626e655755165aff5", - "file_size_bytes": 4460978045 - } - ] - }, + "input_read_bases": 5899451314, + "name": "Read QC Activity for nmdc:mga0w3a067", + "output_read_count": 37037822, + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9d2" + "$oid": "649b009bff710ae353f8cf4c" }, "has_input": [ - "nmdc:9704e757dc537a7f06c6f83fc633cf64" - ], - "part_of": [ - "nmdc:mga06n7k74" + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", - "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", - "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", - "nmdc:b782707ae2cf5676596ca99800deea26", - "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", - "nmdc:f5d79b4c69825e0b66153e7582cb489b", - "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", - "nmdc:26cc0a40aab6bfc64d24afa760b43102", - "nmdc:83785a6e8f7658dc2354b9bad1b86d01", - "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", - "nmdc:4876eed2bee3b3b7b2ac827857410be6", - "nmdc:bb5b62735a896d189c9a274c6e091bab" + "nmdc:13343b2533892633bcc3655a1ebe788f", + "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "nmdc:95a2de8be672fd50bf542215194dc4d4", + "nmdc:6cd0210b345d6908ad8ab683b1a11572", + "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "nmdc:6e1e28773094884d35c04072309e285a", + "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" ], - "was_informed_by": "gold:Gp0115665", - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "was_informed_by": 
"gold:Gp0115676", + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga06n7k74", - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T06:19:29+00:00", - "output_data_objects": [ - { - "name": "Gp0115665_Protein FAA", - "description": "Protein FAA for Gp0115665", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_proteins.faa", - "md5_checksum": "2d23b05bda1c60f2ef6d54c8fe5fb5e7", - "id": "nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", - "file_size_bytes": 100719814 - }, - { - "name": "Gp0115665_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115665", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_structural_annotation.gff", - "md5_checksum": "6c55ce2e0d6e74d217d850b273c4f0c4", - "id": "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", - "file_size_bytes": 2534 - }, - { - "name": "Gp0115665_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115665", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_functional_annotation.gff", - "md5_checksum": "b3add25cdb76a537e70617ac6a1d1fc5", - "id": "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", - "file_size_bytes": 110405026 - }, - { - "name": "Gp0115665_KO TSV file", - "description": "KO TSV file for Gp0115665", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko.tsv", - "md5_checksum": "b782707ae2cf5676596ca99800deea26", - "id": "nmdc:b782707ae2cf5676596ca99800deea26", - "file_size_bytes": 12963636 - }, - { - "name": "Gp0115665_EC TSV file", - "description": "EC TSV 
file for Gp0115665", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ec.tsv", - "md5_checksum": "6a8565bf52f70efa03c755a9f0b82d7d", - "id": "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", - "file_size_bytes": 8371381 - }, - { - "name": "Gp0115665_COG GFF file", - "description": "COG GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cog.gff", - "md5_checksum": "f5d79b4c69825e0b66153e7582cb489b", - "id": "nmdc:f5d79b4c69825e0b66153e7582cb489b", - "file_size_bytes": 56948501 - }, - { - "name": "Gp0115665_PFAM GFF file", - "description": "PFAM GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_pfam.gff", - "md5_checksum": "f66a0eaa9432ef5a2dd390214f47eed5", - "id": "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", - "file_size_bytes": 45618277 - }, - { - "name": "Gp0115665_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_tigrfam.gff", - "md5_checksum": "26cc0a40aab6bfc64d24afa760b43102", - "id": "nmdc:26cc0a40aab6bfc64d24afa760b43102", - "file_size_bytes": 5245489 - }, - { - "name": "Gp0115665_SMART GFF file", - "description": "SMART GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_smart.gff", - "md5_checksum": "83785a6e8f7658dc2354b9bad1b86d01", - "id": "nmdc:83785a6e8f7658dc2354b9bad1b86d01", - "file_size_bytes": 15993417 - }, - { - "name": "Gp0115665_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_supfam.gff", - "md5_checksum": "0f03207aa38d1aec8afdbf2bec1e4990", - "id": "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", - "file_size_bytes": 76926960 - }, - { - "name": "Gp0115665_Cath 
FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cath_funfam.gff", - "md5_checksum": "4876eed2bee3b3b7b2ac827857410be6", - "id": "nmdc:4876eed2bee3b3b7b2ac827857410be6", - "file_size_bytes": 61571084 - }, - { - "name": "Gp0115665_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko_ec.gff", - "md5_checksum": "bb5b62735a896d189c9a274c6e091bab", - "id": "nmdc:bb5b62735a896d189c9a274c6e091bab", - "file_size_bytes": 41244685 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:40:05+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34738" + "$oid": "61e71a12833bcf838a701b3a" }, "has_input": [ - "nmdc:9704e757dc537a7f06c6f83fc633cf64", - "nmdc:e28c85b50e0b654626e655755165aff5", - "nmdc:b3add25cdb76a537e70617ac6a1d1fc5" + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" ], - "too_short_contig_num": 331533, "part_of": [ - "nmdc:mga06n7k74" + "nmdc:mga0w3a067" ], - "binned_contig_num": 1636, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:79794b0497c1a4a292778ddb94504f7a", - "nmdc:e26dc245e491a521a94fbb9ab1b4293d", - "nmdc:45cb473694eb3cfa8abc7768e87ef303", - "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", - "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d" + "nmdc:13343b2533892633bcc3655a1ebe788f", + "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "nmdc:95a2de8be672fd50bf542215194dc4d4", + "nmdc:6cd0210b345d6908ad8ab683b1a11572", + 
"nmdc:5049a65d2a42d73c5d47373e990b70f7", + "nmdc:6e1e28773094884d35c04072309e285a", + "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" ], - "was_informed_by": "gold:Gp0115665", - "input_contig_num": 352053, - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "was_informed_by": "gold:Gp0115676", + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga06n7k74", - "mags_list": [ - { - "number_of_contig": 211, - "completeness": 44.36, - "bin_name": "bins.1", - "gene_count": 1029, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.49, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 21 - }, - { - "number_of_contig": 564, - "completeness": 79.11, - "bin_name": "bins.2", - "gene_count": 4164, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 0, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 3.11, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Aquabacterium", - "num_t_rna": 33 - }, - { - "number_of_contig": 646, - "completeness": 72.48, - "bin_name": "bins.3", - "gene_count": 4108, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 1, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 2.62, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Rhizobacter", - "num_t_rna": 28 - }, - { - "number_of_contig": 67, - "completeness": 1.97, - "bin_name": "bins.4", - "gene_count": 257, + "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", + 
"started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:40:05+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115677_Filtered Reads", + "description": "Filtered Reads for Gp0115677", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filtered.fastq.gz", + "md5_checksum": "63c857b3011dec61a08044d518291f23", + "id": "nmdc:63c857b3011dec61a08044d518291f23", + "file_size_bytes": 5307348388 + }, + { + "name": "Gp0115677_Filtered Stats", + "description": "Filtered Stats for Gp0115677", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filterStats.txt", + "md5_checksum": "2a79d7978caecf9b08fb2029fa42c9b3", + "id": "nmdc:2a79d7978caecf9b08fb2029fa42c9b3", + "file_size_bytes": 279 + }, + { + "name": "Gp0115677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", + "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", + "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "file_size_bytes": 17895 + }, + { + "name": "Gp0115677_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", + "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", + "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", + "file_size_bytes": 1182538 + }, + { + "name": "Gp0115677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", + "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", + "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "file_size_bytes": 276802 + }, + { + "name": "Gp0115677_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115677", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", + "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", + "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "file_size_bytes": 4716470614 + }, + { + "name": "Gp0115677_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115677", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", + "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", + "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "file_size_bytes": 267231 + }, + { + "name": "Gp0115677_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115677", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", + "md5_checksum": "229017cdb1832bb718d22dc27db44125", + "id": "nmdc:229017cdb1832bb718d22dc27db44125", + "file_size_bytes": 2356003 + }, + { + "name": "Gp0115677_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115677", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", + "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", + "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "file_size_bytes": 
3857487871 + }, + { + "name": "Gp0115677_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115677", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", + "md5_checksum": "bdd701b44e67929ec8bbe279697da937", + "id": "nmdc:bdd701b44e67929ec8bbe279697da937", + "file_size_bytes": 708598 + }, + { + "name": "Gp0115677_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115677", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", + "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", + "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", + "file_size_bytes": 4250180 + }, + { + "name": "Gp0115677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", + "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", + "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "file_size_bytes": 17895 + }, + { + "name": "Gp0115677_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", + "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", + "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", + "file_size_bytes": 1182538 + }, + { + "name": "Gp0115677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", + "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", + "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "file_size_bytes": 276802 + }, + { + "name": 
"Gp0115677_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115677", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", + "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", + "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "file_size_bytes": 4716470614 + }, + { + "name": "Gp0115677_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115677", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", + "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", + "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "file_size_bytes": 267231 + }, + { + "name": "Gp0115677_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115677", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", + "md5_checksum": "229017cdb1832bb718d22dc27db44125", + "id": "nmdc:229017cdb1832bb718d22dc27db44125", + "file_size_bytes": 2356003 + }, + { + "name": "Gp0115677_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115677", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", + "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", + "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "file_size_bytes": 3857487871 + }, + { + "name": "Gp0115677_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115677", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", + "md5_checksum": "bdd701b44e67929ec8bbe279697da937", + "id": "nmdc:bdd701b44e67929ec8bbe279697da937", + "file_size_bytes": 708598 + }, + { + "name": "Gp0115677_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115677", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", + "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", + "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", + "file_size_bytes": 4250180 + }, + { + "name": "Gp0115677_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115677", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_contigs.fna", + "md5_checksum": "3d9e14d6f7a854042a7d71def080409b", + "id": "nmdc:3d9e14d6f7a854042a7d71def080409b", + "file_size_bytes": 250747283 + }, + { + "name": "Gp0115677_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115677", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_scaffolds.fna", + "md5_checksum": "26d0d64ca7c850f0e04a4c33690bd178", + "id": "nmdc:26d0d64ca7c850f0e04a4c33690bd178", + "file_size_bytes": 249006954 + }, + { + "name": "Gp0115677_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_covstats.txt", + "md5_checksum": "8f8a0622cfe39054bd20f11116c78402", + "id": "nmdc:8f8a0622cfe39054bd20f11116c78402", + "file_size_bytes": 43716675 + }, + { + "name": "Gp0115677_Assembled AGP file", + "description": "Assembled AGP file for Gp0115677", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_assembly.agp", + "md5_checksum": "623aa370c44897cf30844647c2f5bd94", + "id": "nmdc:623aa370c44897cf30844647c2f5bd94", + "file_size_bytes": 41409581 + }, + { + "name": "Gp0115677_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115677", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_pairedMapped_sorted.bam", + "md5_checksum": "f4a1cf24281f14a666a1bfc9afc0aab5", + "id": "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", + "file_size_bytes": 5828772757 + }, + { + "name": "Gp0115677_Protein FAA", + "description": "Protein FAA for Gp0115677", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_proteins.faa", + "md5_checksum": "4f9d82516561ee307b1ab4841255aff0", + "id": "nmdc:4f9d82516561ee307b1ab4841255aff0", + "file_size_bytes": 144603933 + }, + { + "name": "Gp0115677_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115677", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_structural_annotation.gff", + "md5_checksum": "a658e9045fde900cdc78d0578446b960", + "id": "nmdc:a658e9045fde900cdc78d0578446b960", + "file_size_bytes": 2546 + }, + { + "name": "Gp0115677_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115677", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_functional_annotation.gff", + "md5_checksum": "075c3477b8874aa8d6c4dbc1360a2b38", + "id": "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", + "file_size_bytes": 167984752 + }, + { + "name": "Gp0115677_KO TSV file", + "description": "KO TSV file for Gp0115677", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko.tsv", + "md5_checksum": "9a338a51c6ca2ec4e0da4e15903be407", + "id": "nmdc:9a338a51c6ca2ec4e0da4e15903be407", + "file_size_bytes": 19341535 + }, + { + "name": "Gp0115677_EC TSV file", + "description": "EC TSV file for Gp0115677", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ec.tsv", + "md5_checksum": "0f9e627ace8d9b8420e957bcd033244a", + "id": "nmdc:0f9e627ace8d9b8420e957bcd033244a", + "file_size_bytes": 12533246 + }, + { + "name": "Gp0115677_COG GFF file", + "description": "COG GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cog.gff", + "md5_checksum": "144a997b22098f5fe748d2fa069cdc71", + "id": "nmdc:144a997b22098f5fe748d2fa069cdc71", + "file_size_bytes": 85841510 + }, + { + "name": "Gp0115677_PFAM GFF file", + "description": "PFAM GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_pfam.gff", + "md5_checksum": "82dc44c196f4b6b5552e8360f21f93a0", + "id": "nmdc:82dc44c196f4b6b5552e8360f21f93a0", + "file_size_bytes": 64139943 + }, + { + "name": "Gp0115677_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_tigrfam.gff", + "md5_checksum": "9238a5420065e1da9da31c270c90268a", + "id": "nmdc:9238a5420065e1da9da31c270c90268a", + "file_size_bytes": 7585101 + }, + { + "name": "Gp0115677_SMART GFF file", + "description": "SMART GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_smart.gff", + "md5_checksum": "ce31f29ff8fed6d0a973d61157af7220", + "id": "nmdc:ce31f29ff8fed6d0a973d61157af7220", + "file_size_bytes": 18353478 + }, + { + "name": "Gp0115677_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_supfam.gff", + "md5_checksum": "016cbd549e03d896ed746ab91771b21a", + "id": "nmdc:016cbd549e03d896ed746ab91771b21a", + "file_size_bytes": 107179327 + }, + { + "name": "Gp0115677_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cath_funfam.gff", + "md5_checksum": "7ef0abcd7fba705f6e9e26dcb8b1da8d", + "id": "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", + "file_size_bytes": 85056001 + }, + { + "name": "Gp0115677_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko_ec.gff", + "md5_checksum": "c935ce264779684a01c9a7777e506d02", + "id": "nmdc:c935ce264779684a01c9a7777e506d02", + "file_size_bytes": 61547317 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115677_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.tooShort.fa", + "md5_checksum": "603009bd6294d2318d929a57b5d3e5d3", + "id": "nmdc:603009bd6294d2318d929a57b5d3e5d3", + "file_size_bytes": 215021876 + }, + { + "name": "Gp0115677_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115677", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.unbinned.fa", + "md5_checksum": "c5334a4e305f78c294c304c3c0526826", + "id": "nmdc:c5334a4e305f78c294c304c3c0526826", + "file_size_bytes": 26658018 + }, + { + "name": "Gp0115677_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115677", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_checkm_qa.out", + "md5_checksum": "6adacc1ba06e5e451f3636c394c71ae8", + "id": "nmdc:6adacc1ba06e5e451f3636c394c71ae8", + "file_size_bytes": 1859 + }, + { + "name": "Gp0115677_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115677", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_hqmq_bin.zip", + "md5_checksum": "77d4e2a7f358b9ac1d53b69d7e8c45e1", + "id": "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", + "file_size_bytes": 2309404 + }, + { + "name": "Gp0115677_metabat2 bins", + "description": "metabat2 bins for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_metabat_bin.zip", + "md5_checksum": "42c3fb9a3906f6b413f99e3276bb7550", + "id": "nmdc:42c3fb9a3906f6b413f99e3276bb7550", + "file_size_bytes": 450699 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34724" + }, + "has_input": [ + "nmdc:3d9e14d6f7a854042a7d71def080409b", + "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", + "nmdc:075c3477b8874aa8d6c4dbc1360a2b38" + ], + "too_short_contig_num": 532333, + "part_of": [ + "nmdc:mga0zb0766" + ], + "binned_contig_num": 969, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"nmdc:603009bd6294d2318d929a57b5d3e5d3", + "nmdc:c5334a4e305f78c294c304c3c0526826", + "nmdc:6adacc1ba06e5e451f3636c394c71ae8", + "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", + "nmdc:42c3fb9a3906f6b413f99e3276bb7550" + ], + "was_informed_by": "gold:Gp0115677", + "input_contig_num": 548756, + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0zb0766", + "mags_list": [ + { + "number_of_contig": 68, + "completeness": 3.17, + "bin_name": "bins.1", + "gene_count": 329, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -20857,13 +19987,89 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 3 + "num_t_rna": 2 }, { - "number_of_contig": 64, - "completeness": 7.47, + "number_of_contig": 282, + "completeness": 59.56, + "bin_name": "bins.2", + "gene_count": 1735, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 26 + }, + { + "number_of_contig": 3, + "completeness": 54.6, + "bin_name": "bins.3", + "gene_count": 751, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA9983_A", + "num_16s": 1, + "gtdbtk_family": "UBA2163", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Paceibacteria", + "gtdbtk_phylum": "Patescibacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "1-14-0-10-47-16", + "num_t_rna": 22 + }, + { + "number_of_contig": 90, + "completeness": 98.7, + "bin_name": "bins.4", + "gene_count": 3042, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Alphaproteobacteria", + 
"gtdbtk_phylum": "Proteobacteria", + "num_5s": 2, + "num_23s": 2, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 46 + }, + { + "number_of_contig": 325, + "completeness": 73.34, "bin_name": "bins.5", - "gene_count": 259, + "gene_count": 2576, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 1, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.91, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 35 + }, + { + "number_of_contig": 199, + "completeness": 49.14, + "bin_name": "bins.6", + "gene_count": 1046, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -20873,16 +20079,16 @@ "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, + "num_5s": 2, + "num_23s": 2, "gtdbtk_genus": "", - "num_t_rna": 4 + "num_t_rna": 21 }, { - "number_of_contig": 84, - "completeness": 3.88, - "bin_name": "bins.6", - "gene_count": 313, + "number_of_contig": 2, + "completeness": 24.32, + "bin_name": "bins.7", + "gene_count": 329, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -20895,1326 +20101,1418 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 0 + "num_t_rna": 16 } ], - "unbinned_contig_num": 18884, - "started_at_time": "2021-10-11T02:28:54Z", + "unbinned_contig_num": 15454, + "started_at_time": "2021-10-11T02:24:49Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T06:19:29+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115665_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.tooShort.fa", - "md5_checksum": "79794b0497c1a4a292778ddb94504f7a", - "id": "nmdc:79794b0497c1a4a292778ddb94504f7a", - "file_size_bytes": 146322768 - }, - { - "name": "Gp0115665_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.unbinned.fa", - "md5_checksum": "e26dc245e491a521a94fbb9ab1b4293d", - "id": "nmdc:e26dc245e491a521a94fbb9ab1b4293d", - "file_size_bytes": 30116585 - }, - { - "name": "Gp0115665_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115665", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_checkm_qa.out", - "md5_checksum": "45cb473694eb3cfa8abc7768e87ef303", - "id": "nmdc:45cb473694eb3cfa8abc7768e87ef303", - "file_size_bytes": 1700 - }, - { - "name": "Gp0115665_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115665", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_hqmq_bin.zip", - "md5_checksum": "e344d87dbac42a645fd3c7d5b9d0a1a5", - "id": "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", - "file_size_bytes": 2294379 - }, - { - "name": "Gp0115665_metabat2 bins", - "description": "metabat2 bins for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_metabat_bin.zip", - "md5_checksum": "1098bd9921c6ab8f52aca786e3b7bf1d", - "id": "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d", - 
"file_size_bytes": 534425 - } - ] + "ended_at_time": "2021-10-11T06:26:42+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b52" - }, - "id": "nmdc:omprc-11-hqmmwn16", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-47nxfg85" - ], - "has_output": [ - "jgi:55d817fe0d8785342fcf8276" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115669" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c88d" + "$oid": "649b005bbf2caae0415ef9c1" }, "has_input": [ - "nmdc:f18b96b7d225d2f64f7b29015150113f" + "nmdc:3d9e14d6f7a854042a7d71def080409b" ], "part_of": [ - "nmdc:mga0k85x37" + "nmdc:mga0zb0766" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9", - "nmdc:58fde3e96dbb28af9133bede850a2653" + "nmdc:4f9d82516561ee307b1ab4841255aff0", + "nmdc:a658e9045fde900cdc78d0578446b960", + "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", + "nmdc:9a338a51c6ca2ec4e0da4e15903be407", + "nmdc:0f9e627ace8d9b8420e957bcd033244a", + "nmdc:144a997b22098f5fe748d2fa069cdc71", + "nmdc:82dc44c196f4b6b5552e8360f21f93a0", + "nmdc:9238a5420065e1da9da31c270c90268a", + 
"nmdc:ce31f29ff8fed6d0a973d61157af7220", + "nmdc:016cbd549e03d896ed746ab91771b21a", + "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", + "nmdc:c935ce264779684a01c9a7777e506d02" ], - "was_informed_by": "gold:Gp0115669", - "input_read_count": 20957834, - "output_read_bases": 3065138996, - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "was_informed_by": "gold:Gp0115677", + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", "execution_resource": "NERSC-Cori", - "input_read_bases": 3164632934, - "name": "Read QC Activity for nmdc:mga0k85x37", - "output_read_count": 20454422, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:20:07+00:00", - "output_data_objects": [ - { - "name": "Gp0115669_Filtered Reads", - "description": "Filtered Reads for Gp0115669", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", - "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", - "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", - "file_size_bytes": 1806935637 - }, - { - "name": "Gp0115669_Filtered Stats", - "description": "Filtered Stats for Gp0115669", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", - "md5_checksum": "58fde3e96dbb28af9133bede850a2653", - "id": "nmdc:58fde3e96dbb28af9133bede850a2653", - "file_size_bytes": 286 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0zb0766", + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:26:42+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf51" + "$oid": "649b005f2ca5ee4adb139fad" }, "has_input": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9" + "nmdc:63c857b3011dec61a08044d518291f23" ], + "part_of": [ + "nmdc:mga0zb0766" + ], + "ctg_logsum": 407938, + "scaf_logsum": 442802, + 
"gap_pct": 0.02562, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:05933784d02331b60b2531e2025cd3b7", - "nmdc:50fc279637cb7048aaaeec9b223d0286", - "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "nmdc:de45d70cc01749e9b5691dc24674545d", - "nmdc:534f97f3792b74385c4da305196a1b1d", - "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "nmdc:07b6457a094fab96563168ed287dc59f", - "nmdc:164a1bc50e8d6509446ae2877be8231c" + "nmdc:3d9e14d6f7a854042a7d71def080409b", + "nmdc:26d0d64ca7c850f0e04a4c33690bd178", + "nmdc:8f8a0622cfe39054bd20f11116c78402", + "nmdc:623aa370c44897cf30844647c2f5bd94", + "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5" ], - "was_informed_by": "gold:Gp0115669", - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "asm_score": 13.853, + "was_informed_by": "gold:Gp0115677", + "ctg_powsum": 50872, + "scaf_max": 582605, + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "scaf_powsum": 55815, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:20:07+00:00", - "output_data_objects": [ - { - "name": "Gp0115669_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", - "md5_checksum": "05933784d02331b60b2531e2025cd3b7", - "id": "nmdc:05933784d02331b60b2531e2025cd3b7", - "file_size_bytes": 11362 - }, - { - "name": "Gp0115669_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", - "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", - "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", - "file_size_bytes": 909325 - }, - { - "name": "Gp0115669_Gottcha2 Krona 
HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115669", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", - "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", - "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "file_size_bytes": 261412 - }, - { - "name": "Gp0115669_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115669", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", - "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", - "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "file_size_bytes": 1481087410 - }, - { - "name": "Gp0115669_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115669", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", - "md5_checksum": "de45d70cc01749e9b5691dc24674545d", - "id": "nmdc:de45d70cc01749e9b5691dc24674545d", - "file_size_bytes": 256139 - }, - { - "name": "Gp0115669_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115669", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", - "md5_checksum": "534f97f3792b74385c4da305196a1b1d", - "id": "nmdc:534f97f3792b74385c4da305196a1b1d", - "file_size_bytes": 2323658 - }, - { - "name": "Gp0115669_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115669", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", - 
"md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", - "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "file_size_bytes": 1220980345 - }, - { - "name": "Gp0115669_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115669", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", - "md5_checksum": "07b6457a094fab96563168ed287dc59f", - "id": "nmdc:07b6457a094fab96563168ed287dc59f", - "file_size_bytes": 651795 - }, - { - "name": "Gp0115669_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115669", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", - "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", - "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", - "file_size_bytes": 3963303 - } + "contigs": 548764, + "name": "Assembly Activity for nmdc:mga0zb0766", + "ctg_max": 464697, + "gc_std": 0.11035, + "contig_bp": 229799767, + "gc_avg": 0.55184, + "started_at_time": "2021-10-11T02:24:49Z", + "scaf_bp": 229858665, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 543003, + "ended_at_time": "2021-10-11T06:26:42+00:00", + "ctg_l50": 375, + "ctg_l90": 283, + "ctg_n50": 171281, + "ctg_n90": 471697, + "scaf_l50": 378, + "scaf_l90": 283, + "scaf_n50": 164840, + "scaf_n90": 466121, + "scaf_l_gt50k": 2790937, + "scaf_n_gt50k": 23, + "scaf_pct_gt50k": 1.2141969 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4f" + }, + "id": "nmdc:omprc-11-qngh7497", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", + "description": "Sterilized sand packs were 
incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-8362vs44" + ], + "has_output": [ + "jgi:55a9caff0d87852b2150891e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115677" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e71a34833bcf838a701fb0" + "$oid": "649b009d6bdd4fd20273c87a" }, "has_input": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9" + "nmdc:80ca2cf2e3edcac29eb62b43f62e25c3" ], "part_of": [ - "nmdc:mga0k85x37" + "nmdc:mga0zb0766" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:05933784d02331b60b2531e2025cd3b7", - "nmdc:50fc279637cb7048aaaeec9b223d0286", - "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "nmdc:de45d70cc01749e9b5691dc24674545d", - "nmdc:534f97f3792b74385c4da305196a1b1d", - "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "nmdc:07b6457a094fab96563168ed287dc59f", - "nmdc:164a1bc50e8d6509446ae2877be8231c" + "nmdc:63c857b3011dec61a08044d518291f23", + "nmdc:2a79d7978caecf9b08fb2029fa42c9b3" ], - "was_informed_by": "gold:Gp0115669", - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "was_informed_by": "gold:Gp0115677", + "input_read_count": 65434428, + "output_read_bases": 9483843059, + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": 
"2021-10-11T04:20:07+00:00", - "output_data_objects": [ - { - "name": "Gp0115669_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", - "md5_checksum": "05933784d02331b60b2531e2025cd3b7", - "id": "nmdc:05933784d02331b60b2531e2025cd3b7", - "file_size_bytes": 11362 - }, - { - "name": "Gp0115669_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", - "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", - "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", - "file_size_bytes": 909325 - }, - { - "name": "Gp0115669_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115669", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", - "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", - "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "file_size_bytes": 261412 - }, - { - "name": "Gp0115669_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115669", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", - "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", - "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "file_size_bytes": 1481087410 - }, - { - "name": "Gp0115669_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115669", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", - "md5_checksum": "de45d70cc01749e9b5691dc24674545d", 
- "id": "nmdc:de45d70cc01749e9b5691dc24674545d", - "file_size_bytes": 256139 - }, - { - "name": "Gp0115669_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115669", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", - "md5_checksum": "534f97f3792b74385c4da305196a1b1d", - "id": "nmdc:534f97f3792b74385c4da305196a1b1d", - "file_size_bytes": 2323658 - }, - { - "name": "Gp0115669_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115669", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", - "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", - "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "file_size_bytes": 1220980345 - }, - { - "name": "Gp0115669_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115669", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", - "md5_checksum": "07b6457a094fab96563168ed287dc59f", - "id": "nmdc:07b6457a094fab96563168ed287dc59f", - "file_size_bytes": 651795 - }, - { - "name": "Gp0115669_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115669", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", - "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", - "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", - "file_size_bytes": 3963303 - } - ] - }, + "input_read_bases": 9880598628, + "name": "Read QC Activity for nmdc:mga0zb0766", + "output_read_count": 64887080, + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:ReadQCAnalysisActivity", + 
"ended_at_time": "2021-10-11T06:26:42+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fba" + "$oid": "649b009bff710ae353f8cf3c" }, "has_input": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9" - ], - "part_of": [ - "nmdc:mga0k85x37" + "nmdc:63c857b3011dec61a08044d518291f23" ], - "ctg_logsum": 151663, - "scaf_logsum": 152336, - "gap_pct": 0.00222, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:03eb095e55df50d639fab237d06c14ac", - "nmdc:569cb5da239e82dce1b40bfa7e2fd518", - "nmdc:b77ef3014c80797cc88509adf02be002", - "nmdc:62d08517e0ba0f991f2d8bbd66061d78", - "nmdc:568b82cb6038fec5df04c30cbd874098" + "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "nmdc:c1730daf5e6017219fd9fc079e42c132", + "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "nmdc:229017cdb1832bb718d22dc27db44125", + "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "nmdc:bdd701b44e67929ec8bbe279697da937", + "nmdc:d35583a5ed45df5a58bf084fc67bf988" ], - "asm_score": 4.733, - "was_informed_by": "gold:Gp0115669", - "ctg_powsum": 17017, - "scaf_max": 20100, - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", - "scaf_powsum": 17101, + "was_informed_by": "gold:Gp0115677", + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", "execution_resource": "NERSC-Cori", - "contigs": 114114, - "name": "Assembly Activity for nmdc:mga0k85x37", - "ctg_max": 20100, - "gc_std": 0.11871, - "contig_bp": 54567489, - "gc_avg": 0.55923, - "started_at_time": "2021-10-11T02:28:43Z", - "scaf_bp": 54568699, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 114011, - "ended_at_time": "2021-10-11T04:20:07+00:00", - "ctg_l50": 451, - "ctg_l90": 285, - "ctg_n50": 29019, - "ctg_n90": 94816, - "scaf_l50": 451, - "scaf_l90": 285, - "scaf_n50": 28976, - "scaf_n90": 94720, - "output_data_objects": [ - { - "name": "Gp0115669_Assembled contigs fasta", - "description": "Assembled 
contigs fasta for Gp0115669", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_contigs.fna", - "md5_checksum": "03eb095e55df50d639fab237d06c14ac", - "id": "nmdc:03eb095e55df50d639fab237d06c14ac", - "file_size_bytes": 58951440 - }, - { - "name": "Gp0115669_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115669", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_scaffolds.fna", - "md5_checksum": "569cb5da239e82dce1b40bfa7e2fd518", - "id": "nmdc:569cb5da239e82dce1b40bfa7e2fd518", - "file_size_bytes": 58607757 - }, - { - "name": "Gp0115669_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_covstats.txt", - "md5_checksum": "b77ef3014c80797cc88509adf02be002", - "id": "nmdc:b77ef3014c80797cc88509adf02be002", - "file_size_bytes": 8978635 - }, - { - "name": "Gp0115669_Assembled AGP file", - "description": "Assembled AGP file for Gp0115669", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_assembly.agp", - "md5_checksum": "62d08517e0ba0f991f2d8bbd66061d78", - "id": "nmdc:62d08517e0ba0f991f2d8bbd66061d78", - "file_size_bytes": 8358006 - }, - { - "name": "Gp0115669_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115669", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_pairedMapped_sorted.bam", - "md5_checksum": "568b82cb6038fec5df04c30cbd874098", - "id": "nmdc:568b82cb6038fec5df04c30cbd874098", - "file_size_bytes": 1940308720 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", + "started_at_time": "2021-10-11T02:24:49Z", + "type": 
"nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:26:42+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9d0" + "$oid": "61e719d5833bcf838a70143c" }, "has_input": [ - "nmdc:03eb095e55df50d639fab237d06c14ac" + "nmdc:63c857b3011dec61a08044d518291f23" ], "part_of": [ - "nmdc:mga0k85x37" + "nmdc:mga0zb0766" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8a5f288604c61556ff3e827725864fd1", - "nmdc:0180998d6f3a3021638f04d9c0b35019", - "nmdc:950b8c4ebd1da50e2ca079273540f3af", - "nmdc:96ec49c6124cf4f8f3e7da3525348477", - "nmdc:12ca374a58bf899e42ed2c191a239e71", - "nmdc:b8ae2993aa29c8e04c00580dfdb82650", - "nmdc:7901c83b5a41e54854c96ab0b081ebd6", - "nmdc:762fe35b733dd82f89f5dce44fa54ed1", - "nmdc:661b70d6f41a44fcc1913b101f79d86a", - "nmdc:e1843a865023d75edd3139c14b8c355e", - "nmdc:a21449989b0b0884901602528b3f423e", - "nmdc:7f52547663f4eeea33de1e437012981e" + "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "nmdc:c1730daf5e6017219fd9fc079e42c132", + "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "nmdc:229017cdb1832bb718d22dc27db44125", + "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "nmdc:bdd701b44e67929ec8bbe279697da937", + "nmdc:d35583a5ed45df5a58bf084fc67bf988" ], - "was_informed_by": "gold:Gp0115669", - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "was_informed_by": "gold:Gp0115677", + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0k85x37", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:20:07+00:00", - "output_data_objects": [ - { - "name": "Gp0115669_Protein 
FAA", - "description": "Protein FAA for Gp0115669", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_proteins.faa", - "md5_checksum": "8a5f288604c61556ff3e827725864fd1", - "id": "nmdc:8a5f288604c61556ff3e827725864fd1", - "file_size_bytes": 32524652 - }, - { - "name": "Gp0115669_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115669", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_structural_annotation.gff", - "md5_checksum": "0180998d6f3a3021638f04d9c0b35019", - "id": "nmdc:0180998d6f3a3021638f04d9c0b35019", - "file_size_bytes": 2514 - }, - { - "name": "Gp0115669_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115669", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_functional_annotation.gff", - "md5_checksum": "950b8c4ebd1da50e2ca079273540f3af", - "id": "nmdc:950b8c4ebd1da50e2ca079273540f3af", - "file_size_bytes": 36685287 - }, - { - "name": "Gp0115669_KO TSV file", - "description": "KO TSV file for Gp0115669", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko.tsv", - "md5_checksum": "96ec49c6124cf4f8f3e7da3525348477", - "id": "nmdc:96ec49c6124cf4f8f3e7da3525348477", - "file_size_bytes": 4815732 - }, - { - "name": "Gp0115669_EC TSV file", - "description": "EC TSV file for Gp0115669", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ec.tsv", - "md5_checksum": "12ca374a58bf899e42ed2c191a239e71", - "id": "nmdc:12ca374a58bf899e42ed2c191a239e71", - "file_size_bytes": 3090911 - }, - { - "name": "Gp0115669_COG GFF file", - 
"description": "COG GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cog.gff", - "md5_checksum": "b8ae2993aa29c8e04c00580dfdb82650", - "id": "nmdc:b8ae2993aa29c8e04c00580dfdb82650", - "file_size_bytes": 20357759 - }, - { - "name": "Gp0115669_PFAM GFF file", - "description": "PFAM GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_pfam.gff", - "md5_checksum": "7901c83b5a41e54854c96ab0b081ebd6", - "id": "nmdc:7901c83b5a41e54854c96ab0b081ebd6", - "file_size_bytes": 15876941 - }, - { - "name": "Gp0115669_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_tigrfam.gff", - "md5_checksum": "762fe35b733dd82f89f5dce44fa54ed1", - "id": "nmdc:762fe35b733dd82f89f5dce44fa54ed1", - "file_size_bytes": 2104873 - }, - { - "name": "Gp0115669_SMART GFF file", - "description": "SMART GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_smart.gff", - "md5_checksum": "661b70d6f41a44fcc1913b101f79d86a", - "id": "nmdc:661b70d6f41a44fcc1913b101f79d86a", - "file_size_bytes": 4523437 - }, - { - "name": "Gp0115669_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_supfam.gff", - "md5_checksum": "e1843a865023d75edd3139c14b8c355e", - "id": "nmdc:e1843a865023d75edd3139c14b8c355e", - "file_size_bytes": 25872277 - }, - { - "name": "Gp0115669_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cath_funfam.gff", - "md5_checksum": "a21449989b0b0884901602528b3f423e", - "id": "nmdc:a21449989b0b0884901602528b3f423e", - "file_size_bytes": 20254021 - }, - { - "name": "Gp0115669_KO_EC GFF 
file", - "description": "KO_EC GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko_ec.gff", - "md5_checksum": "7f52547663f4eeea33de1e437012981e", - "id": "nmdc:7f52547663f4eeea33de1e437012981e", - "file_size_bytes": 15397038 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:26:42+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115675_Filtered Reads", + "description": "Filtered Reads for Gp0115675", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", + "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", + "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", + "file_size_bytes": 1533239347 + }, + { + "name": "Gp0115675_Filtered Stats", + "description": "Filtered Stats for Gp0115675", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", + "md5_checksum": "2507e3f107100ce0c72c57191d450818", + "id": "nmdc:2507e3f107100ce0c72c57191d450818", + "file_size_bytes": 287 + }, + { + "name": "Gp0115675_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", + "md5_checksum": "60d673988c4f4447feb5985e8501e914", + "id": "nmdc:60d673988c4f4447feb5985e8501e914", + "file_size_bytes": 8921 + }, + { + "name": "Gp0115675_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115675", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", + "md5_checksum": "a8f93ed13033eb949109b4e83980a893", + "id": "nmdc:a8f93ed13033eb949109b4e83980a893", + "file_size_bytes": 871109 + }, + { + "name": "Gp0115675_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115675", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", + "md5_checksum": "31dd6eb616f1e9815778453ab1601195", + "id": "nmdc:31dd6eb616f1e9815778453ab1601195", + "file_size_bytes": 252578 + }, + { + "name": "Gp0115675_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115675", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", + "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", + "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "file_size_bytes": 1218767711 + }, + { + "name": "Gp0115675_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115675", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", + "md5_checksum": "0aaac507db0e29827e1c87df47324932", + "id": "nmdc:0aaac507db0e29827e1c87df47324932", + "file_size_bytes": 254260 + }, + { + "name": "Gp0115675_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115675", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", + "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", + "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "file_size_bytes": 2324387 + }, + { + "name": 
"Gp0115675_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115675", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", + "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", + "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "file_size_bytes": 1001846607 + }, + { + "name": "Gp0115675_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115675", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", + "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", + "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", + "file_size_bytes": 635541 + }, + { + "name": "Gp0115675_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115675", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", + "md5_checksum": "242a1c60f6cb14ba8430375171fda436", + "id": "nmdc:242a1c60f6cb14ba8430375171fda436", + "file_size_bytes": 3968420 + }, + { + "name": "Gp0115675_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", + "md5_checksum": "60d673988c4f4447feb5985e8501e914", + "id": "nmdc:60d673988c4f4447feb5985e8501e914", + "file_size_bytes": 8921 + }, + { + "name": "Gp0115675_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", + "md5_checksum": "a8f93ed13033eb949109b4e83980a893", + "id": "nmdc:a8f93ed13033eb949109b4e83980a893", + "file_size_bytes": 871109 + }, + { + "name": 
"Gp0115675_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115675", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", + "md5_checksum": "31dd6eb616f1e9815778453ab1601195", + "id": "nmdc:31dd6eb616f1e9815778453ab1601195", + "file_size_bytes": 252578 + }, + { + "name": "Gp0115675_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115675", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", + "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", + "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "file_size_bytes": 1218767711 + }, + { + "name": "Gp0115675_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115675", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", + "md5_checksum": "0aaac507db0e29827e1c87df47324932", + "id": "nmdc:0aaac507db0e29827e1c87df47324932", + "file_size_bytes": 254260 + }, + { + "name": "Gp0115675_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115675", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", + "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", + "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "file_size_bytes": 2324387 + }, + { + "name": "Gp0115675_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115675", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", + "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", + "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "file_size_bytes": 1001846607 + }, + { + "name": "Gp0115675_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115675", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", + "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", + "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", + "file_size_bytes": 635541 + }, + { + "name": "Gp0115675_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115675", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", + "md5_checksum": "242a1c60f6cb14ba8430375171fda436", + "id": "nmdc:242a1c60f6cb14ba8430375171fda436", + "file_size_bytes": 3968420 + }, + { + "name": "Gp0115675_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115675", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_contigs.fna", + "md5_checksum": "dd5cad9348fc41cb18ac989185fed0b5", + "id": "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "file_size_bytes": 41662357 + }, + { + "name": "Gp0115675_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115675", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_scaffolds.fna", + "md5_checksum": "6d02084941141ac9a1876c621a50aef0", + "id": "nmdc:6d02084941141ac9a1876c621a50aef0", + "file_size_bytes": 41417652 + }, + { + "name": "Gp0115675_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115675", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_covstats.txt", + "md5_checksum": "cc8faed3494579d793c08ede54cb5b3a", + "id": "nmdc:cc8faed3494579d793c08ede54cb5b3a", + "file_size_bytes": 6338871 + }, + { + "name": "Gp0115675_Assembled AGP file", + "description": "Assembled AGP file for Gp0115675", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_assembly.agp", + "md5_checksum": "8891e46c9766f2b84d45fd6e46078a64", + "id": "nmdc:8891e46c9766f2b84d45fd6e46078a64", + "file_size_bytes": 5901316 + }, + { + "name": "Gp0115675_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115675", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_pairedMapped_sorted.bam", + "md5_checksum": "80470769e7531b46c709d12c65487ffe", + "id": "nmdc:80470769e7531b46c709d12c65487ffe", + "file_size_bytes": 1635169657 + }, + { + "name": "Gp0115675_Protein FAA", + "description": "Protein FAA for Gp0115675", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_proteins.faa", + "md5_checksum": "93ea50ce57263b498b781240c04dbf46", + "id": "nmdc:93ea50ce57263b498b781240c04dbf46", + "file_size_bytes": 23383485 + }, + { + "name": "Gp0115675_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115675", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_structural_annotation.gff", + "md5_checksum": "71195b9bc697bf29cd865718a689eb1b", + "id": "nmdc:71195b9bc697bf29cd865718a689eb1b", + "file_size_bytes": 2508 + }, + { + "name": "Gp0115675_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115675", + "data_object_type": 
"Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_functional_annotation.gff", + "md5_checksum": "d8cccd9c5cd237c238e5ba443c477db5", + "id": "nmdc:d8cccd9c5cd237c238e5ba443c477db5", + "file_size_bytes": 26575202 + }, + { + "name": "Gp0115675_KO TSV file", + "description": "KO TSV file for Gp0115675", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko.tsv", + "md5_checksum": "1cb17c4c7681345f53a7f4ef5c319fba", + "id": "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", + "file_size_bytes": 3577030 + }, + { + "name": "Gp0115675_EC TSV file", + "description": "EC TSV file for Gp0115675", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ec.tsv", + "md5_checksum": "17e386be26f52833c463a89733ef2e34", + "id": "nmdc:17e386be26f52833c463a89733ef2e34", + "file_size_bytes": 2294485 + }, + { + "name": "Gp0115675_COG GFF file", + "description": "COG GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cog.gff", + "md5_checksum": "3e9b2fd11f2f5c16f9f25560e3b6fc55", + "id": "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", + "file_size_bytes": 15181628 + }, + { + "name": "Gp0115675_PFAM GFF file", + "description": "PFAM GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_pfam.gff", + "md5_checksum": "b11e36753299e36fa92670cf75165698", + "id": "nmdc:b11e36753299e36fa92670cf75165698", + "file_size_bytes": 11905020 + }, + { + "name": "Gp0115675_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_tigrfam.gff", + "md5_checksum": "70ac1de5fbc6cc835d5a0d1855f7a28a", + "id": "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", + 
"file_size_bytes": 1629352 + }, + { + "name": "Gp0115675_SMART GFF file", + "description": "SMART GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_smart.gff", + "md5_checksum": "b9e3eb74fa7fee0fac886f8a436b9ecf", + "id": "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", + "file_size_bytes": 3360419 + }, + { + "name": "Gp0115675_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_supfam.gff", + "md5_checksum": "faa27c2be6dc56e66f739dbffcbb6bef", + "id": "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", + "file_size_bytes": 19134944 + }, + { + "name": "Gp0115675_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cath_funfam.gff", + "md5_checksum": "b080e9d168c0c1330fda64814afe335b", + "id": "nmdc:b080e9d168c0c1330fda64814afe335b", + "file_size_bytes": 15037016 + }, + { + "name": "Gp0115675_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko_ec.gff", + "md5_checksum": "4ea799de0bc051409b7231801eea0129", + "id": "nmdc:4ea799de0bc051409b7231801eea0129", + "file_size_bytes": 11398449 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115675_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115675", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.tooShort.fa", + "md5_checksum": "826503b4204b77c319c0bb353d69818e", + "id": "nmdc:826503b4204b77c319c0bb353d69818e", + "file_size_bytes": 31246547 + }, + { + "name": "Gp0115675_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.unbinned.fa", + "md5_checksum": "9a02c2954014bb8dcd62800609dd3ec5", + "id": "nmdc:9a02c2954014bb8dcd62800609dd3ec5", + "file_size_bytes": 6258719 + }, + { + "name": "Gp0115675_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115675", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_checkm_qa.out", + "md5_checksum": "d15ed915946e095d045d73f4b4de019d", + "id": "nmdc:d15ed915946e095d045d73f4b4de019d", + "file_size_bytes": 1092 + }, + { + "name": "Gp0115675_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115675", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_hqmq_bin.zip", + "md5_checksum": "8de4404b1a6601bae7d7d5fd51bd131a", + "id": "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", + "file_size_bytes": 182 }, + { + "name": "Gp0115675_metabat2 bins", + "description": "metabat2 bins for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_metabat_bin.zip", + "md5_checksum": "55f66520d821205e80dcd303cc2793bc", + "id": "nmdc:55f66520d821205e80dcd303cc2793bc", + "file_size_bytes": 1259160 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34735" + "$oid": "649b0052ec087f6bbab34722" }, "has_input": [ 
- "nmdc:03eb095e55df50d639fab237d06c14ac", - "nmdc:568b82cb6038fec5df04c30cbd874098", - "nmdc:950b8c4ebd1da50e2ca079273540f3af" + "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "nmdc:80470769e7531b46c709d12c65487ffe", + "nmdc:d8cccd9c5cd237c238e5ba443c477db5" ], - "too_short_contig_num": 107191, + "too_short_contig_num": 76352, "part_of": [ - "nmdc:mga0k85x37" + "nmdc:mga0vf2h47" ], - "binned_contig_num": 651, + "binned_contig_num": 846, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:420b015f88d0b88ab582805f39ed2b47", - "nmdc:ee8a556be3a57008c1c05ff9fe83437e", - "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", - "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", - "nmdc:6a80769f6812a45615890cc2b03e9abf" + "nmdc:826503b4204b77c319c0bb353d69818e", + "nmdc:9a02c2954014bb8dcd62800609dd3ec5", + "nmdc:d15ed915946e095d045d73f4b4de019d", + "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", + "nmdc:55f66520d821205e80dcd303cc2793bc" ], - "was_informed_by": "gold:Gp0115669", - "input_contig_num": 114113, - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "was_informed_by": "gold:Gp0115675", + "input_contig_num": 80857, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0k85x37", + "name": "MAGs Analysis Activity for nmdc:mga0vf2h47", "mags_list": [ { - "number_of_contig": 48, - "completeness": 13.04, + "number_of_contig": 579, + "completeness": 73.87, "bin_name": "bins.1", - "gene_count": 245, + "gene_count": 3274, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", - "num_16s": 0, + "num_16s": 1, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 0.0, + "contamination": 25.78, "gtdbtk_class": "", "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, + "num_5s": 1, + "num_23s": 1, "gtdbtk_genus": "", - "num_t_rna": 5 + "num_t_rna": 37 }, { - "number_of_contig": 379, - "completeness": 72.42, + "number_of_contig": 199, + 
"completeness": 36.21, "bin_name": "bins.2", - "gene_count": 2513, - "bin_quality": "MQ", + "gene_count": 1070, + "bin_quality": "LQ", "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", + "gtdbtk_order": "", "num_16s": 0, - "gtdbtk_family": "Sphingomonadaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 1.85, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, - "gtdbtk_genus": "Novosphingobium", - "num_t_rna": 32 + "gtdbtk_genus": "", + "num_t_rna": 16 }, { - "number_of_contig": 224, - "completeness": 29.36, + "number_of_contig": 68, + "completeness": 4.17, "bin_name": "bins.3", - "gene_count": 1148, + "gene_count": 480, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 0.43, + "contamination": 4.17, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 13 + "num_t_rna": 5 } ], - "unbinned_contig_num": 6271, - "started_at_time": "2021-10-11T02:28:43Z", + "unbinned_contig_num": 3659, + "started_at_time": "2021-10-11T02:28:05Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:20:07+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115669_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115669", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.tooShort.fa", - "md5_checksum": "420b015f88d0b88ab582805f39ed2b47", - "id": "nmdc:420b015f88d0b88ab582805f39ed2b47", - "file_size_bytes": 44979790 - }, - { - "name": "Gp0115669_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.unbinned.fa", - "md5_checksum": "ee8a556be3a57008c1c05ff9fe83437e", - "id": "nmdc:ee8a556be3a57008c1c05ff9fe83437e", - "file_size_bytes": 10530111 - }, - { - "name": "Gp0115669_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115669", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_checkm_qa.out", - "md5_checksum": "6fd5dfbd1500a60620194b5b9a4aab8a", - "id": "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", - "file_size_bytes": 1190 - }, - { - "name": "Gp0115669_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115669", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_hqmq_bin.zip", - "md5_checksum": "6a7eb248822ec0994ddeffe8b5aae7b1", - "id": "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", - "file_size_bytes": 681479 - }, - { - "name": "Gp0115669_metabat2 bins", - "description": "metabat2 bins for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_metabat_bin.zip", - "md5_checksum": "6a80769f6812a45615890cc2b03e9abf", - "id": "nmdc:6a80769f6812a45615890cc2b03e9abf", - "file_size_bytes": 359752 - } - ] + "ended_at_time": "2021-10-11T03:25:21+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b53" - }, - "id": "nmdc:omprc-11-qsxwf517", - "name": "Sand microcosm microbial communities from a hyporheic zone in 
Columbia River, Washington, USA - GW-RW T4_14-Oct-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-sdhyr752" - ], - "has_output": [ - "jgi:55d7402b0d8785342fcf7e3c" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115672" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c889" + "$oid": "649b005bbf2caae0415ef9c2" }, "has_input": [ - "nmdc:1f6998a48aec6f4008a92d2b8e17d314" + "nmdc:dd5cad9348fc41cb18ac989185fed0b5" ], "part_of": [ - "nmdc:mga0cwhj53" + "nmdc:mga0vf2h47" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:eb516fb673793f5161fb634fc19de310", - "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f" + "nmdc:93ea50ce57263b498b781240c04dbf46", + "nmdc:71195b9bc697bf29cd865718a689eb1b", + "nmdc:d8cccd9c5cd237c238e5ba443c477db5", + "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", + "nmdc:17e386be26f52833c463a89733ef2e34", + "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", + "nmdc:b11e36753299e36fa92670cf75165698", + "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", + "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", + "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", + "nmdc:b080e9d168c0c1330fda64814afe335b", + "nmdc:4ea799de0bc051409b7231801eea0129" ], - "was_informed_by": "gold:Gp0115672", - "input_read_count": 34522052, - 
"output_read_bases": 5012430912, - "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "was_informed_by": "gold:Gp0115675", + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", "execution_resource": "NERSC-Cori", - "input_read_bases": 5212829852, - "name": "Read QC Activity for nmdc:mga0cwhj53", - "output_read_count": 33454554, - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:56:20+00:00", - "output_data_objects": [ - { - "name": "Gp0115672_Filtered Reads", - "description": "Filtered Reads for Gp0115672", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filtered.fastq.gz", - "md5_checksum": "eb516fb673793f5161fb634fc19de310", - "id": "nmdc:eb516fb673793f5161fb634fc19de310", - "file_size_bytes": 2704299418 - }, - { - "name": "Gp0115672_Filtered Stats", - "description": "Filtered Stats for Gp0115672", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filterStats.txt", - "md5_checksum": "f4b68d1bd25f8d2fa8986aeef5fbec3f", - "id": "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f", - "file_size_bytes": 290 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0vf2h47", + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:25:21+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf52" + "$oid": "649b005f2ca5ee4adb139faf" }, "has_input": [ - "nmdc:eb516fb673793f5161fb634fc19de310" + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + ], + "part_of": [ + "nmdc:mga0vf2h47" ], + "ctg_logsum": 115425, + "scaf_logsum": 116377, + "gap_pct": 0.00425, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:5a9326e2e450663a5ed8c97389136b25", - "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "nmdc:39a46887587926c9b81e126bb1036005", - 
"nmdc:b8dde2c047141d9097317c86f723eded", - "nmdc:d530342b37f0785f92650e9650f31d6a", - "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "nmdc:3266e79813577aae1d4377c62e73332c" + "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "nmdc:6d02084941141ac9a1876c621a50aef0", + "nmdc:cc8faed3494579d793c08ede54cb5b3a", + "nmdc:8891e46c9766f2b84d45fd6e46078a64", + "nmdc:80470769e7531b46c709d12c65487ffe" ], - "was_informed_by": "gold:Gp0115672", - "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "asm_score": 4.718, + "was_informed_by": "gold:Gp0115675", + "ctg_powsum": 13174, + "scaf_max": 25635, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "scaf_powsum": 13311, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:56:20+00:00", - "output_data_objects": [ - { - "name": "Gp0115672_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", - "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", - "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", - "file_size_bytes": 15806 - }, - { - "name": "Gp0115672_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", - "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", - "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "file_size_bytes": 1142479 - }, - { - "name": "Gp0115672_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115672", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", - 
"md5_checksum": "39a46887587926c9b81e126bb1036005", - "id": "nmdc:39a46887587926c9b81e126bb1036005", - "file_size_bytes": 273611 - }, - { - "name": "Gp0115672_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115672", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", - "md5_checksum": "b8dde2c047141d9097317c86f723eded", - "id": "nmdc:b8dde2c047141d9097317c86f723eded", - "file_size_bytes": 2436637487 - }, - { - "name": "Gp0115672_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115672", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", - "md5_checksum": "d530342b37f0785f92650e9650f31d6a", - "id": "nmdc:d530342b37f0785f92650e9650f31d6a", - "file_size_bytes": 261520 - }, - { - "name": "Gp0115672_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115672", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", - "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", - "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "file_size_bytes": 2342832 - }, - { - "name": "Gp0115672_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115672", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", - "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", - "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "file_size_bytes": 1993150715 - }, - { - "name": "Gp0115672_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115672", - 
"data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", - "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", - "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "file_size_bytes": 693572 - }, - { - "name": "Gp0115672_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115672", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", - "md5_checksum": "3266e79813577aae1d4377c62e73332c", - "id": "nmdc:3266e79813577aae1d4377c62e73332c", - "file_size_bytes": 4177114 - } - ] - }, + "contigs": 80858, + "name": "Assembly Activity for nmdc:mga0vf2h47", + "ctg_max": 25635, + "gc_std": 0.10716, + "contig_bp": 38571486, + "gc_avg": 0.56103, + "started_at_time": "2021-10-11T02:28:05Z", + "scaf_bp": 38573126, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 80703, + "ended_at_time": "2021-10-11T03:25:21+00:00", + "ctg_l50": 435, + "ctg_l90": 284, + "ctg_n50": 19932, + "ctg_n90": 68422, + "scaf_l50": 436, + "scaf_l90": 284, + "scaf_n50": 19754, + "scaf_n90": 68272 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e71a33833bcf838a701f34" + "$oid": "649b009773e8249959349b50" }, + "id": "nmdc:omprc-11-jk7zjz92", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", "has_input": [ - "nmdc:eb516fb673793f5161fb634fc19de310" - ], - "part_of": [ - "nmdc:mga0cwhj53" + "nmdc:bsm-11-a5d23e19" ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - 
"nmdc:5a9326e2e450663a5ed8c97389136b25", - "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "nmdc:39a46887587926c9b81e126bb1036005", - "nmdc:b8dde2c047141d9097317c86f723eded", - "nmdc:d530342b37f0785f92650e9650f31d6a", - "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "nmdc:3266e79813577aae1d4377c62e73332c" + "jgi:55d817f30d8785342fcf826d" ], - "was_informed_by": "gold:Gp0115672", - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:56:20+00:00", - "output_data_objects": [ - { - "name": "Gp0115672_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", - "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", - "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", - "file_size_bytes": 15806 - }, - { - "name": "Gp0115672_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", - "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", - "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "file_size_bytes": 1142479 - }, - { - "name": "Gp0115672_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115672", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", - "md5_checksum": "39a46887587926c9b81e126bb1036005", - "id": "nmdc:39a46887587926c9b81e126bb1036005", - "file_size_bytes": 273611 - }, - { - "name": "Gp0115672_Centrifuge classification TSV report", - "description": "Centrifuge 
classification TSV report for Gp0115672", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", - "md5_checksum": "b8dde2c047141d9097317c86f723eded", - "id": "nmdc:b8dde2c047141d9097317c86f723eded", - "file_size_bytes": 2436637487 - }, - { - "name": "Gp0115672_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115672", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", - "md5_checksum": "d530342b37f0785f92650e9650f31d6a", - "id": "nmdc:d530342b37f0785f92650e9650f31d6a", - "file_size_bytes": 261520 - }, - { - "name": "Gp0115672_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115672", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", - "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", - "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "file_size_bytes": 2342832 - }, - { - "name": "Gp0115672_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115672", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", - "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", - "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "file_size_bytes": 1993150715 - }, - { - "name": "Gp0115672_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115672", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", - "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", - 
"id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "file_size_bytes": 693572 - }, - { - "name": "Gp0115672_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115672", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", - "md5_checksum": "3266e79813577aae1d4377c62e73332c", - "id": "nmdc:3266e79813577aae1d4377c62e73332c", - "file_size_bytes": 4177114 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115675" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fbf" + "$oid": "649b009d6bdd4fd20273c87d" }, "has_input": [ - "nmdc:eb516fb673793f5161fb634fc19de310" + "nmdc:4a9a0183b794a98c57e5b5ce959a3f65" ], "part_of": [ - "nmdc:mga0cwhj53" + "nmdc:mga0vf2h47" ], - "ctg_logsum": 447149, - "scaf_logsum": 448446, - "gap_pct": 0.0019, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:6f762f7b079f8c2633ef674a8264879f", - "nmdc:26cc1c91f5f5e79d50041ff4623398b5", - "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", - "nmdc:362a9857666fe2f4e90bf6a818f551cc", - "nmdc:afd1d03b38bc5deb9c196264bcea8795" + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", + "nmdc:2507e3f107100ce0c72c57191d450818" ], - "asm_score": 13.127, - "was_informed_by": "gold:Gp0115672", - "ctg_powsum": 55923, - "scaf_max": 157008, - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "scaf_powsum": 56113, + "was_informed_by": "gold:Gp0115675", + 
"input_read_count": 18827380, + "output_read_bases": 2508839784, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", "execution_resource": "NERSC-Cori", - "contigs": 221046, - "name": "Assembly Activity for nmdc:mga0cwhj53", - "ctg_max": 157008, - "gc_std": 0.10619, - "contig_bp": 120471215, - "gc_avg": 0.56196, - "started_at_time": "2021-10-11T02:28:16Z", - "scaf_bp": 120473505, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 220853, - "ended_at_time": "2021-10-11T05:56:20+00:00", - "ctg_l50": 528, - "ctg_l90": 293, - "ctg_n50": 48327, - "ctg_n90": 178881, - "scaf_l50": 529, - "scaf_l90": 293, - "scaf_n50": 48077, - "scaf_n90": 178708, - "scaf_l_gt50k": 2147966, - "scaf_n_gt50k": 28, - "scaf_pct_gt50k": 1.7829365, - "output_data_objects": [ - { - "name": "Gp0115672_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115672", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_contigs.fna", - "md5_checksum": "6f762f7b079f8c2633ef674a8264879f", - "id": "nmdc:6f762f7b079f8c2633ef674a8264879f", - "file_size_bytes": 129321165 - }, - { - "name": "Gp0115672_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115672", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_scaffolds.fna", - "md5_checksum": "26cc1c91f5f5e79d50041ff4623398b5", - "id": "nmdc:26cc1c91f5f5e79d50041ff4623398b5", - "file_size_bytes": 128655263 - }, - { - "name": "Gp0115672_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_covstats.txt", - "md5_checksum": "bd9d5497c4e2e0ea61df1f3f239107f7", - "id": "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", - "file_size_bytes": 17496249 - }, - { - "name": "Gp0115672_Assembled AGP file", - "description": "Assembled AGP file for 
Gp0115672", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_assembly.agp", - "md5_checksum": "362a9857666fe2f4e90bf6a818f551cc", - "id": "nmdc:362a9857666fe2f4e90bf6a818f551cc", - "file_size_bytes": 16401188 - }, - { - "name": "Gp0115672_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115672", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_pairedMapped_sorted.bam", - "md5_checksum": "afd1d03b38bc5deb9c196264bcea8795", - "id": "nmdc:afd1d03b38bc5deb9c196264bcea8795", - "file_size_bytes": 2952467259 - } - ] - }, + "input_read_bases": 2842934380, + "name": "Read QC Activity for nmdc:mga0vf2h47", + "output_read_count": 16749572, + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:25:21+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9d1" + "$oid": "649b009bff710ae353f8cf41" }, "has_input": [ - "nmdc:6f762f7b079f8c2633ef674a8264879f" - ], - "part_of": [ - "nmdc:mga0cwhj53" + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:84e3590be0f59007275fdf459d464f74", - "nmdc:7dd630b842f587768235714e8a95f377", - "nmdc:38d776837c2208b557e2e4e5428c879d", - "nmdc:e38cb3355892042cb02580c26c083cd9", - "nmdc:d55119e8f094efa075c44b22e8b2f689", - "nmdc:02a9ad5732172f04d1da83d145f63226", - "nmdc:73811b72087e57f23db32f4a0ca4fb9c", - "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", - "nmdc:5a843529ffac8227515c5ea399ee4815", - "nmdc:82ac29a9999c6bc097cb0f35e4177e35", - "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", - "nmdc:1e74c3df751a59a34e5c0d87f4a37563" + "nmdc:60d673988c4f4447feb5985e8501e914", + "nmdc:a8f93ed13033eb949109b4e83980a893", + "nmdc:31dd6eb616f1e9815778453ab1601195", + 
"nmdc:6d7a930d79f220b06cde8fbf8339e744", + "nmdc:0aaac507db0e29827e1c87df47324932", + "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "nmdc:1ec0247d86889fcef13f39a58a92b066", + "nmdc:242a1c60f6cb14ba8430375171fda436" ], - "was_informed_by": "gold:Gp0115672", - "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "was_informed_by": "gold:Gp0115675", + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0cwhj53", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:56:20+00:00", - "output_data_objects": [ - { - "name": "Gp0115672_Protein FAA", - "description": "Protein FAA for Gp0115672", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_proteins.faa", - "md5_checksum": "84e3590be0f59007275fdf459d464f74", - "id": "nmdc:84e3590be0f59007275fdf459d464f74", - "file_size_bytes": 71651089 - }, - { - "name": "Gp0115672_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115672", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_structural_annotation.gff", - "md5_checksum": "7dd630b842f587768235714e8a95f377", - "id": "nmdc:7dd630b842f587768235714e8a95f377", - "file_size_bytes": 2534 - }, - { - "name": "Gp0115672_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115672", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_functional_annotation.gff", - "md5_checksum": "38d776837c2208b557e2e4e5428c879d", - "id": "nmdc:38d776837c2208b557e2e4e5428c879d", - "file_size_bytes": 78213025 - }, - { - "name": "Gp0115672_KO TSV file", - "description": "KO TSV file for 
Gp0115672", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko.tsv", - "md5_checksum": "e38cb3355892042cb02580c26c083cd9", - "id": "nmdc:e38cb3355892042cb02580c26c083cd9", - "file_size_bytes": 10621211 - }, - { - "name": "Gp0115672_EC TSV file", - "description": "EC TSV file for Gp0115672", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ec.tsv", - "md5_checksum": "d55119e8f094efa075c44b22e8b2f689", - "id": "nmdc:d55119e8f094efa075c44b22e8b2f689", - "file_size_bytes": 6814564 - }, - { - "name": "Gp0115672_COG GFF file", - "description": "COG GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cog.gff", - "md5_checksum": "02a9ad5732172f04d1da83d145f63226", - "id": "nmdc:02a9ad5732172f04d1da83d145f63226", - "file_size_bytes": 45617917 - }, - { - "name": "Gp0115672_PFAM GFF file", - "description": "PFAM GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_pfam.gff", - "md5_checksum": "73811b72087e57f23db32f4a0ca4fb9c", - "id": "nmdc:73811b72087e57f23db32f4a0ca4fb9c", - "file_size_bytes": 37040943 - }, - { - "name": "Gp0115672_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_tigrfam.gff", - "md5_checksum": "dfc18c0f97e80c14ca6ca1bc2ba7a809", - "id": "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", - "file_size_bytes": 5380314 - }, - { - "name": "Gp0115672_SMART GFF file", - "description": "SMART GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_smart.gff", - "md5_checksum": "5a843529ffac8227515c5ea399ee4815", - "id": "nmdc:5a843529ffac8227515c5ea399ee4815", - "file_size_bytes": 10141642 - }, - { 
- "name": "Gp0115672_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_supfam.gff", - "md5_checksum": "82ac29a9999c6bc097cb0f35e4177e35", - "id": "nmdc:82ac29a9999c6bc097cb0f35e4177e35", - "file_size_bytes": 56808220 - }, - { - "name": "Gp0115672_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cath_funfam.gff", - "md5_checksum": "5b0e8395559ef0d8a341ae0e132e60f6", - "id": "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", - "file_size_bytes": 45632833 - }, - { - "name": "Gp0115672_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko_ec.gff", - "md5_checksum": "1e74c3df751a59a34e5c0d87f4a37563", - "id": "nmdc:1e74c3df751a59a34e5c0d87f4a37563", - "file_size_bytes": 33782864 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:25:21+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e719dc833bcf838a7014d6" + }, + "has_input": [ + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + ], + "part_of": [ + "nmdc:mga0vf2h47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:60d673988c4f4447feb5985e8501e914", + "nmdc:a8f93ed13033eb949109b4e83980a893", + "nmdc:31dd6eb616f1e9815778453ab1601195", + "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "nmdc:0aaac507db0e29827e1c87df47324932", + "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "nmdc:d39369f32ada967d7cf52cb503fccf4a", + 
"nmdc:1ec0247d86889fcef13f39a58a92b066", + "nmdc:242a1c60f6cb14ba8430375171fda436" + ], + "was_informed_by": "gold:Gp0115675", + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:25:21+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115665_Filtered Reads", + "description": "Filtered Reads for Gp0115665", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filtered.fastq.gz", + "md5_checksum": "b0462e18cf9dafc9d2207a58bf085530", + "id": "nmdc:b0462e18cf9dafc9d2207a58bf085530", + "file_size_bytes": 4096192298 + }, + { + "name": "Gp0115665_Filtered Stats", + "description": "Filtered Stats for Gp0115665", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filterStats.txt", + "md5_checksum": "f0e1b9004b0e9aafb06c444444a522c7", + "id": "nmdc:f0e1b9004b0e9aafb06c444444a522c7", + "file_size_bytes": 291 + }, + { + "name": "Gp0115665_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", + "md5_checksum": "432fedddcbacb4e69c0350354ab44080", + "id": "nmdc:432fedddcbacb4e69c0350354ab44080", + "file_size_bytes": 18015 + }, + { + "name": "Gp0115665_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", + "md5_checksum": 
"50b9a4c83b2ec0d1dd683cb8814ed5ad", + "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "file_size_bytes": 1283220 + }, + { + "name": "Gp0115665_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115665", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", + "md5_checksum": "e3d7339ba5c7677be13854f391462474", + "id": "nmdc:e3d7339ba5c7677be13854f391462474", + "file_size_bytes": 281366 + }, + { + "name": "Gp0115665_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115665", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", + "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", + "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "file_size_bytes": 3481369185 + }, + { + "name": "Gp0115665_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115665", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", + "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", + "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + "file_size_bytes": 263480 + }, + { + "name": "Gp0115665_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115665", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", + "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", + "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "file_size_bytes": 2347079 + }, + { + "name": "Gp0115665_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115665", + "data_object_type": 
"Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", + "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", + "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "file_size_bytes": 2866138771 + }, + { + "name": "Gp0115665_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115665", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", + "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", + "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "file_size_bytes": 728030 + }, + { + "name": "Gp0115665_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115665", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", + "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", + "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", + "file_size_bytes": 4374689 + }, + { + "name": "Gp0115665_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", + "md5_checksum": "432fedddcbacb4e69c0350354ab44080", + "id": "nmdc:432fedddcbacb4e69c0350354ab44080", + "file_size_bytes": 18015 + }, + { + "name": "Gp0115665_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", + "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", + "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "file_size_bytes": 1283220 + }, + { + "name": "Gp0115665_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115665", + "data_object_type": "GOTTCHA2 
Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", + "md5_checksum": "e3d7339ba5c7677be13854f391462474", + "id": "nmdc:e3d7339ba5c7677be13854f391462474", + "file_size_bytes": 281366 + }, + { + "name": "Gp0115665_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115665", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", + "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", + "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "file_size_bytes": 3481369185 + }, + { + "name": "Gp0115665_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115665", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", + "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", + "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + "file_size_bytes": 263480 + }, + { + "name": "Gp0115665_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115665", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", + "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", + "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "file_size_bytes": 2347079 + }, + { + "name": "Gp0115665_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115665", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", + "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", + "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + 
"file_size_bytes": 2866138771 + }, + { + "name": "Gp0115665_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115665", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", + "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", + "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "file_size_bytes": 728030 + }, + { + "name": "Gp0115665_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115665", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", + "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", + "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", + "file_size_bytes": 4374689 + }, + { + "name": "Gp0115665_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115665", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_contigs.fna", + "md5_checksum": "9704e757dc537a7f06c6f83fc633cf64", + "id": "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "file_size_bytes": 185880663 + }, + { + "name": "Gp0115665_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115665", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_scaffolds.fna", + "md5_checksum": "2674db4e7e6171864fa47f0b3b5a9603", + "id": "nmdc:2674db4e7e6171864fa47f0b3b5a9603", + "file_size_bytes": 184819604 + }, + { + "name": "Gp0115665_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_covstats.txt", + "md5_checksum": "ab6c496a5e3ab895fee3812fd992e1e7", + "id": "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", + "file_size_bytes": 
27961807 + }, + { + "name": "Gp0115665_Assembled AGP file", + "description": "Assembled AGP file for Gp0115665", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_assembly.agp", + "md5_checksum": "5a1240fa0a6bf92c95e852c0352e5839", + "id": "nmdc:5a1240fa0a6bf92c95e852c0352e5839", + "file_size_bytes": 26248242 + }, + { + "name": "Gp0115665_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115665", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_pairedMapped_sorted.bam", + "md5_checksum": "e28c85b50e0b654626e655755165aff5", + "id": "nmdc:e28c85b50e0b654626e655755165aff5", + "file_size_bytes": 4460978045 + }, + { + "name": "Gp0115665_Protein FAA", + "description": "Protein FAA for Gp0115665", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_proteins.faa", + "md5_checksum": "2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "id": "nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "file_size_bytes": 100719814 + }, + { + "name": "Gp0115665_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115665", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_structural_annotation.gff", + "md5_checksum": "6c55ce2e0d6e74d217d850b273c4f0c4", + "id": "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", + "file_size_bytes": 2534 + }, + { + "name": "Gp0115665_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115665", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_functional_annotation.gff", + "md5_checksum": "b3add25cdb76a537e70617ac6a1d1fc5", + "id": 
"nmdc:b3add25cdb76a537e70617ac6a1d1fc5", + "file_size_bytes": 110405026 + }, + { + "name": "Gp0115665_KO TSV file", + "description": "KO TSV file for Gp0115665", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko.tsv", + "md5_checksum": "b782707ae2cf5676596ca99800deea26", + "id": "nmdc:b782707ae2cf5676596ca99800deea26", + "file_size_bytes": 12963636 + }, + { + "name": "Gp0115665_EC TSV file", + "description": "EC TSV file for Gp0115665", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ec.tsv", + "md5_checksum": "6a8565bf52f70efa03c755a9f0b82d7d", + "id": "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", + "file_size_bytes": 8371381 }, + { + "name": "Gp0115665_COG GFF file", + "description": "COG GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cog.gff", + "md5_checksum": "f5d79b4c69825e0b66153e7582cb489b", + "id": "nmdc:f5d79b4c69825e0b66153e7582cb489b", + "file_size_bytes": 56948501 + }, + { + "name": "Gp0115665_PFAM GFF file", + "description": "PFAM GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_pfam.gff", + "md5_checksum": "f66a0eaa9432ef5a2dd390214f47eed5", + "id": "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", + "file_size_bytes": 45618277 + }, + { + "name": "Gp0115665_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_tigrfam.gff", + "md5_checksum": "26cc0a40aab6bfc64d24afa760b43102", + "id": "nmdc:26cc0a40aab6bfc64d24afa760b43102", + "file_size_bytes": 5245489 + }, + { + "name": "Gp0115665_SMART GFF file", + "description": "SMART GFF file for Gp0115665", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_smart.gff", + "md5_checksum": "83785a6e8f7658dc2354b9bad1b86d01", + "id": "nmdc:83785a6e8f7658dc2354b9bad1b86d01", + "file_size_bytes": 15993417 + }, + { + "name": "Gp0115665_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_supfam.gff", + "md5_checksum": "0f03207aa38d1aec8afdbf2bec1e4990", + "id": "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", + "file_size_bytes": 76926960 + }, + { + "name": "Gp0115665_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cath_funfam.gff", + "md5_checksum": "4876eed2bee3b3b7b2ac827857410be6", + "id": "nmdc:4876eed2bee3b3b7b2ac827857410be6", + "file_size_bytes": 61571084 + }, + { + "name": "Gp0115665_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko_ec.gff", + "md5_checksum": "bb5b62735a896d189c9a274c6e091bab", + "id": "nmdc:bb5b62735a896d189c9a274c6e091bab", + "file_size_bytes": 41244685 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115665_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.tooShort.fa", + "md5_checksum": 
"79794b0497c1a4a292778ddb94504f7a", + "id": "nmdc:79794b0497c1a4a292778ddb94504f7a", + "file_size_bytes": 146322768 + }, + { + "name": "Gp0115665_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.unbinned.fa", + "md5_checksum": "e26dc245e491a521a94fbb9ab1b4293d", + "id": "nmdc:e26dc245e491a521a94fbb9ab1b4293d", + "file_size_bytes": 30116585 + }, + { + "name": "Gp0115665_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115665", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_checkm_qa.out", + "md5_checksum": "45cb473694eb3cfa8abc7768e87ef303", + "id": "nmdc:45cb473694eb3cfa8abc7768e87ef303", + "file_size_bytes": 1700 + }, + { + "name": "Gp0115665_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115665", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_hqmq_bin.zip", + "md5_checksum": "e344d87dbac42a645fd3c7d5b9d0a1a5", + "id": "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", + "file_size_bytes": 2294379 + }, + { + "name": "Gp0115665_metabat2 bins", + "description": "metabat2 bins for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_metabat_bin.zip", + "md5_checksum": "1098bd9921c6ab8f52aca786e3b7bf1d", + "id": "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d", + "file_size_bytes": 534425 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34737" + "$oid": "649b0052ec087f6bbab34738" }, "has_input": [ - "nmdc:6f762f7b079f8c2633ef674a8264879f", - "nmdc:afd1d03b38bc5deb9c196264bcea8795", - 
"nmdc:38d776837c2208b557e2e4e5428c879d" + "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "nmdc:e28c85b50e0b654626e655755165aff5", + "nmdc:b3add25cdb76a537e70617ac6a1d1fc5" ], - "too_short_contig_num": 206294, + "too_short_contig_num": 331533, "part_of": [ - "nmdc:mga0cwhj53" + "nmdc:mga06n7k74" ], - "binned_contig_num": 1785, + "binned_contig_num": 1636, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:2b6e0195e34697039eff38b51026be24", - "nmdc:f02d361fbef7549e2289bf4da623787d", - "nmdc:2de282e5507477269238ead458f11ac0", - "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", - "nmdc:4d315d8dac1d9605d110ff2298b10229" + "nmdc:79794b0497c1a4a292778ddb94504f7a", + "nmdc:e26dc245e491a521a94fbb9ab1b4293d", + "nmdc:45cb473694eb3cfa8abc7768e87ef303", + "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", + "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d" ], - "was_informed_by": "gold:Gp0115672", - "input_contig_num": 221045, - "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "was_informed_by": "gold:Gp0115665", + "input_contig_num": 352053, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0cwhj53", + "name": "MAGs Analysis Activity for nmdc:mga06n7k74", "mags_list": [ { - "number_of_contig": 316, - "completeness": 61.03, + "number_of_contig": 211, + "completeness": 44.36, "bin_name": "bins.1", - "gene_count": 2148, + "gene_count": 1029, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.49, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 564, + "completeness": 79.11, + "bin_name": "bins.2", + "gene_count": 4164, "bin_quality": "MQ", "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", + "gtdbtk_order": "Burkholderiales", "num_16s": 0, - 
"gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_family": "Burkholderiaceae", "gtdbtk_domain": "Bacteria", - "contamination": 0.85, - "gtdbtk_class": "Alphaproteobacteria", + "contamination": 3.11, + "gtdbtk_class": "Gammaproteobacteria", "gtdbtk_phylum": "Proteobacteria", "num_5s": 0, "num_23s": 0, - "gtdbtk_genus": "Novosphingobium", - "num_t_rna": 19 + "gtdbtk_genus": "Aquabacterium", + "num_t_rna": 33 }, { - "number_of_contig": 130, - "completeness": 34.64, - "bin_name": "bins.2", - "gene_count": 675, + "number_of_contig": 646, + "completeness": 72.48, + "bin_name": "bins.3", + "gene_count": 4108, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.62, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 28 + }, + { + "number_of_contig": 67, + "completeness": 1.97, + "bin_name": "bins.4", + "gene_count": 257, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -22227,70 +21525,13 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 5 + "num_t_rna": 3 }, { - "number_of_contig": 201, - "completeness": 19.13, - "bin_name": "bins.3", - "gene_count": 1000, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 2 - }, - { - "number_of_contig": 256, - "completeness": 75.9, - "bin_name": "bins.4", - "gene_count": 2131, - "bin_quality": "MQ", - "gtdbtk_species": "UBA5335 sp002862435", - "gtdbtk_order": "UBA5335", - "num_16s": 0, - "gtdbtk_family": "UBA5335", - "gtdbtk_domain": "Bacteria", - "contamination": 1.52, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - 
"num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA5335", - "num_t_rna": 22 - }, - { - "number_of_contig": 254, - "completeness": 100.0, + "number_of_contig": 64, + "completeness": 7.47, "bin_name": "bins.5", - "gene_count": 6188, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 2, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 95.83, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 2, - "num_23s": 2, - "gtdbtk_genus": "", - "num_t_rna": 86 - }, - { - "number_of_contig": 106, - "completeness": 7.24, - "bin_name": "bins.6", - "gene_count": 524, + "gene_count": 259, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -22303,32 +21544,13 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 16 - }, - { - "number_of_contig": 306, - "completeness": 65.74, - "bin_name": "bins.7", - "gene_count": 2357, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 0, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 2.3, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 29 + "num_t_rna": 4 }, { - "number_of_contig": 216, - "completeness": 47.34, - "bin_name": "bins.8", - "gene_count": 1203, + "number_of_contig": 84, + "completeness": 3.88, + "bin_name": "bins.6", + "gene_count": 313, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -22341,3197 +21563,2824 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 12 + "num_t_rna": 0 } ], - "unbinned_contig_num": 12966, - "started_at_time": "2021-10-11T02:28:16Z", + "unbinned_contig_num": 18884, + "started_at_time": "2021-10-11T02:28:54Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:56:20+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin 
checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115672_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.tooShort.fa", - "md5_checksum": "2b6e0195e34697039eff38b51026be24", - "id": "nmdc:2b6e0195e34697039eff38b51026be24", - "file_size_bytes": 91055942 - }, - { - "name": "Gp0115672_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.unbinned.fa", - "md5_checksum": "f02d361fbef7549e2289bf4da623787d", - "id": "nmdc:f02d361fbef7549e2289bf4da623787d", - "file_size_bytes": 23202832 - }, - { - "name": "Gp0115672_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115672", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_checkm_qa.out", - "md5_checksum": "2de282e5507477269238ead458f11ac0", - "id": "nmdc:2de282e5507477269238ead458f11ac0", - "file_size_bytes": 2040 - }, - { - "name": "Gp0115672_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115672", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_hqmq_bin.zip", - "md5_checksum": "3abae1a573f9f0ac6da47e1ab9b9a723", - "id": "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", - "file_size_bytes": 1815861 - }, - { - "name": "Gp0115672_metabat2 bins", - 
"description": "metabat2 bins for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_metabat_bin.zip", - "md5_checksum": "4d315d8dac1d9605d110ff2298b10229", - "id": "nmdc:4d315d8dac1d9605d110ff2298b10229", - "file_size_bytes": 2757900 - } - ] + "ended_at_time": "2021-10-11T06:19:29+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b54" - }, - "id": "nmdc:omprc-11-932jcd76", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-pvcgp635" - ], - "has_output": [ - "jgi:574fe0a17ded5e3df1ee148a" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127640" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c875" + "$oid": "649b005bbf2caae0415ef9d2" }, "has_input": [ - "nmdc:0094fcbe3a051a8000b8823c8db540f8" + "nmdc:9704e757dc537a7f06c6f83fc633cf64" ], "part_of": [ - "nmdc:mga06rnc11" + "nmdc:mga06n7k74" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2", - "nmdc:db5ccad12d6ddb46947fbd815aae7f9a" + 
"nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", + "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", + "nmdc:b782707ae2cf5676596ca99800deea26", + "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", + "nmdc:f5d79b4c69825e0b66153e7582cb489b", + "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", + "nmdc:26cc0a40aab6bfc64d24afa760b43102", + "nmdc:83785a6e8f7658dc2354b9bad1b86d01", + "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", + "nmdc:4876eed2bee3b3b7b2ac827857410be6", + "nmdc:bb5b62735a896d189c9a274c6e091bab" ], - "was_informed_by": "gold:Gp0127640", - "input_read_count": 28754670, - "output_read_bases": 4186416440, - "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "was_informed_by": "gold:Gp0115665", + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", "execution_resource": "NERSC-Cori", - "input_read_bases": 4341955170, - "name": "Read QC Activity for nmdc:mga06rnc11", - "output_read_count": 27981268, - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:33:17+00:00", - "output_data_objects": [ - { - "name": "Gp0127640_Filtered Reads", - "description": "Filtered Reads for Gp0127640", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", - "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", - "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", - "file_size_bytes": 2416846292 - }, - { - "name": "Gp0127640_Filtered Stats", - "description": "Filtered Stats for Gp0127640", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", - "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", - "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", - "file_size_bytes": 285 - } - ] - }, + "name": "Annotation Activity for nmdc:mga06n7k74", + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": 
"2021-10-11T06:19:29+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf3a" + "$oid": "649b005f2ca5ee4adb139fc2" }, "has_input": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2" + "nmdc:b0462e18cf9dafc9d2207a58bf085530" ], + "part_of": [ + "nmdc:mga06n7k74" + ], + "ctg_logsum": 427633, + "scaf_logsum": 429769, + "gap_pct": 0.00206, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "nmdc:61f1f6d57fd4d445682e25ec34901721", - "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "nmdc:e2939606fc9ff1c0046b333e1740f258", - "nmdc:d47144fd7ec0608e7677550d9589c889" + "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "nmdc:2674db4e7e6171864fa47f0b3b5a9603", + "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", + "nmdc:5a1240fa0a6bf92c95e852c0352e5839", + "nmdc:e28c85b50e0b654626e655755165aff5" ], - "was_informed_by": "gold:Gp0127640", - "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "asm_score": 5.768, + "was_informed_by": "gold:Gp0115665", + "ctg_powsum": 48025, + "scaf_max": 44931, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "scaf_powsum": 48321, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:33:17+00:00", - "output_data_objects": [ - { - "name": "Gp0127640_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", - "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", - "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "file_size_bytes": 3824 - }, - { - "name": "Gp0127640_Gottcha2 full TSV report", - 
"description": "Gottcha2 full TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", - "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", - "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "file_size_bytes": 850491 - }, - { - "name": "Gp0127640_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127640", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", - "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", - "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "file_size_bytes": 236151 - }, - { - "name": "Gp0127640_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127640", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", - "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", - "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "file_size_bytes": 2057333090 - }, - { - "name": "Gp0127640_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127640", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", - "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", - "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", - "file_size_bytes": 256577 - }, - { - "name": "Gp0127640_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127640", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", - "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", - "id": 
"nmdc:7c31728fc2a51c8d202f9f74b1919886", - "file_size_bytes": 2334984 - }, - { - "name": "Gp0127640_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127640", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", - "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", - "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "file_size_bytes": 1658481192 - }, - { - "name": "Gp0127640_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127640", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", - "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", - "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", - "file_size_bytes": 653129 - }, - { - "name": "Gp0127640_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127640", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", - "md5_checksum": "d47144fd7ec0608e7677550d9589c889", - "id": "nmdc:d47144fd7ec0608e7677550d9589c889", - "file_size_bytes": 3977820 - } + "contigs": 352055, + "name": "Assembly Activity for nmdc:mga06n7k74", + "ctg_max": 44931, + "gc_std": 0.13027, + "contig_bp": 172051088, + "gc_avg": 0.51918, + "started_at_time": "2021-10-11T02:28:54Z", + "scaf_bp": 172054628, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 351728, + "ended_at_time": "2021-10-11T06:19:29+00:00", + "ctg_l50": 468, + "ctg_l90": 289, + "ctg_n50": 95561, + "ctg_n90": 294969, + "scaf_l50": 468, + "scaf_l90": 289, + "scaf_n50": 95446, + "scaf_n90": 294658 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + 
"nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b51" + }, + "id": "nmdc:omprc-11-2jt0jk84", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qjtgh002" + ], + "has_output": [ + "jgi:55f23d820d8785306f964980" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115665" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e7199d833bcf838a700ec0" + "$oid": "649b009d6bdd4fd20273c88e" }, "has_input": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2" + "nmdc:0d39aafcd16496457fbb3be0f785b67f" ], "part_of": [ - "nmdc:mga06rnc11" + "nmdc:mga06n7k74" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "nmdc:61f1f6d57fd4d445682e25ec34901721", - "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "nmdc:e2939606fc9ff1c0046b333e1740f258", - "nmdc:d47144fd7ec0608e7677550d9589c889" - ], - "was_informed_by": "gold:Gp0127640", - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", - 
"started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:33:17+00:00", - "output_data_objects": [ - { - "name": "Gp0127640_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", - "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", - "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "file_size_bytes": 3824 - }, - { - "name": "Gp0127640_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", - "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", - "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "file_size_bytes": 850491 - }, - { - "name": "Gp0127640_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127640", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", - "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", - "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "file_size_bytes": 236151 - }, - { - "name": "Gp0127640_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127640", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", - "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", - "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "file_size_bytes": 2057333090 - }, - { - "name": "Gp0127640_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127640", - "data_object_type": "Centrifuge Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", - "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", - "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", - "file_size_bytes": 256577 - }, - { - "name": "Gp0127640_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127640", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", - "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", - "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "file_size_bytes": 2334984 - }, - { - "name": "Gp0127640_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127640", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", - "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", - "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "file_size_bytes": 1658481192 - }, - { - "name": "Gp0127640_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127640", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", - "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", - "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", - "file_size_bytes": 653129 - }, - { - "name": "Gp0127640_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127640", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", - "md5_checksum": "d47144fd7ec0608e7677550d9589c889", - "id": "nmdc:d47144fd7ec0608e7677550d9589c889", - "file_size_bytes": 3977820 - } - ] - }, - { - "_id": { - "$oid": 
"649b005f2ca5ee4adb139fa6" - }, - "has_input": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2" - ], - "part_of": [ - "nmdc:mga06rnc11" - ], - "ctg_logsum": 42879, - "scaf_logsum": 42987, - "gap_pct": 0.0005, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b85a322271c7f93ef295141d12cb2dbc", - "nmdc:794445b3fedfaec8af9b70b167bc6852", - "nmdc:d389ae4f8a92c21423fc77aa054ba985", - "nmdc:765541c2865f6047d5e2e8e7299908e4", - "nmdc:78b554dd52492c3d1e401d0c9198b89b" + "nmdc:b0462e18cf9dafc9d2207a58bf085530", + "nmdc:f0e1b9004b0e9aafb06c444444a522c7" ], - "asm_score": 5.471, - "was_informed_by": "gold:Gp0127640", - "ctg_powsum": 4901.253, - "scaf_max": 27880, - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "scaf_powsum": 4913.296, + "was_informed_by": "gold:Gp0115665", + "input_read_count": 50719572, + "output_read_bases": 7175148255, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", "execution_resource": "NERSC-Cori", - "contigs": 106665, - "name": "Assembly Activity for nmdc:mga06rnc11", - "ctg_max": 27880, - "gc_std": 0.10189, - "contig_bp": 40331509, - "gc_avg": 0.58648, - "started_at_time": "2021-10-11T02:24:27Z", - "scaf_bp": 40331709, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 106645, - "ended_at_time": "2021-10-11T04:33:17+00:00", - "ctg_l50": 336, - "ctg_l90": 282, - "ctg_n50": 38543, - "ctg_n90": 94525, - "scaf_l50": 336, - "scaf_l90": 282, - "scaf_n50": 38534, - "scaf_n90": 94506, - "output_data_objects": [ - { - "name": "Gp0127640_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127640", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_contigs.fna", - "md5_checksum": "b85a322271c7f93ef295141d12cb2dbc", - "id": "nmdc:b85a322271c7f93ef295141d12cb2dbc", - "file_size_bytes": 44243651 - }, - { - "name": "Gp0127640_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127640", - 
"data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_scaffolds.fna", - "md5_checksum": "794445b3fedfaec8af9b70b167bc6852", - "id": "nmdc:794445b3fedfaec8af9b70b167bc6852", - "file_size_bytes": 43923338 - }, - { - "name": "Gp0127640_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_covstats.txt", - "md5_checksum": "d389ae4f8a92c21423fc77aa054ba985", - "id": "nmdc:d389ae4f8a92c21423fc77aa054ba985", - "file_size_bytes": 8365383 - }, - { - "name": "Gp0127640_Assembled AGP file", - "description": "Assembled AGP file for Gp0127640", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_assembly.agp", - "md5_checksum": "765541c2865f6047d5e2e8e7299908e4", - "id": "nmdc:765541c2865f6047d5e2e8e7299908e4", - "file_size_bytes": 7782777 - }, - { - "name": "Gp0127640_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127640", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_pairedMapped_sorted.bam", - "md5_checksum": "78b554dd52492c3d1e401d0c9198b89b", - "id": "nmdc:78b554dd52492c3d1e401d0c9198b89b", - "file_size_bytes": 2578128724 - } - ] - }, + "input_read_bases": 7658655372, + "name": "Read QC Activity for nmdc:mga06n7k74", + "output_read_count": 47896142, + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:19:29+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9bb" + "$oid": "649b009bff710ae353f8cf57" }, "has_input": [ - "nmdc:b85a322271c7f93ef295141d12cb2dbc" - ], - "part_of": [ - "nmdc:mga06rnc11" + "nmdc:b0462e18cf9dafc9d2207a58bf085530" ], 
"git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:13e64b02d230f76008e42256a48d1cec", - "nmdc:7babb0c9f662679659b7b1bee469f073", - "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", - "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", - "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", - "nmdc:93fa7de9c74cfcff99bb74e27fa94674", - "nmdc:63bad86a6d7fb23b5a4683ae36820622", - "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", - "nmdc:46722961c280df725d15489e82502031", - "nmdc:6f1a0029cb25f1433de1d7c241bc7553", - "nmdc:6d2839963f616d810e66435b3bbe018a", - "nmdc:efbf36ca49c40ad0367ecd23c012b29b" + "nmdc:432fedddcbacb4e69c0350354ab44080", + "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "nmdc:e3d7339ba5c7677be13854f391462474", + "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" ], - "was_informed_by": "gold:Gp0127640", - "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "was_informed_by": "gold:Gp0115665", + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga06rnc11", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:33:17+00:00", - "output_data_objects": [ - { - "name": "Gp0127640_Protein FAA", - "description": "Protein FAA for Gp0127640", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_proteins.faa", - "md5_checksum": "13e64b02d230f76008e42256a48d1cec", - "id": "nmdc:13e64b02d230f76008e42256a48d1cec", - "file_size_bytes": 26637626 - }, - { - "name": "Gp0127640_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127640", - "data_object_type": "Structural Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_structural_annotation.gff", - "md5_checksum": "7babb0c9f662679659b7b1bee469f073", - "id": "nmdc:7babb0c9f662679659b7b1bee469f073", - "file_size_bytes": 2515 - }, - { - "name": "Gp0127640_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127640", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_functional_annotation.gff", - "md5_checksum": "e84b1e43d546c9793c3a4d9eaa8cee86", - "id": "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", - "file_size_bytes": 32184781 - }, - { - "name": "Gp0127640_KO TSV file", - "description": "KO TSV file for Gp0127640", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko.tsv", - "md5_checksum": "2e3e5b7ffa39e533db8ed1d925426f50", - "id": "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", - "file_size_bytes": 3620933 - }, - { - "name": "Gp0127640_EC TSV file", - "description": "EC TSV file for Gp0127640", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ec.tsv", - "md5_checksum": "62e46d35a6aff3a52b39c6bb04dc6161", - "id": "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", - "file_size_bytes": 2390086 - }, - { - "name": "Gp0127640_COG GFF file", - "description": "COG GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cog.gff", - "md5_checksum": "93fa7de9c74cfcff99bb74e27fa94674", - "id": "nmdc:93fa7de9c74cfcff99bb74e27fa94674", - "file_size_bytes": 17898567 - }, - { - "name": "Gp0127640_PFAM GFF file", - "description": "PFAM GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_pfam.gff", - "md5_checksum": "63bad86a6d7fb23b5a4683ae36820622", - "id": 
"nmdc:63bad86a6d7fb23b5a4683ae36820622", - "file_size_bytes": 12585366 - }, - { - "name": "Gp0127640_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_tigrfam.gff", - "md5_checksum": "d6b80bb748b4d6fbe52c15300ad2137b", - "id": "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", - "file_size_bytes": 1170952 - }, - { - "name": "Gp0127640_SMART GFF file", - "description": "SMART GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_smart.gff", - "md5_checksum": "46722961c280df725d15489e82502031", - "id": "nmdc:46722961c280df725d15489e82502031", - "file_size_bytes": 3891425 - }, - { - "name": "Gp0127640_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_supfam.gff", - "md5_checksum": "6f1a0029cb25f1433de1d7c241bc7553", - "id": "nmdc:6f1a0029cb25f1433de1d7c241bc7553", - "file_size_bytes": 22543435 - }, - { - "name": "Gp0127640_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cath_funfam.gff", - "md5_checksum": "6d2839963f616d810e66435b3bbe018a", - "id": "nmdc:6d2839963f616d810e66435b3bbe018a", - "file_size_bytes": 16572925 - }, - { - "name": "Gp0127640_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko_ec.gff", - "md5_checksum": "efbf36ca49c40ad0367ecd23c012b29b", - "id": "nmdc:efbf36ca49c40ad0367ecd23c012b29b", - "file_size_bytes": 11571776 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:19:29+00:00" + } + ], + "study_set": [], + 
"field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab3471a" + "$oid": "61e71a36833bcf838a702021" }, "has_input": [ - "nmdc:b85a322271c7f93ef295141d12cb2dbc", - "nmdc:78b554dd52492c3d1e401d0c9198b89b", - "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86" + "nmdc:b0462e18cf9dafc9d2207a58bf085530" ], - "too_short_contig_num": 104867, "part_of": [ - "nmdc:mga06rnc11" + "nmdc:mga06n7k74" ], - "binned_contig_num": 213, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:ce395376d0bc7121e4dc5efc774d5e74", - "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", - "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", - "nmdc:5945311235c6195ad409ab30e2b72c0c", - "nmdc:d1cf2992bd60e25032eedeb09858d14b" + "nmdc:432fedddcbacb4e69c0350354ab44080", + "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "nmdc:e3d7339ba5c7677be13854f391462474", + "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" ], - "was_informed_by": "gold:Gp0127640", - "input_contig_num": 106665, - "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "was_informed_by": "gold:Gp0115665", + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga06rnc11", - "mags_list": [ - { - "number_of_contig": 213, - "completeness": 48.94, - "bin_name": "bins.1", - "gene_count": 1422, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - 
"num_t_rna": 30 - } - ], - "unbinned_contig_num": 1585, - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:33:17+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127640_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.tooShort.fa", - "md5_checksum": "ce395376d0bc7121e4dc5efc774d5e74", - "id": "nmdc:ce395376d0bc7121e4dc5efc774d5e74", - "file_size_bytes": 40358420 - }, - { - "name": "Gp0127640_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.unbinned.fa", - "md5_checksum": "a16cbb06b91ebfb45f5a010effc1cfde", - "id": "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", - "file_size_bytes": 2755747 - }, - { - "name": "Gp0127640_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127640", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_checkm_qa.out", - "md5_checksum": "97ae130ca2f75c66b8cbd60c4d35463a", - "id": "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", - "file_size_bytes": 760 - }, - { - "name": "Gp0127640_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for 
Gp0127640", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_hqmq_bin.zip", - "md5_checksum": "5945311235c6195ad409ab30e2b72c0c", - "id": "nmdc:5945311235c6195ad409ab30e2b72c0c", - "file_size_bytes": 182 - }, - { - "name": "Gp0127640_metabat2 bins", - "description": "metabat2 bins for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_metabat_bin.zip", - "md5_checksum": "d1cf2992bd60e25032eedeb09858d14b", - "id": "nmdc:d1cf2992bd60e25032eedeb09858d14b", - "file_size_bytes": 345388 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:19:29+00:00" } ] }, { - "_id": { - "$oid": "649b009773e8249959349b55" - }, - "id": "nmdc:omprc-11-p0jdew93", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-fgtanh42" - ], - "has_output": [ - "jgi:574fde697ded5e3df1ee140a" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127641" - ], - "downstream_workflow_activity_records": [ + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + 
"extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115669_Filtered Reads", + "description": "Filtered Reads for Gp0115669", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", + "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", + "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", + "file_size_bytes": 1806935637 + }, { - "_id": { - "$oid": "649b009d6bdd4fd20273c872" - }, - "has_input": [ - "nmdc:c59690f54a7afb65869c9c683e3eef7f" - ], - "part_of": [ - "nmdc:mga0822t33" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a2700afe93abad6f004a3701348622a2", - "nmdc:aaa9a8a3d8e147116953394a8755742d" - ], - "was_informed_by": "gold:Gp0127641", - "input_read_count": 24261468, - "output_read_bases": 3340338011, - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3663481668, - "name": "Read QC Activity for nmdc:mga0822t33", - "output_read_count": 22362924, - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:05:47+00:00", - "output_data_objects": [ - { - "name": "Gp0127641_Filtered Reads", - "description": "Filtered Reads for Gp0127641", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filtered.fastq.gz", - "md5_checksum": "a2700afe93abad6f004a3701348622a2", - "id": "nmdc:a2700afe93abad6f004a3701348622a2", - "file_size_bytes": 1787020792 - }, - { - "name": "Gp0127641_Filtered Stats", - "description": "Filtered Stats for Gp0127641", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filterStats.txt", - "md5_checksum": "aaa9a8a3d8e147116953394a8755742d", - "id": 
"nmdc:aaa9a8a3d8e147116953394a8755742d", - "file_size_bytes": 289 - } - ] + "name": "Gp0115669_Filtered Stats", + "description": "Filtered Stats for Gp0115669", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", + "md5_checksum": "58fde3e96dbb28af9133bede850a2653", + "id": "nmdc:58fde3e96dbb28af9133bede850a2653", + "file_size_bytes": 286 }, { - "_id": { - "$oid": "649b009bff710ae353f8cf37" - }, - "has_input": [ - "nmdc:a2700afe93abad6f004a3701348622a2" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "nmdc:a42312841b816448d8bd5d3adfa65f58", - "nmdc:f473f4a99336a49105d2722888ae0510", - "nmdc:ae51ea50660f44fa3b317a45f3015556", - "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "nmdc:dc193d1a1693589003f992c820606bab", - "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" - ], - "was_informed_by": "gold:Gp0127641", - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0822t33", - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:47+00:00", - "output_data_objects": [ - { - "name": "Gp0127641_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", - "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", - "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "file_size_bytes": 3331 - }, - { - "name": "Gp0127641_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", - "md5_checksum": 
"a42312841b816448d8bd5d3adfa65f58", - "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", - "file_size_bytes": 761359 - }, - { - "name": "Gp0127641_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127641", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", - "md5_checksum": "f473f4a99336a49105d2722888ae0510", - "id": "nmdc:f473f4a99336a49105d2722888ae0510", - "file_size_bytes": 236161 - }, - { - "name": "Gp0127641_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127641", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", - "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", - "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", - "file_size_bytes": 1635953327 - }, - { - "name": "Gp0127641_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127641", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", - "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", - "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "file_size_bytes": 255166 - }, - { - "name": "Gp0127641_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127641", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", - "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", - "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "file_size_bytes": 2332521 - }, - { - "name": "Gp0127641_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127641", - "data_object_type": "Kraken2 
Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", - "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", - "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "file_size_bytes": 1307934195 - }, - { - "name": "Gp0127641_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127641", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", - "md5_checksum": "dc193d1a1693589003f992c820606bab", - "id": "nmdc:dc193d1a1693589003f992c820606bab", - "file_size_bytes": 635050 - }, - { - "name": "Gp0127641_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127641", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", - "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", - "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", - "file_size_bytes": 3964515 - } - ] + "name": "Gp0115669_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", + "md5_checksum": "05933784d02331b60b2531e2025cd3b7", + "id": "nmdc:05933784d02331b60b2531e2025cd3b7", + "file_size_bytes": 11362 + }, + { + "name": "Gp0115669_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", + "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", + "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", + "file_size_bytes": 909325 + }, + { + "name": "Gp0115669_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115669", + "data_object_type": "GOTTCHA2 Krona Plot", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", + "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", + "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "file_size_bytes": 261412 + }, + { + "name": "Gp0115669_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115669", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", + "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", + "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "file_size_bytes": 1481087410 + }, + { + "name": "Gp0115669_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115669", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", + "md5_checksum": "de45d70cc01749e9b5691dc24674545d", + "id": "nmdc:de45d70cc01749e9b5691dc24674545d", + "file_size_bytes": 256139 + }, + { + "name": "Gp0115669_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115669", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", + "md5_checksum": "534f97f3792b74385c4da305196a1b1d", + "id": "nmdc:534f97f3792b74385c4da305196a1b1d", + "file_size_bytes": 2323658 + }, + { + "name": "Gp0115669_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115669", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", + "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", + "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "file_size_bytes": 
1220980345 + }, + { + "name": "Gp0115669_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115669", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", + "md5_checksum": "07b6457a094fab96563168ed287dc59f", + "id": "nmdc:07b6457a094fab96563168ed287dc59f", + "file_size_bytes": 651795 + }, + { + "name": "Gp0115669_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115669", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", + "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", + "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", + "file_size_bytes": 3963303 + }, + { + "name": "Gp0115669_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", + "md5_checksum": "05933784d02331b60b2531e2025cd3b7", + "id": "nmdc:05933784d02331b60b2531e2025cd3b7", + "file_size_bytes": 11362 + }, + { + "name": "Gp0115669_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", + "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", + "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", + "file_size_bytes": 909325 + }, + { + "name": "Gp0115669_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115669", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", + "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", + "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "file_size_bytes": 261412 + }, + { + "name": 
"Gp0115669_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115669", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", + "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", + "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "file_size_bytes": 1481087410 + }, + { + "name": "Gp0115669_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115669", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", + "md5_checksum": "de45d70cc01749e9b5691dc24674545d", + "id": "nmdc:de45d70cc01749e9b5691dc24674545d", + "file_size_bytes": 256139 + }, + { + "name": "Gp0115669_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115669", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", + "md5_checksum": "534f97f3792b74385c4da305196a1b1d", + "id": "nmdc:534f97f3792b74385c4da305196a1b1d", + "file_size_bytes": 2323658 + }, + { + "name": "Gp0115669_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115669", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", + "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", + "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "file_size_bytes": 1220980345 + }, + { + "name": "Gp0115669_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115669", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", + "md5_checksum": "07b6457a094fab96563168ed287dc59f", + "id": "nmdc:07b6457a094fab96563168ed287dc59f", + "file_size_bytes": 651795 + }, + { + "name": "Gp0115669_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115669", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", + "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", + "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", + "file_size_bytes": 3963303 + }, + { + "name": "Gp0115669_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115669", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_contigs.fna", + "md5_checksum": "03eb095e55df50d639fab237d06c14ac", + "id": "nmdc:03eb095e55df50d639fab237d06c14ac", + "file_size_bytes": 58951440 + }, + { + "name": "Gp0115669_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115669", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_scaffolds.fna", + "md5_checksum": "569cb5da239e82dce1b40bfa7e2fd518", + "id": "nmdc:569cb5da239e82dce1b40bfa7e2fd518", + "file_size_bytes": 58607757 + }, + { + "name": "Gp0115669_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_covstats.txt", + "md5_checksum": "b77ef3014c80797cc88509adf02be002", + "id": "nmdc:b77ef3014c80797cc88509adf02be002", + "file_size_bytes": 8978635 + }, + { + "name": "Gp0115669_Assembled AGP file", + "description": "Assembled AGP file for Gp0115669", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_assembly.agp", + "md5_checksum": "62d08517e0ba0f991f2d8bbd66061d78", + "id": "nmdc:62d08517e0ba0f991f2d8bbd66061d78", + "file_size_bytes": 8358006 + }, + { + "name": "Gp0115669_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115669", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_pairedMapped_sorted.bam", + "md5_checksum": "568b82cb6038fec5df04c30cbd874098", + "id": "nmdc:568b82cb6038fec5df04c30cbd874098", + "file_size_bytes": 1940308720 + }, + { + "name": "Gp0115669_Protein FAA", + "description": "Protein FAA for Gp0115669", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_proteins.faa", + "md5_checksum": "8a5f288604c61556ff3e827725864fd1", + "id": "nmdc:8a5f288604c61556ff3e827725864fd1", + "file_size_bytes": 32524652 + }, + { + "name": "Gp0115669_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115669", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_structural_annotation.gff", + "md5_checksum": "0180998d6f3a3021638f04d9c0b35019", + "id": "nmdc:0180998d6f3a3021638f04d9c0b35019", + "file_size_bytes": 2514 + }, + { + "name": "Gp0115669_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115669", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_functional_annotation.gff", + "md5_checksum": "950b8c4ebd1da50e2ca079273540f3af", + "id": "nmdc:950b8c4ebd1da50e2ca079273540f3af", + "file_size_bytes": 36685287 + }, + { + "name": "Gp0115669_KO TSV file", + "description": "KO TSV file for Gp0115669", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko.tsv", + "md5_checksum": "96ec49c6124cf4f8f3e7da3525348477", + "id": "nmdc:96ec49c6124cf4f8f3e7da3525348477", + "file_size_bytes": 4815732 + }, + { + "name": "Gp0115669_EC TSV file", + "description": "EC TSV file for Gp0115669", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ec.tsv", + "md5_checksum": "12ca374a58bf899e42ed2c191a239e71", + "id": "nmdc:12ca374a58bf899e42ed2c191a239e71", + "file_size_bytes": 3090911 + }, + { + "name": "Gp0115669_COG GFF file", + "description": "COG GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cog.gff", + "md5_checksum": "b8ae2993aa29c8e04c00580dfdb82650", + "id": "nmdc:b8ae2993aa29c8e04c00580dfdb82650", + "file_size_bytes": 20357759 + }, + { + "name": "Gp0115669_PFAM GFF file", + "description": "PFAM GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_pfam.gff", + "md5_checksum": "7901c83b5a41e54854c96ab0b081ebd6", + "id": "nmdc:7901c83b5a41e54854c96ab0b081ebd6", + "file_size_bytes": 15876941 + }, + { + "name": "Gp0115669_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_tigrfam.gff", + "md5_checksum": "762fe35b733dd82f89f5dce44fa54ed1", + "id": "nmdc:762fe35b733dd82f89f5dce44fa54ed1", + "file_size_bytes": 2104873 + }, + { + "name": "Gp0115669_SMART GFF file", + "description": "SMART GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_smart.gff", + "md5_checksum": "661b70d6f41a44fcc1913b101f79d86a", + "id": "nmdc:661b70d6f41a44fcc1913b101f79d86a", + "file_size_bytes": 4523437 + }, + { + "name": "Gp0115669_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_supfam.gff", + "md5_checksum": "e1843a865023d75edd3139c14b8c355e", + "id": "nmdc:e1843a865023d75edd3139c14b8c355e", + "file_size_bytes": 25872277 + }, + { + "name": "Gp0115669_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cath_funfam.gff", + "md5_checksum": "a21449989b0b0884901602528b3f423e", + "id": "nmdc:a21449989b0b0884901602528b3f423e", + "file_size_bytes": 20254021 + }, + { + "name": "Gp0115669_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko_ec.gff", + "md5_checksum": "7f52547663f4eeea33de1e437012981e", + "id": "nmdc:7f52547663f4eeea33de1e437012981e", + "file_size_bytes": 15397038 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 }, + { + "name": "Gp0115669_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.tooShort.fa", + "md5_checksum": "420b015f88d0b88ab582805f39ed2b47", + "id": "nmdc:420b015f88d0b88ab582805f39ed2b47", + "file_size_bytes": 44979790 + }, + { + "name": "Gp0115669_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115669", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.unbinned.fa", + "md5_checksum": "ee8a556be3a57008c1c05ff9fe83437e", + "id": "nmdc:ee8a556be3a57008c1c05ff9fe83437e", + "file_size_bytes": 10530111 + }, + { + "name": "Gp0115669_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115669", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_checkm_qa.out", + "md5_checksum": "6fd5dfbd1500a60620194b5b9a4aab8a", + "id": "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", + "file_size_bytes": 1190 + }, + { + "name": "Gp0115669_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115669", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_hqmq_bin.zip", + "md5_checksum": "6a7eb248822ec0994ddeffe8b5aae7b1", + "id": "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", + "file_size_bytes": 681479 + }, + { + "name": "Gp0115669_metabat2 bins", + "description": "metabat2 bins for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_metabat_bin.zip", + "md5_checksum": "6a80769f6812a45615890cc2b03e9abf", + "id": "nmdc:6a80769f6812a45615890cc2b03e9abf", + "file_size_bytes": 359752 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "61e7199c833bcf838a700e42" + "$oid": "649b0052ec087f6bbab34735" }, "has_input": [ - "nmdc:a2700afe93abad6f004a3701348622a2" + "nmdc:03eb095e55df50d639fab237d06c14ac", + "nmdc:568b82cb6038fec5df04c30cbd874098", + "nmdc:950b8c4ebd1da50e2ca079273540f3af" ], + "too_short_contig_num": 107191, "part_of": [ - "nmdc:mga0822t33" + "nmdc:mga0k85x37" ], + "binned_contig_num": 651, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", 
"has_output": [ - "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "nmdc:a42312841b816448d8bd5d3adfa65f58", - "nmdc:f473f4a99336a49105d2722888ae0510", - "nmdc:ae51ea50660f44fa3b317a45f3015556", - "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "nmdc:dc193d1a1693589003f992c820606bab", - "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:420b015f88d0b88ab582805f39ed2b47", + "nmdc:ee8a556be3a57008c1c05ff9fe83437e", + "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", + "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", + "nmdc:6a80769f6812a45615890cc2b03e9abf" ], - "was_informed_by": "gold:Gp0127641", - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "was_informed_by": "gold:Gp0115669", + "input_contig_num": 114113, + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0822t33", - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:47+00:00", - "output_data_objects": [ - { - "name": "Gp0127641_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", - "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", - "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "file_size_bytes": 3331 - }, - { - "name": "Gp0127641_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", - "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", - "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", - "file_size_bytes": 761359 - }, - { - "name": "Gp0127641_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127641", - "data_object_type": "GOTTCHA2 Krona 
Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", - "md5_checksum": "f473f4a99336a49105d2722888ae0510", - "id": "nmdc:f473f4a99336a49105d2722888ae0510", - "file_size_bytes": 236161 - }, - { - "name": "Gp0127641_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127641", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", - "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", - "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", - "file_size_bytes": 1635953327 - }, - { - "name": "Gp0127641_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127641", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", - "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", - "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "file_size_bytes": 255166 - }, - { - "name": "Gp0127641_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127641", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", - "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", - "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "file_size_bytes": 2332521 - }, + "name": "MAGs Analysis Activity for nmdc:mga0k85x37", + "mags_list": [ { - "name": "Gp0127641_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127641", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", - "md5_checksum": 
"869730c4d81163e0c238dd4ae27ebd9e", - "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "file_size_bytes": 1307934195 + "number_of_contig": 48, + "completeness": 13.04, + "bin_name": "bins.1", + "gene_count": 245, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 }, { - "name": "Gp0127641_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127641", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", - "md5_checksum": "dc193d1a1693589003f992c820606bab", - "id": "nmdc:dc193d1a1693589003f992c820606bab", - "file_size_bytes": 635050 + "number_of_contig": 379, + "completeness": 72.42, + "bin_name": "bins.2", + "gene_count": 2513, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.85, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 32 }, { - "name": "Gp0127641_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127641", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", - "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", - "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", - "file_size_bytes": 3964515 + "number_of_contig": 224, + "completeness": 29.36, + "bin_name": "bins.3", + "gene_count": 1148, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + 
"contamination": 0.43, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 } - ] - }, + ], + "unbinned_contig_num": 6271, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fa2" + "$oid": "649b005bbf2caae0415ef9d0" }, "has_input": [ - "nmdc:a2700afe93abad6f004a3701348622a2" + "nmdc:03eb095e55df50d639fab237d06c14ac" ], "part_of": [ - "nmdc:mga0822t33" + "nmdc:mga0k85x37" ], - "ctg_logsum": 224925, - "scaf_logsum": 225846, - "gap_pct": 0.00137, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:18f0d53f503c855c0093677df58366e0", - "nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", - "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", - "nmdc:b89858508c524a03011cd5191f7589fa", - "nmdc:6974d394df454501e0515b31a2415367" + "nmdc:8a5f288604c61556ff3e827725864fd1", + "nmdc:0180998d6f3a3021638f04d9c0b35019", + "nmdc:950b8c4ebd1da50e2ca079273540f3af", + "nmdc:96ec49c6124cf4f8f3e7da3525348477", + "nmdc:12ca374a58bf899e42ed2c191a239e71", + "nmdc:b8ae2993aa29c8e04c00580dfdb82650", + "nmdc:7901c83b5a41e54854c96ab0b081ebd6", + "nmdc:762fe35b733dd82f89f5dce44fa54ed1", + "nmdc:661b70d6f41a44fcc1913b101f79d86a", + "nmdc:e1843a865023d75edd3139c14b8c355e", + "nmdc:a21449989b0b0884901602528b3f423e", + "nmdc:7f52547663f4eeea33de1e437012981e" ], - "asm_score": 3.367, - "was_informed_by": "gold:Gp0127641", - "ctg_powsum": 24264, - "scaf_max": 18020, - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "scaf_powsum": 24365, + "was_informed_by": "gold:Gp0115669", + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", "execution_resource": "NERSC-Cori", - "contigs": 191907, - "name": "Assembly Activity for nmdc:mga0822t33", - 
"ctg_max": 18020, - "gc_std": 0.10192, - "contig_bp": 94878155, - "gc_avg": 0.61857, - "started_at_time": "2021-10-11T02:27:18Z", - "scaf_bp": 94879455, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 191777, - "ended_at_time": "2021-10-11T04:05:47+00:00", - "ctg_l50": 489, - "ctg_l90": 290, - "ctg_n50": 53038, - "ctg_n90": 159679, - "scaf_l50": 489, - "scaf_l90": 290, - "scaf_n50": 53021, - "scaf_n90": 159560, - "output_data_objects": [ - { - "name": "Gp0127641_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127641", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_contigs.fna", - "md5_checksum": "18f0d53f503c855c0093677df58366e0", - "id": "nmdc:18f0d53f503c855c0093677df58366e0", - "file_size_bytes": 102384540 - }, - { - "name": "Gp0127641_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127641", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_scaffolds.fna", - "md5_checksum": "2fe3e02d47d8e1d66ccb15c0e42bf1e0", - "id": "nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", - "file_size_bytes": 101806869 - }, - { - "name": "Gp0127641_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_covstats.txt", - "md5_checksum": "04ad2128f72c26a4fa2d0ee7b1709ee9", - "id": "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", - "file_size_bytes": 15204446 - }, - { - "name": "Gp0127641_Assembled AGP file", - "description": "Assembled AGP file for Gp0127641", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_assembly.agp", - "md5_checksum": "b89858508c524a03011cd5191f7589fa", - "id": "nmdc:b89858508c524a03011cd5191f7589fa", - "file_size_bytes": 14206204 - }, - { - "name": 
"Gp0127641_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127641", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_pairedMapped_sorted.bam", - "md5_checksum": "6974d394df454501e0515b31a2415367", - "id": "nmdc:6974d394df454501e0515b31a2415367", - "file_size_bytes": 1967753614 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0k85x37", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9b9" + "$oid": "649b005f2ca5ee4adb139fba" }, "has_input": [ - "nmdc:18f0d53f503c855c0093677df58366e0" + "nmdc:6eef104db92b99c9741b26c667d75cd9" ], "part_of": [ - "nmdc:mga0822t33" + "nmdc:mga0k85x37" ], + "ctg_logsum": 151663, + "scaf_logsum": 152336, + "gap_pct": 0.00222, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", - "nmdc:9aba4a0c78cb073609b129c4bb65fe2d", - "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", - "nmdc:65768fea44cbd0183b286ab8f9883394", - "nmdc:b8ac75e77d2bc2607877e33ab692c43b", - "nmdc:31018e605b1569eb64006f2108b9d7d4", - "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", - "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", - "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", - "nmdc:b836f94d526c1936d080a4aa7c0646c9", - "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", - "nmdc:64b87140003d1a5a3d9ac939be55e57d" + "nmdc:03eb095e55df50d639fab237d06c14ac", + "nmdc:569cb5da239e82dce1b40bfa7e2fd518", + "nmdc:b77ef3014c80797cc88509adf02be002", + "nmdc:62d08517e0ba0f991f2d8bbd66061d78", + "nmdc:568b82cb6038fec5df04c30cbd874098" ], - "was_informed_by": "gold:Gp0127641", - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "asm_score": 4.733, + "was_informed_by": "gold:Gp0115669", + "ctg_powsum": 17017, + "scaf_max": 20100, + "id": 
"nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "scaf_powsum": 17101, "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0822t33", - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:05:47+00:00", - "output_data_objects": [ - { - "name": "Gp0127641_Protein FAA", - "description": "Protein FAA for Gp0127641", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_proteins.faa", - "md5_checksum": "f33a2a1789f5e913c3ef0dd0440a4877", - "id": "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", - "file_size_bytes": 57768168 - }, - { - "name": "Gp0127641_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127641", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_structural_annotation.gff", - "md5_checksum": "9aba4a0c78cb073609b129c4bb65fe2d", - "id": "nmdc:9aba4a0c78cb073609b129c4bb65fe2d", - "file_size_bytes": 2522 - }, - { - "name": "Gp0127641_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127641", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_functional_annotation.gff", - "md5_checksum": "2477ce1de68bdb1322eec1ffad5c74ac", - "id": "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", - "file_size_bytes": 65167139 - }, - { - "name": "Gp0127641_KO TSV file", - "description": "KO TSV file for Gp0127641", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko.tsv", - "md5_checksum": "65768fea44cbd0183b286ab8f9883394", - "id": "nmdc:65768fea44cbd0183b286ab8f9883394", - "file_size_bytes": 7266122 - }, - { - "name": "Gp0127641_EC TSV file", - "description": "EC TSV file 
for Gp0127641", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ec.tsv", - "md5_checksum": "b8ac75e77d2bc2607877e33ab692c43b", - "id": "nmdc:b8ac75e77d2bc2607877e33ab692c43b", - "file_size_bytes": 4793386 - }, - { - "name": "Gp0127641_COG GFF file", - "description": "COG GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cog.gff", - "md5_checksum": "31018e605b1569eb64006f2108b9d7d4", - "id": "nmdc:31018e605b1569eb64006f2108b9d7d4", - "file_size_bytes": 38184948 - }, - { - "name": "Gp0127641_PFAM GFF file", - "description": "PFAM GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_pfam.gff", - "md5_checksum": "c7ee9f693971a7686d8ff701fddbcb4a", - "id": "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", - "file_size_bytes": 28867184 - }, - { - "name": "Gp0127641_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_tigrfam.gff", - "md5_checksum": "5c0d5f63853ca572d8d73cac9a36c8d7", - "id": "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", - "file_size_bytes": 3122581 - }, - { - "name": "Gp0127641_SMART GFF file", - "description": "SMART GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_smart.gff", - "md5_checksum": "058c5e17eeeea69b2bf0b1b3c2838aea", - "id": "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", - "file_size_bytes": 8368877 - }, - { - "name": "Gp0127641_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_supfam.gff", - "md5_checksum": "b836f94d526c1936d080a4aa7c0646c9", - "id": "nmdc:b836f94d526c1936d080a4aa7c0646c9", - "file_size_bytes": 47986944 - }, - { - "name": "Gp0127641_Cath FunFam 
GFF file", - "description": "Cath FunFam GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cath_funfam.gff", - "md5_checksum": "0100d09c52d0c243b5ae45d95e6a22dc", - "id": "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", - "file_size_bytes": 36349993 - }, - { - "name": "Gp0127641_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko_ec.gff", - "md5_checksum": "64b87140003d1a5a3d9ac939be55e57d", - "id": "nmdc:64b87140003d1a5a3d9ac939be55e57d", - "file_size_bytes": 23113010 - } - ] - }, + "contigs": 114114, + "name": "Assembly Activity for nmdc:mga0k85x37", + "ctg_max": 20100, + "gc_std": 0.11871, + "contig_bp": 54567489, + "gc_avg": 0.55923, + "started_at_time": "2021-10-11T02:28:43Z", + "scaf_bp": 54568699, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 114011, + "ended_at_time": "2021-10-11T04:20:07+00:00", + "ctg_l50": 451, + "ctg_l90": 285, + "ctg_n50": 29019, + "ctg_n90": 94816, + "scaf_l50": 451, + "scaf_l90": 285, + "scaf_n50": 28976, + "scaf_n90": 94720 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34719" + "$oid": "649b009773e8249959349b52" }, + "id": "nmdc:omprc-11-hqmmwn16", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", "has_input": [ - "nmdc:18f0d53f503c855c0093677df58366e0", - "nmdc:6974d394df454501e0515b31a2415367", - "nmdc:2477ce1de68bdb1322eec1ffad5c74ac" - ], - "too_short_contig_num": 179152, - "part_of": [ - "nmdc:mga0822t33" + "nmdc:bsm-11-47nxfg85" ], - "binned_contig_num": 464, - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:024b6771e169aeaf57a3b10acc6045a1", - "nmdc:545cd253ad26116236dec9937b32d8ef", - "nmdc:1785cfe7cf0546dc8702193921a2f566", - "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", - "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063" + "jgi:55d817fe0d8785342fcf8276" ], - "was_informed_by": "gold:Gp0127641", - "input_contig_num": 191906, - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0822t33", - "mags_list": [ - { - "number_of_contig": 142, - "completeness": 24.43, - "bin_name": "bins.1", - "gene_count": 832, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 15 - }, - { - "number_of_contig": 322, - "completeness": 46.21, - "bin_name": "bins.2", - "gene_count": 1652, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 21 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" ], - "unbinned_contig_num": 12290, - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:05:47+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - 
"file_size_bytes": 0 - }, - { - "name": "Gp0127641_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.tooShort.fa", - "md5_checksum": "024b6771e169aeaf57a3b10acc6045a1", - "id": "nmdc:024b6771e169aeaf57a3b10acc6045a1", - "file_size_bytes": 80852741 - }, - { - "name": "Gp0127641_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.unbinned.fa", - "md5_checksum": "545cd253ad26116236dec9937b32d8ef", - "id": "nmdc:545cd253ad26116236dec9937b32d8ef", - "file_size_bytes": 19497941 - }, - { - "name": "Gp0127641_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127641", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_checkm_qa.out", - "md5_checksum": "1785cfe7cf0546dc8702193921a2f566", - "id": "nmdc:1785cfe7cf0546dc8702193921a2f566", - "file_size_bytes": 936 - }, - { - "name": "Gp0127641_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127641", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_hqmq_bin.zip", - "md5_checksum": "0a2a5650358b51ffcd3bbcfc874ac5c9", - "id": "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", - "file_size_bytes": 182 - }, - { - "name": "Gp0127641_metabat2 bins", - "description": "metabat2 bins for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_metabat_bin.zip", - "md5_checksum": "8f6b89831cabcd1dc7aa5e26d87f5063", - "id": "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063", - "file_size_bytes": 625863 - } + "add_date": 
"2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115669" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b56" - }, - "id": "nmdc:omprc-11-dtsr6z90", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-g079t498" - ], - "has_output": [ - "jgi:574fde6c7ded5e3df1ee140c" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127643" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c879" + "$oid": "649b009d6bdd4fd20273c88d" }, "has_input": [ - "nmdc:8b553dbdd47b90ed7f55d5747822f5d5" + "nmdc:f18b96b7d225d2f64f7b29015150113f" ], "part_of": [ - "nmdc:mga0evc178" + "nmdc:mga0k85x37" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:2ef23543e3064ca73c3034713d87c026", - "nmdc:87b172ead58a37be8d199c0acfc96759" + 
"nmdc:6eef104db92b99c9741b26c667d75cd9", + "nmdc:58fde3e96dbb28af9133bede850a2653" ], - "was_informed_by": "gold:Gp0127643", - "input_read_count": 25305566, - "output_read_bases": 3510483777, - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "was_informed_by": "gold:Gp0115669", + "input_read_count": 20957834, + "output_read_bases": 3065138996, + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", "execution_resource": "NERSC-Cori", - "input_read_bases": 3821140466, - "name": "Read QC Activity for nmdc:mga0evc178", - "output_read_count": 23508042, - "started_at_time": "2021-10-11T02:27:00Z", + "input_read_bases": 3164632934, + "name": "Read QC Activity for nmdc:mga0k85x37", + "output_read_count": 20454422, + "started_at_time": "2021-10-11T02:28:43Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:04:16+00:00", - "output_data_objects": [ - { - "name": "Gp0127643_Filtered Reads", - "description": "Filtered Reads for Gp0127643", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", - "md5_checksum": "2ef23543e3064ca73c3034713d87c026", - "id": "nmdc:2ef23543e3064ca73c3034713d87c026", - "file_size_bytes": 1891088172 - }, - { - "name": "Gp0127643_Filtered Stats", - "description": "Filtered Stats for Gp0127643", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", - "md5_checksum": "87b172ead58a37be8d199c0acfc96759", - "id": "nmdc:87b172ead58a37be8d199c0acfc96759", - "file_size_bytes": 289 - } - ] - }, + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf36" + "$oid": "649b009bff710ae353f8cf51" }, "has_input": [ - "nmdc:2ef23543e3064ca73c3034713d87c026" + "nmdc:6eef104db92b99c9741b26c667d75cd9" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", 
"has_output": [ - "nmdc:e8f825653e5736e29b73de55bd11a270", - "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "nmdc:c9074b2e05765afd68463dc301b87995", - "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "nmdc:6465fe59472b111ead1f0414ccf39f62", - "nmdc:9855ca52bce074c34dcebfd154fa94ff", - "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "nmdc:f98bae155bced880c058ecde7d539c18" + "nmdc:05933784d02331b60b2531e2025cd3b7", + "nmdc:50fc279637cb7048aaaeec9b223d0286", + "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "nmdc:de45d70cc01749e9b5691dc24674545d", + "nmdc:534f97f3792b74385c4da305196a1b1d", + "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "nmdc:07b6457a094fab96563168ed287dc59f", + "nmdc:164a1bc50e8d6509446ae2877be8231c" ], - "was_informed_by": "gold:Gp0127643", - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "was_informed_by": "gold:Gp0115669", + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0evc178", - "started_at_time": "2021-10-11T02:27:00Z", + "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", + "started_at_time": "2021-10-11T02:28:43Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:04:16+00:00", - "output_data_objects": [ - { - "name": "Gp0127643_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", - "md5_checksum": "e8f825653e5736e29b73de55bd11a270", - "id": "nmdc:e8f825653e5736e29b73de55bd11a270", - "file_size_bytes": 1326 - }, - { - "name": "Gp0127643_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", - "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", - "id": 
"nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "file_size_bytes": 664131 - }, - { - "name": "Gp0127643_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127643", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", - "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", - "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "file_size_bytes": 229630 - }, - { - "name": "Gp0127643_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127643", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", - "md5_checksum": "c9074b2e05765afd68463dc301b87995", - "id": "nmdc:c9074b2e05765afd68463dc301b87995", - "file_size_bytes": 1726867547 - }, - { - "name": "Gp0127643_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127643", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", - "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", - "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "file_size_bytes": 254021 - }, - { - "name": "Gp0127643_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127643", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", - "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", - "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", - "file_size_bytes": 2331702 - }, - { - "name": "Gp0127643_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127643", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", - "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", - "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", - "file_size_bytes": 1376409913 - }, - { - "name": "Gp0127643_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127643", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", - "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", - "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "file_size_bytes": 640506 - }, - { - "name": "Gp0127643_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127643", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", - "md5_checksum": "f98bae155bced880c058ecde7d539c18", - "id": "nmdc:f98bae155bced880c058ecde7d539c18", - "file_size_bytes": 3998448 - } - ] - }, + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "61e719b6833bcf838a70116b" + "$oid": "61e71a34833bcf838a701fb0" }, "has_input": [ - "nmdc:2ef23543e3064ca73c3034713d87c026" + "nmdc:6eef104db92b99c9741b26c667d75cd9" ], "part_of": [ - "nmdc:mga0evc178" + "nmdc:mga0k85x37" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:e8f825653e5736e29b73de55bd11a270", - "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "nmdc:c9074b2e05765afd68463dc301b87995", - "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "nmdc:6465fe59472b111ead1f0414ccf39f62", - "nmdc:9855ca52bce074c34dcebfd154fa94ff", - 
"nmdc:ed8059f366d60112deb41a0c307bc6fc", - "nmdc:f98bae155bced880c058ecde7d539c18" + "nmdc:05933784d02331b60b2531e2025cd3b7", + "nmdc:50fc279637cb7048aaaeec9b223d0286", + "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "nmdc:de45d70cc01749e9b5691dc24674545d", + "nmdc:534f97f3792b74385c4da305196a1b1d", + "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "nmdc:07b6457a094fab96563168ed287dc59f", + "nmdc:164a1bc50e8d6509446ae2877be8231c" ], - "was_informed_by": "gold:Gp0127643", - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "was_informed_by": "gold:Gp0115669", + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0evc178", - "started_at_time": "2021-10-11T02:27:00Z", + "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", + "started_at_time": "2021-10-11T02:28:43Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:04:16+00:00", - "output_data_objects": [ - { - "name": "Gp0127643_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", - "md5_checksum": "e8f825653e5736e29b73de55bd11a270", - "id": "nmdc:e8f825653e5736e29b73de55bd11a270", - "file_size_bytes": 1326 - }, - { - "name": "Gp0127643_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", - "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", - "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "file_size_bytes": 664131 - }, - { - "name": "Gp0127643_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127643", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", - "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", - "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "file_size_bytes": 229630 - }, - { - "name": "Gp0127643_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127643", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", - "md5_checksum": "c9074b2e05765afd68463dc301b87995", - "id": "nmdc:c9074b2e05765afd68463dc301b87995", - "file_size_bytes": 1726867547 - }, - { - "name": "Gp0127643_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127643", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", - "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", - "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "file_size_bytes": 254021 - }, - { - "name": "Gp0127643_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127643", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", - "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", - "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", - "file_size_bytes": 2331702 - }, - { - "name": "Gp0127643_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127643", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", - "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", - "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", - "file_size_bytes": 
1376409913 - }, - { - "name": "Gp0127643_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127643", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", - "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", - "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "file_size_bytes": 640506 - }, - { - "name": "Gp0127643_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127643", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", - "md5_checksum": "f98bae155bced880c058ecde7d539c18", - "id": "nmdc:f98bae155bced880c058ecde7d539c18", - "file_size_bytes": 3998448 - } - ] + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0115672_Filtered Reads", + "description": "Filtered Reads for Gp0115672", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filtered.fastq.gz", + "md5_checksum": "eb516fb673793f5161fb634fc19de310", + "id": "nmdc:eb516fb673793f5161fb634fc19de310", + "file_size_bytes": 2704299418 + }, + { + "name": "Gp0115672_Filtered Stats", + "description": "Filtered Stats for Gp0115672", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filterStats.txt", + "md5_checksum": "f4b68d1bd25f8d2fa8986aeef5fbec3f", + "id": "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f", + "file_size_bytes": 290 + }, + { + "name": "Gp0115672_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115672", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", + "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", + "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", + "file_size_bytes": 15806 + }, + { + "name": "Gp0115672_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", + "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", + "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "file_size_bytes": 1142479 + }, + { + "name": "Gp0115672_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115672", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", + "md5_checksum": "39a46887587926c9b81e126bb1036005", + "id": "nmdc:39a46887587926c9b81e126bb1036005", + "file_size_bytes": 273611 + }, + { + "name": "Gp0115672_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115672", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", + "md5_checksum": "b8dde2c047141d9097317c86f723eded", + "id": "nmdc:b8dde2c047141d9097317c86f723eded", + "file_size_bytes": 2436637487 + }, + { + "name": "Gp0115672_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115672", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", + "md5_checksum": "d530342b37f0785f92650e9650f31d6a", + "id": "nmdc:d530342b37f0785f92650e9650f31d6a", + "file_size_bytes": 261520 + }, + { + "name": "Gp0115672_Centrifuge Krona HTML report", + "description": 
"Centrifuge Krona HTML report for Gp0115672", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", + "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", + "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "file_size_bytes": 2342832 }, + { + "name": "Gp0115672_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115672", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", + "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", + "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "file_size_bytes": 1993150715 + }, + { + "name": "Gp0115672_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115672", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", + "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", + "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "file_size_bytes": 693572 + }, + { + "name": "Gp0115672_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115672", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", + "md5_checksum": "3266e79813577aae1d4377c62e73332c", + "id": "nmdc:3266e79813577aae1d4377c62e73332c", + "file_size_bytes": 4177114 + }, + { + "name": "Gp0115672_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", + "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", + "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", + "file_size_bytes": 15806 + }, + { + "name": 
"Gp0115672_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", + "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", + "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "file_size_bytes": 1142479 + }, + { + "name": "Gp0115672_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115672", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", + "md5_checksum": "39a46887587926c9b81e126bb1036005", + "id": "nmdc:39a46887587926c9b81e126bb1036005", + "file_size_bytes": 273611 + }, + { + "name": "Gp0115672_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115672", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", + "md5_checksum": "b8dde2c047141d9097317c86f723eded", + "id": "nmdc:b8dde2c047141d9097317c86f723eded", + "file_size_bytes": 2436637487 + }, + { + "name": "Gp0115672_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115672", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", + "md5_checksum": "d530342b37f0785f92650e9650f31d6a", + "id": "nmdc:d530342b37f0785f92650e9650f31d6a", + "file_size_bytes": 261520 + }, + { + "name": "Gp0115672_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115672", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", + "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", 
+ "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "file_size_bytes": 2342832 + }, + { + "name": "Gp0115672_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115672", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", + "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", + "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "file_size_bytes": 1993150715 + }, + { + "name": "Gp0115672_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115672", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", + "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", + "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "file_size_bytes": 693572 + }, + { + "name": "Gp0115672_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115672", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", + "md5_checksum": "3266e79813577aae1d4377c62e73332c", + "id": "nmdc:3266e79813577aae1d4377c62e73332c", + "file_size_bytes": 4177114 + }, + { + "name": "Gp0115672_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115672", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_contigs.fna", + "md5_checksum": "6f762f7b079f8c2633ef674a8264879f", + "id": "nmdc:6f762f7b079f8c2633ef674a8264879f", + "file_size_bytes": 129321165 + }, + { + "name": "Gp0115672_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115672", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_scaffolds.fna", + "md5_checksum": "26cc1c91f5f5e79d50041ff4623398b5", + "id": "nmdc:26cc1c91f5f5e79d50041ff4623398b5", + "file_size_bytes": 128655263 + }, + { + "name": "Gp0115672_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_covstats.txt", + "md5_checksum": "bd9d5497c4e2e0ea61df1f3f239107f7", + "id": "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", + "file_size_bytes": 17496249 + }, + { + "name": "Gp0115672_Assembled AGP file", + "description": "Assembled AGP file for Gp0115672", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_assembly.agp", + "md5_checksum": "362a9857666fe2f4e90bf6a818f551cc", + "id": "nmdc:362a9857666fe2f4e90bf6a818f551cc", + "file_size_bytes": 16401188 + }, + { + "name": "Gp0115672_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115672", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_pairedMapped_sorted.bam", + "md5_checksum": "afd1d03b38bc5deb9c196264bcea8795", + "id": "nmdc:afd1d03b38bc5deb9c196264bcea8795", + "file_size_bytes": 2952467259 + }, + { + "name": "Gp0115672_Protein FAA", + "description": "Protein FAA for Gp0115672", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_proteins.faa", + "md5_checksum": "84e3590be0f59007275fdf459d464f74", + "id": "nmdc:84e3590be0f59007275fdf459d464f74", + "file_size_bytes": 71651089 + }, + { + "name": "Gp0115672_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115672", + "data_object_type": "Structural Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_structural_annotation.gff", + "md5_checksum": "7dd630b842f587768235714e8a95f377", + "id": "nmdc:7dd630b842f587768235714e8a95f377", + "file_size_bytes": 2534 + }, + { + "name": "Gp0115672_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115672", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_functional_annotation.gff", + "md5_checksum": "38d776837c2208b557e2e4e5428c879d", + "id": "nmdc:38d776837c2208b557e2e4e5428c879d", + "file_size_bytes": 78213025 + }, + { + "name": "Gp0115672_KO TSV file", + "description": "KO TSV file for Gp0115672", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko.tsv", + "md5_checksum": "e38cb3355892042cb02580c26c083cd9", + "id": "nmdc:e38cb3355892042cb02580c26c083cd9", + "file_size_bytes": 10621211 + }, + { + "name": "Gp0115672_EC TSV file", + "description": "EC TSV file for Gp0115672", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ec.tsv", + "md5_checksum": "d55119e8f094efa075c44b22e8b2f689", + "id": "nmdc:d55119e8f094efa075c44b22e8b2f689", + "file_size_bytes": 6814564 + }, + { + "name": "Gp0115672_COG GFF file", + "description": "COG GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cog.gff", + "md5_checksum": "02a9ad5732172f04d1da83d145f63226", + "id": "nmdc:02a9ad5732172f04d1da83d145f63226", + "file_size_bytes": 45617917 + }, + { + "name": "Gp0115672_PFAM GFF file", + "description": "PFAM GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_pfam.gff", + "md5_checksum": "73811b72087e57f23db32f4a0ca4fb9c", + "id": 
"nmdc:73811b72087e57f23db32f4a0ca4fb9c", + "file_size_bytes": 37040943 + }, + { + "name": "Gp0115672_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_tigrfam.gff", + "md5_checksum": "dfc18c0f97e80c14ca6ca1bc2ba7a809", + "id": "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", + "file_size_bytes": 5380314 + }, + { + "name": "Gp0115672_SMART GFF file", + "description": "SMART GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_smart.gff", + "md5_checksum": "5a843529ffac8227515c5ea399ee4815", + "id": "nmdc:5a843529ffac8227515c5ea399ee4815", + "file_size_bytes": 10141642 + }, + { + "name": "Gp0115672_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_supfam.gff", + "md5_checksum": "82ac29a9999c6bc097cb0f35e4177e35", + "id": "nmdc:82ac29a9999c6bc097cb0f35e4177e35", + "file_size_bytes": 56808220 + }, + { + "name": "Gp0115672_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cath_funfam.gff", + "md5_checksum": "5b0e8395559ef0d8a341ae0e132e60f6", + "id": "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", + "file_size_bytes": 45632833 + }, + { + "name": "Gp0115672_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko_ec.gff", + "md5_checksum": "1e74c3df751a59a34e5c0d87f4a37563", + "id": "nmdc:1e74c3df751a59a34e5c0d87f4a37563", + "file_size_bytes": 33782864 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115672_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.tooShort.fa", + "md5_checksum": "2b6e0195e34697039eff38b51026be24", + "id": "nmdc:2b6e0195e34697039eff38b51026be24", + "file_size_bytes": 91055942 + }, + { + "name": "Gp0115672_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.unbinned.fa", + "md5_checksum": "f02d361fbef7549e2289bf4da623787d", + "id": "nmdc:f02d361fbef7549e2289bf4da623787d", + "file_size_bytes": 23202832 + }, + { + "name": "Gp0115672_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115672", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_checkm_qa.out", + "md5_checksum": "2de282e5507477269238ead458f11ac0", + "id": "nmdc:2de282e5507477269238ead458f11ac0", + "file_size_bytes": 2040 + }, + { + "name": "Gp0115672_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115672", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_hqmq_bin.zip", + "md5_checksum": "3abae1a573f9f0ac6da47e1ab9b9a723", + "id": "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", + "file_size_bytes": 1815861 + }, + { + "name": "Gp0115672_metabat2 bins", + "description": "metabat2 bins for Gp0115672", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_metabat_bin.zip", + "md5_checksum": "4d315d8dac1d9605d110ff2298b10229", + "id": "nmdc:4d315d8dac1d9605d110ff2298b10229", + "file_size_bytes": 2757900 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fa9" + "$oid": "649b0052ec087f6bbab34737" }, "has_input": [ - "nmdc:2ef23543e3064ca73c3034713d87c026" + "nmdc:6f762f7b079f8c2633ef674a8264879f", + "nmdc:afd1d03b38bc5deb9c196264bcea8795", + "nmdc:38d776837c2208b557e2e4e5428c879d" ], + "too_short_contig_num": 206294, "part_of": [ - "nmdc:mga0evc178" + "nmdc:mga0cwhj53" ], - "ctg_logsum": 258957, - "scaf_logsum": 260132, - "gap_pct": 0.00166, + "binned_contig_num": 1785, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", - "nmdc:001fd34d98a73eee6be5a41004e67469", - "nmdc:9b45294f72cb55b2f039366d33183fa3", - "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", - "nmdc:fa61e18d49a2012f115d970f0a195986" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2b6e0195e34697039eff38b51026be24", + "nmdc:f02d361fbef7549e2289bf4da623787d", + "nmdc:2de282e5507477269238ead458f11ac0", + "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", + "nmdc:4d315d8dac1d9605d110ff2298b10229" ], - "asm_score": 3.329, - "was_informed_by": "gold:Gp0127643", - "ctg_powsum": 27868, - "scaf_max": 12873, - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "scaf_powsum": 27998, + "was_informed_by": "gold:Gp0115672", + "input_contig_num": 221045, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", "execution_resource": "NERSC-Cori", - "contigs": 208967, - "name": "Assembly Activity for nmdc:mga0evc178", - "ctg_max": 12873, - "gc_std": 0.09438, - "contig_bp": 104567589, - "gc_avg": 0.63102, - "started_at_time": "2021-10-11T02:27:00Z", - "scaf_bp": 104569329, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 
208793, - "ended_at_time": "2021-10-11T04:04:16+00:00", - "ctg_l50": 497, - "ctg_l90": 292, - "ctg_n50": 57164, - "ctg_n90": 172414, - "scaf_l50": 498, - "scaf_l90": 292, - "scaf_n50": 56935, - "scaf_n90": 172256, - "output_data_objects": [ - { - "name": "Gp0127643_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127643", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_contigs.fna", - "md5_checksum": "a3a85f9f946ff34f28dfd4b5f8590f23", - "id": "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", - "file_size_bytes": 112772885 - }, - { - "name": "Gp0127643_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127643", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_scaffolds.fna", - "md5_checksum": "001fd34d98a73eee6be5a41004e67469", - "id": "nmdc:001fd34d98a73eee6be5a41004e67469", - "file_size_bytes": 112143079 - }, + "name": "MAGs Analysis Activity for nmdc:mga0cwhj53", + "mags_list": [ { - "name": "Gp0127643_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_covstats.txt", - "md5_checksum": "9b45294f72cb55b2f039366d33183fa3", - "id": "nmdc:9b45294f72cb55b2f039366d33183fa3", - "file_size_bytes": 16563197 - }, - { - "name": "Gp0127643_Assembled AGP file", - "description": "Assembled AGP file for Gp0127643", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_assembly.agp", - "md5_checksum": "b2ec4f5a3f02869684bdfaf065d75c54", - "id": "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", - "file_size_bytes": 15493398 - }, - { - "name": "Gp0127643_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127643", - "data_object_type": 
"Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_pairedMapped_sorted.bam", - "md5_checksum": "fa61e18d49a2012f115d970f0a195986", - "id": "nmdc:fa61e18d49a2012f115d970f0a195986", - "file_size_bytes": 2085429752 - } - ] - }, - { - "_id": { - "$oid": "649b005bbf2caae0415ef9bd" - }, - "has_input": [ - "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23" - ], - "part_of": [ - "nmdc:mga0evc178" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", - "nmdc:6151bacd37618698c28b00151b4998f8", - "nmdc:744277086ab01222a91233536d5e8976", - "nmdc:9c8a359c69bcb1179241f9a3c727fa23", - "nmdc:027b72af172d078f88471d932cf6d473", - "nmdc:ff24990735aa002e828ff7204a456ad2", - "nmdc:e884ad501d1bb3bcf006f0999020ce0f", - "nmdc:8321f818f53371491a7a80ef7e063ca6", - "nmdc:6f799842fe74ebff7942a026dbf9b1bf", - "nmdc:8ee84a629a5899c25e0fbd0f07084530", - "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", - "nmdc:d2990b0bd86e50209dcada6fa6b09510" - ], - "was_informed_by": "gold:Gp0127643", - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0evc178", - "started_at_time": "2021-10-11T02:27:00Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:04:16+00:00", - "output_data_objects": [ - { - "name": "Gp0127643_Protein FAA", - "description": "Protein FAA for Gp0127643", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_proteins.faa", - "md5_checksum": "b2cd0d1a024094fd4e308c21d439ed5f", - "id": "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", - "file_size_bytes": 63917762 - }, - { - "name": "Gp0127643_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127643", - "data_object_type": "Structural Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_structural_annotation.gff", - "md5_checksum": "6151bacd37618698c28b00151b4998f8", - "id": "nmdc:6151bacd37618698c28b00151b4998f8", - "file_size_bytes": 2521 - }, - { - "name": "Gp0127643_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127643", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_functional_annotation.gff", - "md5_checksum": "744277086ab01222a91233536d5e8976", - "id": "nmdc:744277086ab01222a91233536d5e8976", - "file_size_bytes": 71811800 - }, - { - "name": "Gp0127643_KO TSV file", - "description": "KO TSV file for Gp0127643", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko.tsv", - "md5_checksum": "9c8a359c69bcb1179241f9a3c727fa23", - "id": "nmdc:9c8a359c69bcb1179241f9a3c727fa23", - "file_size_bytes": 7959243 - }, - { - "name": "Gp0127643_EC TSV file", - "description": "EC TSV file for Gp0127643", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ec.tsv", - "md5_checksum": "027b72af172d078f88471d932cf6d473", - "id": "nmdc:027b72af172d078f88471d932cf6d473", - "file_size_bytes": 5202338 + "number_of_contig": 316, + "completeness": 61.03, + "bin_name": "bins.1", + "gene_count": 2148, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.85, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 19 }, { - "name": "Gp0127643_COG GFF file", - "description": "COG GFF file for Gp0127643", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cog.gff", - "md5_checksum": "ff24990735aa002e828ff7204a456ad2", - "id": "nmdc:ff24990735aa002e828ff7204a456ad2", - "file_size_bytes": 41649279 + "number_of_contig": 130, + "completeness": 34.64, + "bin_name": "bins.2", + "gene_count": 675, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 }, { - "name": "Gp0127643_PFAM GFF file", - "description": "PFAM GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_pfam.gff", - "md5_checksum": "e884ad501d1bb3bcf006f0999020ce0f", - "id": "nmdc:e884ad501d1bb3bcf006f0999020ce0f", - "file_size_bytes": 31529168 + "number_of_contig": 201, + "completeness": 19.13, + "bin_name": "bins.3", + "gene_count": 1000, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 }, { - "name": "Gp0127643_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_tigrfam.gff", - "md5_checksum": "8321f818f53371491a7a80ef7e063ca6", - "id": "nmdc:8321f818f53371491a7a80ef7e063ca6", - "file_size_bytes": 3378599 + "number_of_contig": 256, + "completeness": 75.9, + "bin_name": "bins.4", + "gene_count": 2131, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 1.52, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 
0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 22 }, { - "name": "Gp0127643_SMART GFF file", - "description": "SMART GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_smart.gff", - "md5_checksum": "6f799842fe74ebff7942a026dbf9b1bf", - "id": "nmdc:6f799842fe74ebff7942a026dbf9b1bf", - "file_size_bytes": 9132037 + "number_of_contig": 254, + "completeness": 100.0, + "bin_name": "bins.5", + "gene_count": 6188, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 95.83, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 86 }, { - "name": "Gp0127643_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_supfam.gff", - "md5_checksum": "8ee84a629a5899c25e0fbd0f07084530", - "id": "nmdc:8ee84a629a5899c25e0fbd0f07084530", - "file_size_bytes": 52720037 + "number_of_contig": 106, + "completeness": 7.24, + "bin_name": "bins.6", + "gene_count": 524, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 }, { - "name": "Gp0127643_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cath_funfam.gff", - "md5_checksum": "6697cdb0b1dcf83e7ecb8fcefa0703ef", - "id": "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", - "file_size_bytes": 39643020 + "number_of_contig": 306, + "completeness": 65.74, + "bin_name": "bins.7", + "gene_count": 2357, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, 
+ "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 2.3, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 29 }, { - "name": "Gp0127643_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko_ec.gff", - "md5_checksum": "d2990b0bd86e50209dcada6fa6b09510", - "id": "nmdc:d2990b0bd86e50209dcada6fa6b09510", - "file_size_bytes": 25272687 - } - ] - }, - { - "_id": { - "$oid": "649b0052ec087f6bbab3471e" - }, - "has_input": [ - "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", - "nmdc:fa61e18d49a2012f115d970f0a195986", - "nmdc:744277086ab01222a91233536d5e8976" - ], - "too_short_contig_num": 194066, - "part_of": [ - "nmdc:mga0evc178" - ], - "binned_contig_num": 470, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:ed8acb6d21b14da131350d9c52aa7041", - "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", - "nmdc:bd388cba93a77cde2f5791fa0f580865", - "nmdc:30695aca02693c6aba316db3e9f565a8", - "nmdc:79de6d81848956e1c06a811bc9bdab81" - ], - "was_informed_by": "gold:Gp0127643", - "input_contig_num": 208967, - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0evc178", - "mags_list": [ - { - "number_of_contig": 470, - "completeness": 30.73, - "bin_name": "bins.1", - "gene_count": 2501, + "number_of_contig": 216, + "completeness": 47.34, + "bin_name": "bins.8", + "gene_count": 1203, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 1.71, + "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 22 + "num_t_rna": 12 } ], - "unbinned_contig_num": 
14431, - "started_at_time": "2021-10-11T02:27:00Z", + "unbinned_contig_num": 12966, + "started_at_time": "2021-10-11T02:28:16Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:04:16+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127643_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.tooShort.fa", - "md5_checksum": "ed8acb6d21b14da131350d9c52aa7041", - "id": "nmdc:ed8acb6d21b14da131350d9c52aa7041", - "file_size_bytes": 87917684 - }, - { - "name": "Gp0127643_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.unbinned.fa", - "md5_checksum": "d81e3cc17fa762a717dcf324a0aa3d45", - "id": "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", - "file_size_bytes": 22746526 - }, - { - "name": "Gp0127643_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127643", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_checkm_qa.out", - "md5_checksum": "bd388cba93a77cde2f5791fa0f580865", - "id": "nmdc:bd388cba93a77cde2f5791fa0f580865", - "file_size_bytes": 785 - }, - { - "name": "Gp0127643_high-quality and medium-quality bins", - "description": "high-quality and 
medium-quality bins for Gp0127643", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_hqmq_bin.zip", - "md5_checksum": "30695aca02693c6aba316db3e9f565a8", - "id": "nmdc:30695aca02693c6aba316db3e9f565a8", - "file_size_bytes": 182 - }, - { - "name": "Gp0127643_metabat2 bins", - "description": "metabat2 bins for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_metabat_bin.zip", - "md5_checksum": "79de6d81848956e1c06a811bc9bdab81", - "id": "nmdc:79de6d81848956e1c06a811bc9bdab81", - "file_size_bytes": 614113 - } - ] + "ended_at_time": "2021-10-11T05:56:20+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b57" - }, - "id": "nmdc:omprc-11-hwadfm25", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-n80sx618" - ], - "has_output": [ - "jgi:574fe0a87ded5e3df1ee148e" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127644" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c86b" + "$oid": 
"649b005bbf2caae0415ef9d1" }, "has_input": [ - "nmdc:a1d8fff4b02719c4d0f9c442cf052f69" + "nmdc:6f762f7b079f8c2633ef674a8264879f" ], "part_of": [ - "nmdc:mga0bpf635" + "nmdc:mga0cwhj53" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:98da35678c59689ce738b2a6bc708692", - "nmdc:ff08ea52254e0cc1011c56656505b27b" + "nmdc:84e3590be0f59007275fdf459d464f74", + "nmdc:7dd630b842f587768235714e8a95f377", + "nmdc:38d776837c2208b557e2e4e5428c879d", + "nmdc:e38cb3355892042cb02580c26c083cd9", + "nmdc:d55119e8f094efa075c44b22e8b2f689", + "nmdc:02a9ad5732172f04d1da83d145f63226", + "nmdc:73811b72087e57f23db32f4a0ca4fb9c", + "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", + "nmdc:5a843529ffac8227515c5ea399ee4815", + "nmdc:82ac29a9999c6bc097cb0f35e4177e35", + "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", + "nmdc:1e74c3df751a59a34e5c0d87f4a37563" ], - "was_informed_by": "gold:Gp0127644", - "input_read_count": 11431762, - "output_read_bases": 1245433047, - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "was_informed_by": "gold:Gp0115672", + "id": "nmdc:50eb8825777d1294abac150521e5c2db", "execution_resource": "NERSC-Cori", - "input_read_bases": 1726196062, - "name": "Read QC Activity for nmdc:mga0bpf635", - "output_read_count": 8322164, - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T02:55:00+00:00", - "output_data_objects": [ - { - "name": "Gp0127644_Filtered Reads", - "description": "Filtered Reads for Gp0127644", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filtered.fastq.gz", - "md5_checksum": "98da35678c59689ce738b2a6bc708692", - "id": "nmdc:98da35678c59689ce738b2a6bc708692", - "file_size_bytes": 694199131 - }, - { - "name": "Gp0127644_Filtered Stats", - "description": "Filtered Stats for Gp0127644", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filterStats.txt", - "md5_checksum": "ff08ea52254e0cc1011c56656505b27b", - "id": "nmdc:ff08ea52254e0cc1011c56656505b27b", - "file_size_bytes": 280 - } - ] - }, + "name": "Annotation Activity for nmdc:mga0cwhj53", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf38" + "$oid": "649b005f2ca5ee4adb139fbf" }, "has_input": [ - "nmdc:98da35678c59689ce738b2a6bc708692" + "nmdc:eb516fb673793f5161fb634fc19de310" + ], + "part_of": [ + "nmdc:mga0cwhj53" ], + "ctg_logsum": 447149, + "scaf_logsum": 448446, + "gap_pct": 0.0019, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "nmdc:9baa708296f62334e099cf61711b5e16", - "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" + "nmdc:6f762f7b079f8c2633ef674a8264879f", + "nmdc:26cc1c91f5f5e79d50041ff4623398b5", + "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", + "nmdc:362a9857666fe2f4e90bf6a818f551cc", + "nmdc:afd1d03b38bc5deb9c196264bcea8795" ], - "was_informed_by": "gold:Gp0127644", - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "asm_score": 13.127, + "was_informed_by": "gold:Gp0115672", + "ctg_powsum": 55923, + "scaf_max": 157008, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "scaf_powsum": 56113, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:55:00+00:00", - "output_data_objects": [ - { - "name": 
"gold:Gp0452677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for gold:Gp0452677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", - "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", - "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "file_size_bytes": 109 - }, - { - "name": "Gp0127644_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", - "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", - "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "file_size_bytes": 426075 - }, - { - "name": "gold:Gp0452677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for gold:Gp0452677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", - "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", - "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "file_size_bytes": 226638 - }, - { - "name": "Gp0127644_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127644", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", - "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", - "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "file_size_bytes": 610862986 - }, - { - "name": "Gp0127644_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127644", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", - "md5_checksum": "9baa708296f62334e099cf61711b5e16", - "id": "nmdc:9baa708296f62334e099cf61711b5e16", - 
"file_size_bytes": 243322 - }, - { - "name": "Gp0127644_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127644", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", - "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", - "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "file_size_bytes": 2294995 - }, - { - "name": "Gp0127644_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127644", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", - "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", - "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "file_size_bytes": 487178087 - }, - { - "name": "Gp0127644_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127644", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", - "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", - "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "file_size_bytes": 557688 - }, - { - "name": "Gp0127644_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127644", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", - "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", - "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", - "file_size_bytes": 3567307 - } + "contigs": 221046, + "name": "Assembly Activity for nmdc:mga0cwhj53", + "ctg_max": 157008, + "gc_std": 0.10619, + "contig_bp": 120471215, + "gc_avg": 0.56196, + "started_at_time": "2021-10-11T02:28:16Z", + "scaf_bp": 120473505, + "type": "nmdc:MetagenomeAssembly", + 
"scaffolds": 220853, + "ended_at_time": "2021-10-11T05:56:20+00:00", + "ctg_l50": 528, + "ctg_l90": 293, + "ctg_n50": 48327, + "ctg_n90": 178881, + "scaf_l50": 529, + "scaf_l90": 293, + "scaf_n50": 48077, + "scaf_n90": 178708, + "scaf_l_gt50k": 2147966, + "scaf_n_gt50k": 28, + "scaf_pct_gt50k": 1.7829365 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b53" + }, + "id": "nmdc:omprc-11-qsxwf517", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-sdhyr752" + ], + "has_output": [ + "jgi:55d7402b0d8785342fcf7e3c" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115672" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e7197c833bcf838a700966" + "$oid": "649b009d6bdd4fd20273c889" }, "has_input": [ - "nmdc:98da35678c59689ce738b2a6bc708692" + "nmdc:1f6998a48aec6f4008a92d2b8e17d314" ], "part_of": [ - "nmdc:mga0bpf635" + "nmdc:mga0cwhj53" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - 
"nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "nmdc:9baa708296f62334e099cf61711b5e16", - "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" + "nmdc:eb516fb673793f5161fb634fc19de310", + "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f" ], - "was_informed_by": "gold:Gp0127644", - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "was_informed_by": "gold:Gp0115672", + "input_read_count": 34522052, + "output_read_bases": 5012430912, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:55:00+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for gold:Gp0452677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", - "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", - "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "file_size_bytes": 109 - }, - { - "name": "Gp0127644_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", - "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", - "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "file_size_bytes": 426075 - }, - { - "name": "gold:Gp0452677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for gold:Gp0452677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", - "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", - "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "file_size_bytes": 226638 - }, - 
{ - "name": "Gp0127644_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127644", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", - "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", - "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "file_size_bytes": 610862986 - }, - { - "name": "Gp0127644_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127644", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", - "md5_checksum": "9baa708296f62334e099cf61711b5e16", - "id": "nmdc:9baa708296f62334e099cf61711b5e16", - "file_size_bytes": 243322 - }, - { - "name": "Gp0127644_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127644", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", - "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", - "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "file_size_bytes": 2294995 - }, - { - "name": "Gp0127644_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127644", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", - "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", - "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "file_size_bytes": 487178087 - }, - { - "name": "Gp0127644_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127644", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", - "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", - "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "file_size_bytes": 557688 - }, - { - "name": "Gp0127644_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127644", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", - "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", - "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", - "file_size_bytes": 3567307 - } - ] - }, + "input_read_bases": 5212829852, + "name": "Read QC Activity for nmdc:mga0cwhj53", + "output_read_count": 33454554, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fa1" + "$oid": "649b009bff710ae353f8cf52" }, "has_input": [ - "nmdc:98da35678c59689ce738b2a6bc708692" - ], - "part_of": [ - "nmdc:mga0bpf635" + "nmdc:eb516fb673793f5161fb634fc19de310" ], - "ctg_logsum": 37962, - "scaf_logsum": 38062, - "gap_pct": 0.00069, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", - "nmdc:b6afa25cadc614083204383bbad06f48", - "nmdc:87b1ea13d41499eeb5eb67932db01423", - "nmdc:72a38c353753abcb6d046385bf2950f6", - "nmdc:ecf498b9aa15f9d000845ffdfa7eb521" + "nmdc:5a9326e2e450663a5ed8c97389136b25", + "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "nmdc:39a46887587926c9b81e126bb1036005", + "nmdc:b8dde2c047141d9097317c86f723eded", + "nmdc:d530342b37f0785f92650e9650f31d6a", + "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "nmdc:3266e79813577aae1d4377c62e73332c" ], - "asm_score": 3.712, - 
"was_informed_by": "gold:Gp0127644", - "ctg_powsum": 4162.045, - "scaf_max": 11252, - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", - "scaf_powsum": 4172.955, + "was_informed_by": "gold:Gp0115672", + "id": "nmdc:50eb8825777d1294abac150521e5c2db", "execution_resource": "NERSC-Cori", - "contigs": 46135, - "name": "Assembly Activity for nmdc:mga0bpf635", - "ctg_max": 11252, - "gc_std": 0.09328, - "contig_bp": 20152503, - "gc_avg": 0.6086, - "started_at_time": "2021-10-11T02:26:47Z", - "scaf_bp": 20152643, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 46121, - "ended_at_time": "2021-10-11T02:55:00+00:00", - "ctg_l50": 394, - "ctg_l90": 285, - "ctg_n50": 14034, - "ctg_n90": 39639, - "scaf_l50": 395, - "scaf_l90": 285, - "scaf_n50": 13959, - "scaf_n90": 39626, - "output_data_objects": [ - { - "name": "Gp0127644_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127644", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_contigs.fna", - "md5_checksum": "16f77f4aaed29f3acc31646e1ce06b2d", - "id": "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", - "file_size_bytes": 21881611 - }, - { - "name": "Gp0127644_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127644", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_scaffolds.fna", - "md5_checksum": "b6afa25cadc614083204383bbad06f48", - "id": "nmdc:b6afa25cadc614083204383bbad06f48", - "file_size_bytes": 21742982 - }, - { - "name": "Gp0127644_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_covstats.txt", - "md5_checksum": "87b1ea13d41499eeb5eb67932db01423", - "id": "nmdc:87b1ea13d41499eeb5eb67932db01423", - "file_size_bytes": 3612085 - }, - { - "name": "Gp0127644_Assembled AGP file", 
- "description": "Assembled AGP file for Gp0127644", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_assembly.agp", - "md5_checksum": "72a38c353753abcb6d046385bf2950f6", - "id": "nmdc:72a38c353753abcb6d046385bf2950f6", - "file_size_bytes": 3350598 - }, - { - "name": "Gp0127644_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127644", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_pairedMapped_sorted.bam", - "md5_checksum": "ecf498b9aa15f9d000845ffdfa7eb521", - "id": "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", - "file_size_bytes": 746781339 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9b4" + "$oid": "61e71a33833bcf838a701f34" }, "has_input": [ - "nmdc:16f77f4aaed29f3acc31646e1ce06b2d" + "nmdc:eb516fb673793f5161fb634fc19de310" ], "part_of": [ - "nmdc:mga0bpf635" + "nmdc:mga0cwhj53" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9d960cad4d88795aba8bb1acbe415fc9", - "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", - "nmdc:349838000a53b6655a5b12edf6351c50", - "nmdc:7bb072409221978dbea8ff5cb0bdba1e", - "nmdc:3d69ade973d1652bd6f061b2122ffe36", - "nmdc:2a9b9a21fe5fb84219e0be5f153665be", - "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", - "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", - "nmdc:f8d79375a2bf82f257e0015efeee6f26", - "nmdc:c9b4806132d19e740822b1a84bc4f07d", - "nmdc:e304e10eb60423c23486e140594d1a7b", - 
"nmdc:9b78f0ac527ee7287ae532a896582948" - ], - "was_informed_by": "gold:Gp0127644", - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0bpf635", - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T02:55:00+00:00", - "output_data_objects": [ - { - "name": "Gp0127644_Protein FAA", - "description": "Protein FAA for Gp0127644", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_proteins.faa", - "md5_checksum": "9d960cad4d88795aba8bb1acbe415fc9", - "id": "nmdc:9d960cad4d88795aba8bb1acbe415fc9", - "file_size_bytes": 12848136 - }, - { - "name": "Gp0127644_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127644", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_structural_annotation.gff", - "md5_checksum": "cb5d98ee6e459ce1cc2d14295424eef1", - "id": "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", - "file_size_bytes": 2488 - }, - { - "name": "Gp0127644_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127644", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_functional_annotation.gff", - "md5_checksum": "349838000a53b6655a5b12edf6351c50", - "id": "nmdc:349838000a53b6655a5b12edf6351c50", - "file_size_bytes": 15112193 - }, - { - "name": "Gp0127644_KO TSV file", - "description": "KO TSV file for Gp0127644", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko.tsv", - "md5_checksum": "7bb072409221978dbea8ff5cb0bdba1e", - "id": "nmdc:7bb072409221978dbea8ff5cb0bdba1e", - "file_size_bytes": 1814299 - }, 
- { - "name": "Gp0127644_EC TSV file", - "description": "EC TSV file for Gp0127644", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ec.tsv", - "md5_checksum": "3d69ade973d1652bd6f061b2122ffe36", - "id": "nmdc:3d69ade973d1652bd6f061b2122ffe36", - "file_size_bytes": 1233948 - }, - { - "name": "Gp0127644_COG GFF file", - "description": "COG GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cog.gff", - "md5_checksum": "2a9b9a21fe5fb84219e0be5f153665be", - "id": "nmdc:2a9b9a21fe5fb84219e0be5f153665be", - "file_size_bytes": 9028987 - }, - { - "name": "Gp0127644_PFAM GFF file", - "description": "PFAM GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_pfam.gff", - "md5_checksum": "83e64b9fc9406a72d18e8dd4742bac1a", - "id": "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", - "file_size_bytes": 6574998 - }, - { - "name": "Gp0127644_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_tigrfam.gff", - "md5_checksum": "cdc4cc8629b7c61f1708f654aaaa9932", - "id": "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", - "file_size_bytes": 783908 - }, - { - "name": "Gp0127644_SMART GFF file", - "description": "SMART GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_smart.gff", - "md5_checksum": "f8d79375a2bf82f257e0015efeee6f26", - "id": "nmdc:f8d79375a2bf82f257e0015efeee6f26", - "file_size_bytes": 2030043 - }, - { - "name": "Gp0127644_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_supfam.gff", - "md5_checksum": "c9b4806132d19e740822b1a84bc4f07d", - "id": "nmdc:c9b4806132d19e740822b1a84bc4f07d", - 
"file_size_bytes": 11227652 - }, - { - "name": "Gp0127644_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cath_funfam.gff", - "md5_checksum": "e304e10eb60423c23486e140594d1a7b", - "id": "nmdc:e304e10eb60423c23486e140594d1a7b", - "file_size_bytes": 8555821 - }, - { - "name": "Gp0127644_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko_ec.gff", - "md5_checksum": "9b78f0ac527ee7287ae532a896582948", - "id": "nmdc:9b78f0ac527ee7287ae532a896582948", - "file_size_bytes": 5791094 - } - ] + "nmdc:5a9326e2e450663a5ed8c97389136b25", + "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "nmdc:39a46887587926c9b81e126bb1036005", + "nmdc:b8dde2c047141d9097317c86f723eded", + "nmdc:d530342b37f0785f92650e9650f31d6a", + "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "nmdc:3266e79813577aae1d4377c62e73332c" + ], + "was_informed_by": "gold:Gp0115672", + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127640_Filtered Reads", + "description": "Filtered Reads for Gp0127640", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", + "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", + "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", + "file_size_bytes": 
2416846292 + }, + { + "name": "Gp0127640_Filtered Stats", + "description": "Filtered Stats for Gp0127640", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", + "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", + "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", + "file_size_bytes": 285 + }, + { + "name": "Gp0127640_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", + "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", + "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "file_size_bytes": 3824 + }, + { + "name": "Gp0127640_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", + "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", + "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "file_size_bytes": 850491 + }, + { + "name": "Gp0127640_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127640", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", + "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", + "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "file_size_bytes": 236151 + }, + { + "name": "Gp0127640_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127640", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", + "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", + "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "file_size_bytes": 2057333090 + }, + { + "name": 
"Gp0127640_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127640", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", + "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", + "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", + "file_size_bytes": 256577 + }, + { + "name": "Gp0127640_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127640", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", + "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", + "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "file_size_bytes": 2334984 + }, + { + "name": "Gp0127640_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127640", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", + "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", + "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "file_size_bytes": 1658481192 + }, + { + "name": "Gp0127640_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127640", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", + "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", + "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", + "file_size_bytes": 653129 + }, + { + "name": "Gp0127640_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127640", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", + "md5_checksum": 
"d47144fd7ec0608e7677550d9589c889", + "id": "nmdc:d47144fd7ec0608e7677550d9589c889", + "file_size_bytes": 3977820 + }, + { + "name": "Gp0127640_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", + "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", + "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "file_size_bytes": 3824 + }, + { + "name": "Gp0127640_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", + "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", + "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "file_size_bytes": 850491 + }, + { + "name": "Gp0127640_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127640", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", + "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", + "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "file_size_bytes": 236151 + }, + { + "name": "Gp0127640_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127640", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", + "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", + "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "file_size_bytes": 2057333090 + }, + { + "name": "Gp0127640_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127640", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", + "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", + "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", + "file_size_bytes": 256577 + }, + { + "name": "Gp0127640_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127640", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", + "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", + "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "file_size_bytes": 2334984 + }, + { + "name": "Gp0127640_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127640", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", + "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", + "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "file_size_bytes": 1658481192 + }, + { + "name": "Gp0127640_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127640", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", + "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", + "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", + "file_size_bytes": 653129 + }, + { + "name": "Gp0127640_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127640", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", + "md5_checksum": "d47144fd7ec0608e7677550d9589c889", + "id": "nmdc:d47144fd7ec0608e7677550d9589c889", + "file_size_bytes": 3977820 + }, + { + "name": "Gp0127640_Assembled contigs fasta", + 
"description": "Assembled contigs fasta for Gp0127640", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_contigs.fna", + "md5_checksum": "b85a322271c7f93ef295141d12cb2dbc", + "id": "nmdc:b85a322271c7f93ef295141d12cb2dbc", + "file_size_bytes": 44243651 + }, + { + "name": "Gp0127640_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127640", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_scaffolds.fna", + "md5_checksum": "794445b3fedfaec8af9b70b167bc6852", + "id": "nmdc:794445b3fedfaec8af9b70b167bc6852", + "file_size_bytes": 43923338 + }, + { + "name": "Gp0127640_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_covstats.txt", + "md5_checksum": "d389ae4f8a92c21423fc77aa054ba985", + "id": "nmdc:d389ae4f8a92c21423fc77aa054ba985", + "file_size_bytes": 8365383 + }, + { + "name": "Gp0127640_Assembled AGP file", + "description": "Assembled AGP file for Gp0127640", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_assembly.agp", + "md5_checksum": "765541c2865f6047d5e2e8e7299908e4", + "id": "nmdc:765541c2865f6047d5e2e8e7299908e4", + "file_size_bytes": 7782777 + }, + { + "name": "Gp0127640_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127640", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_pairedMapped_sorted.bam", + "md5_checksum": "78b554dd52492c3d1e401d0c9198b89b", + "id": "nmdc:78b554dd52492c3d1e401d0c9198b89b", + "file_size_bytes": 2578128724 + }, + { + "name": "Gp0127640_Protein FAA", + "description": "Protein FAA for Gp0127640", + 
"data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_proteins.faa", + "md5_checksum": "13e64b02d230f76008e42256a48d1cec", + "id": "nmdc:13e64b02d230f76008e42256a48d1cec", + "file_size_bytes": 26637626 + }, + { + "name": "Gp0127640_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127640", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_structural_annotation.gff", + "md5_checksum": "7babb0c9f662679659b7b1bee469f073", + "id": "nmdc:7babb0c9f662679659b7b1bee469f073", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127640_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127640", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_functional_annotation.gff", + "md5_checksum": "e84b1e43d546c9793c3a4d9eaa8cee86", + "id": "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", + "file_size_bytes": 32184781 + }, + { + "name": "Gp0127640_KO TSV file", + "description": "KO TSV file for Gp0127640", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko.tsv", + "md5_checksum": "2e3e5b7ffa39e533db8ed1d925426f50", + "id": "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", + "file_size_bytes": 3620933 + }, + { + "name": "Gp0127640_EC TSV file", + "description": "EC TSV file for Gp0127640", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ec.tsv", + "md5_checksum": "62e46d35a6aff3a52b39c6bb04dc6161", + "id": "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", + "file_size_bytes": 2390086 + }, + { + "name": "Gp0127640_COG GFF file", + "description": "COG GFF file for Gp0127640", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cog.gff", + "md5_checksum": "93fa7de9c74cfcff99bb74e27fa94674", + "id": "nmdc:93fa7de9c74cfcff99bb74e27fa94674", + "file_size_bytes": 17898567 + }, + { + "name": "Gp0127640_PFAM GFF file", + "description": "PFAM GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_pfam.gff", + "md5_checksum": "63bad86a6d7fb23b5a4683ae36820622", + "id": "nmdc:63bad86a6d7fb23b5a4683ae36820622", + "file_size_bytes": 12585366 + }, + { + "name": "Gp0127640_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_tigrfam.gff", + "md5_checksum": "d6b80bb748b4d6fbe52c15300ad2137b", + "id": "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", + "file_size_bytes": 1170952 + }, + { + "name": "Gp0127640_SMART GFF file", + "description": "SMART GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_smart.gff", + "md5_checksum": "46722961c280df725d15489e82502031", + "id": "nmdc:46722961c280df725d15489e82502031", + "file_size_bytes": 3891425 + }, + { + "name": "Gp0127640_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_supfam.gff", + "md5_checksum": "6f1a0029cb25f1433de1d7c241bc7553", + "id": "nmdc:6f1a0029cb25f1433de1d7c241bc7553", + "file_size_bytes": 22543435 + }, + { + "name": "Gp0127640_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cath_funfam.gff", + "md5_checksum": "6d2839963f616d810e66435b3bbe018a", + "id": "nmdc:6d2839963f616d810e66435b3bbe018a", + "file_size_bytes": 16572925 + }, + { + "name": "Gp0127640_KO_EC GFF file", + "description": "KO_EC GFF file for 
Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko_ec.gff", + "md5_checksum": "efbf36ca49c40ad0367ecd23c012b29b", + "id": "nmdc:efbf36ca49c40ad0367ecd23c012b29b", + "file_size_bytes": 11571776 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127640_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.tooShort.fa", + "md5_checksum": "ce395376d0bc7121e4dc5efc774d5e74", + "id": "nmdc:ce395376d0bc7121e4dc5efc774d5e74", + "file_size_bytes": 40358420 + }, + { + "name": "Gp0127640_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.unbinned.fa", + "md5_checksum": "a16cbb06b91ebfb45f5a010effc1cfde", + "id": "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", + "file_size_bytes": 2755747 + }, + { + "name": "Gp0127640_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127640", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_checkm_qa.out", + "md5_checksum": "97ae130ca2f75c66b8cbd60c4d35463a", + "id": "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", + "file_size_bytes": 760 }, + { + "name": "Gp0127640_high-quality and medium-quality bins", + "description": "high-quality and 
medium-quality bins for Gp0127640", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_hqmq_bin.zip", + "md5_checksum": "5945311235c6195ad409ab30e2b72c0c", + "id": "nmdc:5945311235c6195ad409ab30e2b72c0c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127640_metabat2 bins", + "description": "metabat2 bins for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_metabat_bin.zip", + "md5_checksum": "d1cf2992bd60e25032eedeb09858d14b", + "id": "nmdc:d1cf2992bd60e25032eedeb09858d14b", + "file_size_bytes": 345388 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34714" + "$oid": "649b0052ec087f6bbab3471a" }, "has_input": [ - "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", - "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", - "nmdc:349838000a53b6655a5b12edf6351c50" + "nmdc:b85a322271c7f93ef295141d12cb2dbc", + "nmdc:78b554dd52492c3d1e401d0c9198b89b", + "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86" ], - "too_short_contig_num": 44192, + "too_short_contig_num": 104867, "part_of": [ - "nmdc:mga0bpf635" + "nmdc:mga06rnc11" ], - "binned_contig_num": 157, + "binned_contig_num": 213, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:4857d71459f50147c8ae97ffce40caa5", - "nmdc:65522bf77241109a74354d0e294597f9", - "nmdc:30d6c9fb23abb0849991fad01e0393f1", - "nmdc:a76c8c9034b877334a75e7c0b7c2c830", - "nmdc:9d712c5924d6d0ee6d7305918e69302d" + "nmdc:ce395376d0bc7121e4dc5efc774d5e74", + "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", + "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", + "nmdc:5945311235c6195ad409ab30e2b72c0c", + "nmdc:d1cf2992bd60e25032eedeb09858d14b" ], - "was_informed_by": "gold:Gp0127644", - "input_contig_num": 46135, - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "was_informed_by": 
"gold:Gp0127640", + "input_contig_num": 106665, + "id": "nmdc:414c4647eddd8081308d92da2d59815e", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0bpf635", + "name": "MAGs Analysis Activity for nmdc:mga06rnc11", "mags_list": [ { - "number_of_contig": 157, - "completeness": 39.0, + "number_of_contig": 213, + "completeness": 48.94, "bin_name": "bins.1", - "gene_count": 891, + "gene_count": 1422, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", - "num_16s": 1, + "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 0.0, + "contamination": 0.97, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 19 + "num_t_rna": 30 } ], - "unbinned_contig_num": 1786, - "started_at_time": "2021-10-11T02:26:47Z", + "unbinned_contig_num": 1585, + "started_at_time": "2021-10-11T02:24:27Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T02:55:00+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127644_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.tooShort.fa", - "md5_checksum": "4857d71459f50147c8ae97ffce40caa5", - "id": "nmdc:4857d71459f50147c8ae97ffce40caa5", - "file_size_bytes": 18310651 - }, - { - "name": "Gp0127644_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127644", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.unbinned.fa", - "md5_checksum": "65522bf77241109a74354d0e294597f9", - "id": "nmdc:65522bf77241109a74354d0e294597f9", - "file_size_bytes": 2858628 - }, - { - "name": "Gp0127644_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127644", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_checkm_qa.out", - "md5_checksum": "30d6c9fb23abb0849991fad01e0393f1", - "id": "nmdc:30d6c9fb23abb0849991fad01e0393f1", - "file_size_bytes": 760 - }, - { - "name": "Gp0127644_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127644", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_hqmq_bin.zip", - "md5_checksum": "a76c8c9034b877334a75e7c0b7c2c830", - "id": "nmdc:a76c8c9034b877334a75e7c0b7c2c830", - "file_size_bytes": 182 - }, - { - "name": "Gp0127644_metabat2 bins", - "description": "metabat2 bins for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_metabat_bin.zip", - "md5_checksum": "9d712c5924d6d0ee6d7305918e69302d", - "id": "nmdc:9d712c5924d6d0ee6d7305918e69302d", - "file_size_bytes": 218004 - } - ] + "ended_at_time": "2021-10-11T04:33:17+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b58" - }, - "id": "nmdc:omprc-11-vnnn4722", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-tzp60785" - ], - "has_output": [ - "jgi:574fde667ded5e3df1ee1407" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - 
"add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127639" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c871" + "$oid": "649b005bbf2caae0415ef9bb" }, "has_input": [ - "nmdc:ae9087ed8e1ead2407bca45a47725633" + "nmdc:b85a322271c7f93ef295141d12cb2dbc" ], "part_of": [ - "nmdc:mga09wpw60" + "nmdc:mga06rnc11" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1", - "nmdc:b68178eebde030fad0850797adbb2624" + "nmdc:13e64b02d230f76008e42256a48d1cec", + "nmdc:7babb0c9f662679659b7b1bee469f073", + "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", + "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", + "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", + "nmdc:93fa7de9c74cfcff99bb74e27fa94674", + "nmdc:63bad86a6d7fb23b5a4683ae36820622", + "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", + "nmdc:46722961c280df725d15489e82502031", + "nmdc:6f1a0029cb25f1433de1d7c241bc7553", + "nmdc:6d2839963f616d810e66435b3bbe018a", + "nmdc:efbf36ca49c40ad0367ecd23c012b29b" ], - "was_informed_by": "gold:Gp0127639", - "input_read_count": 23535784, - "output_read_bases": 2989527376, - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "was_informed_by": "gold:Gp0127640", + "id": "nmdc:414c4647eddd8081308d92da2d59815e", "execution_resource": "NERSC-Cori", - "input_read_bases": 3553903384, - "name": "Read QC Activity for nmdc:mga09wpw60", - "output_read_count": 20011156, - 
"started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:27:12+00:00", - "output_data_objects": [ - { - "name": "Gp0127639_Filtered Reads", - "description": "Filtered Reads for Gp0127639", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", - "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", - "id": "nmdc:833077b40372c6daa20beaed04ed0ae1", - "file_size_bytes": 1585232805 - }, - { - "name": "Gp0127639_Filtered Stats", - "description": "Filtered Stats for Gp0127639", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", - "md5_checksum": "b68178eebde030fad0850797adbb2624", - "id": "nmdc:b68178eebde030fad0850797adbb2624", - "file_size_bytes": 289 - } - ] - }, + "name": "Annotation Activity for nmdc:mga06rnc11", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:33:17+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf46" + "$oid": "649b005f2ca5ee4adb139fa6" }, "has_input": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1" + "nmdc:534c94e20d292a6bf09c0a42b550b4c2" + ], + "part_of": [ + "nmdc:mga06rnc11" ], + "ctg_logsum": 42879, + "scaf_logsum": 42987, + "gap_pct": 0.0005, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "nmdc:82f072d1931154fbc722531d3d0dc41c", - "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "nmdc:81281fef2c0778516a84b3a672cc0230", - "nmdc:86ae054ba9def1126579c8f76db8a07a", - "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "nmdc:848fc10ed4365047cb139a4b40303808", - "nmdc:94e422e0bae86c608fba1c3815e08e92", - "nmdc:c6eb85143a2489921c53f8184d536129" + "nmdc:b85a322271c7f93ef295141d12cb2dbc", + 
"nmdc:794445b3fedfaec8af9b70b167bc6852", + "nmdc:d389ae4f8a92c21423fc77aa054ba985", + "nmdc:765541c2865f6047d5e2e8e7299908e4", + "nmdc:78b554dd52492c3d1e401d0c9198b89b" ], - "was_informed_by": "gold:Gp0127639", - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "asm_score": 5.471, + "was_informed_by": "gold:Gp0127640", + "ctg_powsum": 4901.253, + "scaf_max": 27880, + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "scaf_powsum": 4913.296, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:27:12+00:00", - "output_data_objects": [ - { - "name": "Gp0127639_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", - "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", - "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "file_size_bytes": 648 - }, - { - "name": "Gp0127639_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", - "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", - "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", - "file_size_bytes": 588644 - }, - { - "name": "Gp0127639_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127639", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", - "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", - "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "file_size_bytes": 228175 - }, - { - "name": "Gp0127639_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127639", - "data_object_type": 
"Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", - "md5_checksum": "81281fef2c0778516a84b3a672cc0230", - "id": "nmdc:81281fef2c0778516a84b3a672cc0230", - "file_size_bytes": 1468498728 - }, - { - "name": "Gp0127639_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127639", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", - "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", - "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", - "file_size_bytes": 251338 - }, - { - "name": "Gp0127639_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127639", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", - "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", - "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "file_size_bytes": 2322720 - }, - { - "name": "Gp0127639_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127639", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", - "md5_checksum": "848fc10ed4365047cb139a4b40303808", - "id": "nmdc:848fc10ed4365047cb139a4b40303808", - "file_size_bytes": 1168015909 - }, - { - "name": "Gp0127639_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127639", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", - "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", - "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", - "file_size_bytes": 
616202 - }, - { - "name": "Gp0127639_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127639", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", - "md5_checksum": "c6eb85143a2489921c53f8184d536129", - "id": "nmdc:c6eb85143a2489921c53f8184d536129", - "file_size_bytes": 3863456 - } - ] - }, + "contigs": 106665, + "name": "Assembly Activity for nmdc:mga06rnc11", + "ctg_max": 27880, + "gc_std": 0.10189, + "contig_bp": 40331509, + "gc_avg": 0.58648, + "started_at_time": "2021-10-11T02:24:27Z", + "scaf_bp": 40331709, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 106645, + "ended_at_time": "2021-10-11T04:33:17+00:00", + "ctg_l50": 336, + "ctg_l90": 282, + "ctg_n50": 38543, + "ctg_n90": 94525, + "scaf_l50": 336, + "scaf_l90": 282, + "scaf_n50": 38534, + "scaf_n90": 94506 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "61e7199a833bcf838a700d65" + "$oid": "649b009773e8249959349b54" }, + "id": "nmdc:omprc-11-932jcd76", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1" + "nmdc:bsm-11-pvcgp635" ], - "part_of": [ - "nmdc:mga09wpw60" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "nmdc:82f072d1931154fbc722531d3d0dc41c", - "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "nmdc:81281fef2c0778516a84b3a672cc0230", - "nmdc:86ae054ba9def1126579c8f76db8a07a", - 
"nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "nmdc:848fc10ed4365047cb139a4b40303808", - "nmdc:94e422e0bae86c608fba1c3815e08e92", - "nmdc:c6eb85143a2489921c53f8184d536129" + "jgi:574fe0a17ded5e3df1ee148a" ], - "was_informed_by": "gold:Gp0127639", - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:27:12+00:00", - "output_data_objects": [ - { - "name": "Gp0127639_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", - "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", - "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "file_size_bytes": 648 - }, - { - "name": "Gp0127639_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", - "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", - "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", - "file_size_bytes": 588644 - }, - { - "name": "Gp0127639_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127639", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", - "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", - "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "file_size_bytes": 228175 - }, - { - "name": "Gp0127639_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127639", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", - "md5_checksum": "81281fef2c0778516a84b3a672cc0230", - "id": "nmdc:81281fef2c0778516a84b3a672cc0230", - "file_size_bytes": 1468498728 - }, - { - "name": "Gp0127639_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127639", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", - "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", - "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", - "file_size_bytes": 251338 - }, - { - "name": "Gp0127639_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127639", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", - "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", - "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "file_size_bytes": 2322720 - }, - { - "name": "Gp0127639_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127639", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", - "md5_checksum": "848fc10ed4365047cb139a4b40303808", - "id": "nmdc:848fc10ed4365047cb139a4b40303808", - "file_size_bytes": 1168015909 - }, - { - "name": "Gp0127639_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127639", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", - "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", - "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", - "file_size_bytes": 616202 - }, - { - "name": "Gp0127639_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127639", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", - "md5_checksum": "c6eb85143a2489921c53f8184d536129", - "id": "nmdc:c6eb85143a2489921c53f8184d536129", - "file_size_bytes": 3863456 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127640" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fa3" + "$oid": "649b009d6bdd4fd20273c875" }, "has_input": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1" + "nmdc:0094fcbe3a051a8000b8823c8db540f8" ], "part_of": [ - "nmdc:mga09wpw60" + "nmdc:mga06rnc11" ], - "ctg_logsum": 317684, - "scaf_logsum": 318786, - "gap_pct": 0.0017, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", - "nmdc:8f14c016997dd96f70f547df930717be", - "nmdc:5966e5e32744a14549b19b4c92a606a5", - "nmdc:1fcd489b3ae86a76bf297cc19b50392d", - "nmdc:5b90d13539ce840980db101fa7c1df96" + "nmdc:534c94e20d292a6bf09c0a42b550b4c2", + "nmdc:db5ccad12d6ddb46947fbd815aae7f9a" ], - "asm_score": 3.397, - "was_informed_by": "gold:Gp0127639", - "ctg_powsum": 34356, - "scaf_max": 19860, - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "scaf_powsum": 34485, + "was_informed_by": "gold:Gp0127640", + "input_read_count": 28754670, + "output_read_bases": 4186416440, + "id": 
"nmdc:414c4647eddd8081308d92da2d59815e", "execution_resource": "NERSC-Cori", - "contigs": 212560, - "name": "Assembly Activity for nmdc:mga09wpw60", - "ctg_max": 19860, - "gc_std": 0.09375, - "contig_bp": 112053293, - "gc_avg": 0.63186, - "started_at_time": "2021-10-11T02:27:08Z", - "scaf_bp": 112055193, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 212379, - "ended_at_time": "2021-10-11T03:27:12+00:00", - "ctg_l50": 538, - "ctg_l90": 298, - "ctg_n50": 55584, - "ctg_n90": 173977, - "scaf_l50": 539, - "scaf_l90": 298, - "scaf_n50": 55395, - "scaf_n90": 173826, - "output_data_objects": [ - { - "name": "Gp0127639_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127639", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_contigs.fna", - "md5_checksum": "2b73310c6eef1ece5bb01f235b22fdbd", - "id": "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", - "file_size_bytes": 120497476 - }, - { - "name": "Gp0127639_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127639", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_scaffolds.fna", - "md5_checksum": "8f14c016997dd96f70f547df930717be", - "id": "nmdc:8f14c016997dd96f70f547df930717be", - "file_size_bytes": 119857107 - }, - { - "name": "Gp0127639_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_covstats.txt", - "md5_checksum": "5966e5e32744a14549b19b4c92a606a5", - "id": "nmdc:5966e5e32744a14549b19b4c92a606a5", - "file_size_bytes": 16872665 - }, - { - "name": "Gp0127639_Assembled AGP file", - "description": "Assembled AGP file for Gp0127639", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_assembly.agp", - 
"md5_checksum": "1fcd489b3ae86a76bf297cc19b50392d", - "id": "nmdc:1fcd489b3ae86a76bf297cc19b50392d", - "file_size_bytes": 15768901 - }, - { - "name": "Gp0127639_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127639", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_pairedMapped_sorted.bam", - "md5_checksum": "5b90d13539ce840980db101fa7c1df96", - "id": "nmdc:5b90d13539ce840980db101fa7c1df96", - "file_size_bytes": 1779135536 - } - ] - }, + "input_read_bases": 4341955170, + "name": "Read QC Activity for nmdc:mga06rnc11", + "output_read_count": 27981268, + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:33:17+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9b7" + "$oid": "649b009bff710ae353f8cf3a" }, "has_input": [ - "nmdc:2b73310c6eef1ece5bb01f235b22fdbd" - ], - "part_of": [ - "nmdc:mga09wpw60" + "nmdc:534c94e20d292a6bf09c0a42b550b4c2" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:6c09d55cfb8872b30eb1832394f80beb", - "nmdc:2e3cc72d21590667259f6356882ce63b", - "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", - "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", - "nmdc:16bedd944e5e836924c28b006026c348", - "nmdc:8764070f565c50998968e0739420f5cc", - "nmdc:9e6accc90d61ea572819dcdb591e41a7", - "nmdc:32b9518ee41cadb157f3c0f9ec91476c", - "nmdc:432d591bd525ae429e837431d44954f7", - "nmdc:3120d5d5d27d142f898f70a8cc1b076e", - "nmdc:d37ff61fdae942030a1b07e855cf1abd", - "nmdc:56995366ba4186639a8ff4fd4defbd5e" + "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "nmdc:61f1f6d57fd4d445682e25ec34901721", + "nmdc:7c31728fc2a51c8d202f9f74b1919886", + 
"nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "nmdc:e2939606fc9ff1c0046b333e1740f258", + "nmdc:d47144fd7ec0608e7677550d9589c889" ], - "was_informed_by": "gold:Gp0127639", - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "was_informed_by": "gold:Gp0127640", + "id": "nmdc:414c4647eddd8081308d92da2d59815e", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga09wpw60", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:27:12+00:00", - "output_data_objects": [ - { - "name": "Gp0127639_Protein FAA", - "description": "Protein FAA for Gp0127639", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_proteins.faa", - "md5_checksum": "6c09d55cfb8872b30eb1832394f80beb", - "id": "nmdc:6c09d55cfb8872b30eb1832394f80beb", - "file_size_bytes": 67573912 - }, - { - "name": "Gp0127639_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127639", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_structural_annotation.gff", - "md5_checksum": "2e3cc72d21590667259f6356882ce63b", - "id": "nmdc:2e3cc72d21590667259f6356882ce63b", - "file_size_bytes": 2526 - }, - { - "name": "Gp0127639_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127639", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_functional_annotation.gff", - "md5_checksum": "2dee5eaa50c8eeb6e3bc8471501d9964", - "id": "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", - "file_size_bytes": 75196016 - }, - { - "name": "Gp0127639_KO TSV file", - "description": "KO TSV file for Gp0127639", - "data_object_type": "Annotation KEGG Orthology", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko.tsv", - "md5_checksum": "7ec4cfdd88352d703a2bb64b99bd56c5", - "id": "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", - "file_size_bytes": 8707597 - }, - { - "name": "Gp0127639_EC TSV file", - "description": "EC TSV file for Gp0127639", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ec.tsv", - "md5_checksum": "16bedd944e5e836924c28b006026c348", - "id": "nmdc:16bedd944e5e836924c28b006026c348", - "file_size_bytes": 5769544 - }, - { - "name": "Gp0127639_COG GFF file", - "description": "COG GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cog.gff", - "md5_checksum": "8764070f565c50998968e0739420f5cc", - "id": "nmdc:8764070f565c50998968e0739420f5cc", - "file_size_bytes": 45648468 - }, - { - "name": "Gp0127639_PFAM GFF file", - "description": "PFAM GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_pfam.gff", - "md5_checksum": "9e6accc90d61ea572819dcdb591e41a7", - "id": "nmdc:9e6accc90d61ea572819dcdb591e41a7", - "file_size_bytes": 34995151 - }, - { - "name": "Gp0127639_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_tigrfam.gff", - "md5_checksum": "32b9518ee41cadb157f3c0f9ec91476c", - "id": "nmdc:32b9518ee41cadb157f3c0f9ec91476c", - "file_size_bytes": 4060116 - }, - { - "name": "Gp0127639_SMART GFF file", - "description": "SMART GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_smart.gff", - "md5_checksum": "432d591bd525ae429e837431d44954f7", - "id": "nmdc:432d591bd525ae429e837431d44954f7", - "file_size_bytes": 10056742 - }, - { - "name": "Gp0127639_SuperFam GFF file", - "description": "SuperFam GFF 
file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_supfam.gff", - "md5_checksum": "3120d5d5d27d142f898f70a8cc1b076e", - "id": "nmdc:3120d5d5d27d142f898f70a8cc1b076e", - "file_size_bytes": 56435804 - }, - { - "name": "Gp0127639_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cath_funfam.gff", - "md5_checksum": "d37ff61fdae942030a1b07e855cf1abd", - "id": "nmdc:d37ff61fdae942030a1b07e855cf1abd", - "file_size_bytes": 43456195 - }, - { - "name": "Gp0127639_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko_ec.gff", - "md5_checksum": "56995366ba4186639a8ff4fd4defbd5e", - "id": "nmdc:56995366ba4186639a8ff4fd4defbd5e", - "file_size_bytes": 27657123 - } - ] - }, + "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:33:17+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab3471b" + "$oid": "61e7199d833bcf838a700ec0" }, "has_input": [ - "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", - "nmdc:5b90d13539ce840980db101fa7c1df96", - "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964" + "nmdc:534c94e20d292a6bf09c0a42b550b4c2" ], - "too_short_contig_num": 194918, "part_of": [ - "nmdc:mga09wpw60" + "nmdc:mga06rnc11" ], - "binned_contig_num": 732, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:820dbad1b0ddd3c728e77aceee09ea28", - "nmdc:24fbfc69ded61dffff95ba2f8475239c", - 
"nmdc:1837710887027f94b0f25208edb35cbe", - "nmdc:7072cfd6665082a95b2c09a4bc88760c", - "nmdc:b0db190d9d1093ef87a5efb8a600e9ef" - ], - "was_informed_by": "gold:Gp0127639", - "input_contig_num": 212559, - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "nmdc:61f1f6d57fd4d445682e25ec34901721", + "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "nmdc:e2939606fc9ff1c0046b333e1740f258", + "nmdc:d47144fd7ec0608e7677550d9589c889" + ], + "was_informed_by": "gold:Gp0127640", + "id": "nmdc:414c4647eddd8081308d92da2d59815e", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga09wpw60", + "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:33:17+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127641_Filtered Reads", + "description": "Filtered Reads for Gp0127641", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filtered.fastq.gz", + "md5_checksum": "a2700afe93abad6f004a3701348622a2", + "id": "nmdc:a2700afe93abad6f004a3701348622a2", + "file_size_bytes": 1787020792 + }, + { + "name": "Gp0127641_Filtered Stats", + "description": "Filtered Stats for Gp0127641", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filterStats.txt", + "md5_checksum": "aaa9a8a3d8e147116953394a8755742d", + "id": "nmdc:aaa9a8a3d8e147116953394a8755742d", + "file_size_bytes": 289 + }, + { + "name": "Gp0127641_Gottcha2 
TSV report", + "description": "Gottcha2 TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", + "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", + "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "file_size_bytes": 3331 + }, + { + "name": "Gp0127641_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", + "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", + "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", + "file_size_bytes": 761359 + }, + { + "name": "Gp0127641_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127641", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", + "md5_checksum": "f473f4a99336a49105d2722888ae0510", + "id": "nmdc:f473f4a99336a49105d2722888ae0510", + "file_size_bytes": 236161 + }, + { + "name": "Gp0127641_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127641", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", + "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", + "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", + "file_size_bytes": 1635953327 + }, + { + "name": "Gp0127641_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127641", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", + "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", + "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "file_size_bytes": 255166 + }, + { + 
"name": "Gp0127641_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127641", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", + "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", + "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "file_size_bytes": 2332521 + }, + { + "name": "Gp0127641_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127641", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", + "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", + "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "file_size_bytes": 1307934195 + }, + { + "name": "Gp0127641_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127641", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", + "md5_checksum": "dc193d1a1693589003f992c820606bab", + "id": "nmdc:dc193d1a1693589003f992c820606bab", + "file_size_bytes": 635050 + }, + { + "name": "Gp0127641_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127641", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", + "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", + "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", + "file_size_bytes": 3964515 + }, + { + "name": "Gp0127641_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", + "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", + "id": 
"nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "file_size_bytes": 3331 + }, + { + "name": "Gp0127641_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", + "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", + "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", + "file_size_bytes": 761359 + }, + { + "name": "Gp0127641_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127641", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", + "md5_checksum": "f473f4a99336a49105d2722888ae0510", + "id": "nmdc:f473f4a99336a49105d2722888ae0510", + "file_size_bytes": 236161 + }, + { + "name": "Gp0127641_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127641", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", + "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", + "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", + "file_size_bytes": 1635953327 + }, + { + "name": "Gp0127641_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127641", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", + "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", + "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "file_size_bytes": 255166 + }, + { + "name": "Gp0127641_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127641", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", + "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", + "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "file_size_bytes": 2332521 + }, + { + "name": "Gp0127641_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127641", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", + "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", + "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "file_size_bytes": 1307934195 + }, + { + "name": "Gp0127641_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127641", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", + "md5_checksum": "dc193d1a1693589003f992c820606bab", + "id": "nmdc:dc193d1a1693589003f992c820606bab", + "file_size_bytes": 635050 + }, + { + "name": "Gp0127641_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127641", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", + "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", + "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", + "file_size_bytes": 3964515 + }, + { + "name": "Gp0127641_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127641", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_contigs.fna", + "md5_checksum": "18f0d53f503c855c0093677df58366e0", + "id": "nmdc:18f0d53f503c855c0093677df58366e0", + "file_size_bytes": 102384540 + }, + { + "name": "Gp0127641_Assembled scaffold fasta", + "description": "Assembled 
scaffold fasta for Gp0127641", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_scaffolds.fna", + "md5_checksum": "2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "id": "nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "file_size_bytes": 101806869 + }, + { + "name": "Gp0127641_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_covstats.txt", + "md5_checksum": "04ad2128f72c26a4fa2d0ee7b1709ee9", + "id": "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", + "file_size_bytes": 15204446 + }, + { + "name": "Gp0127641_Assembled AGP file", + "description": "Assembled AGP file for Gp0127641", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_assembly.agp", + "md5_checksum": "b89858508c524a03011cd5191f7589fa", + "id": "nmdc:b89858508c524a03011cd5191f7589fa", + "file_size_bytes": 14206204 + }, + { + "name": "Gp0127641_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127641", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_pairedMapped_sorted.bam", + "md5_checksum": "6974d394df454501e0515b31a2415367", + "id": "nmdc:6974d394df454501e0515b31a2415367", + "file_size_bytes": 1967753614 + }, + { + "name": "Gp0127641_Protein FAA", + "description": "Protein FAA for Gp0127641", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_proteins.faa", + "md5_checksum": "f33a2a1789f5e913c3ef0dd0440a4877", + "id": "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", + "file_size_bytes": 57768168 + }, + { + "name": "Gp0127641_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127641", + 
"data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_structural_annotation.gff", + "md5_checksum": "9aba4a0c78cb073609b129c4bb65fe2d", + "id": "nmdc:9aba4a0c78cb073609b129c4bb65fe2d", + "file_size_bytes": 2522 + }, + { + "name": "Gp0127641_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127641", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_functional_annotation.gff", + "md5_checksum": "2477ce1de68bdb1322eec1ffad5c74ac", + "id": "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", + "file_size_bytes": 65167139 + }, + { + "name": "Gp0127641_KO TSV file", + "description": "KO TSV file for Gp0127641", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko.tsv", + "md5_checksum": "65768fea44cbd0183b286ab8f9883394", + "id": "nmdc:65768fea44cbd0183b286ab8f9883394", + "file_size_bytes": 7266122 + }, + { + "name": "Gp0127641_EC TSV file", + "description": "EC TSV file for Gp0127641", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ec.tsv", + "md5_checksum": "b8ac75e77d2bc2607877e33ab692c43b", + "id": "nmdc:b8ac75e77d2bc2607877e33ab692c43b", + "file_size_bytes": 4793386 + }, + { + "name": "Gp0127641_COG GFF file", + "description": "COG GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cog.gff", + "md5_checksum": "31018e605b1569eb64006f2108b9d7d4", + "id": "nmdc:31018e605b1569eb64006f2108b9d7d4", + "file_size_bytes": 38184948 + }, + { + "name": "Gp0127641_PFAM GFF file", + "description": "PFAM GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_pfam.gff", + 
"md5_checksum": "c7ee9f693971a7686d8ff701fddbcb4a", + "id": "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", + "file_size_bytes": 28867184 + }, + { + "name": "Gp0127641_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_tigrfam.gff", + "md5_checksum": "5c0d5f63853ca572d8d73cac9a36c8d7", + "id": "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", + "file_size_bytes": 3122581 + }, + { + "name": "Gp0127641_SMART GFF file", + "description": "SMART GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_smart.gff", + "md5_checksum": "058c5e17eeeea69b2bf0b1b3c2838aea", + "id": "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", + "file_size_bytes": 8368877 + }, + { + "name": "Gp0127641_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_supfam.gff", + "md5_checksum": "b836f94d526c1936d080a4aa7c0646c9", + "id": "nmdc:b836f94d526c1936d080a4aa7c0646c9", + "file_size_bytes": 47986944 + }, + { + "name": "Gp0127641_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cath_funfam.gff", + "md5_checksum": "0100d09c52d0c243b5ae45d95e6a22dc", + "id": "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", + "file_size_bytes": 36349993 + }, + { + "name": "Gp0127641_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko_ec.gff", + "md5_checksum": "64b87140003d1a5a3d9ac939be55e57d", + "id": "nmdc:64b87140003d1a5a3d9ac939be55e57d", + "file_size_bytes": 23113010 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + 
"data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127641_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.tooShort.fa", + "md5_checksum": "024b6771e169aeaf57a3b10acc6045a1", + "id": "nmdc:024b6771e169aeaf57a3b10acc6045a1", + "file_size_bytes": 80852741 + }, + { + "name": "Gp0127641_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.unbinned.fa", + "md5_checksum": "545cd253ad26116236dec9937b32d8ef", + "id": "nmdc:545cd253ad26116236dec9937b32d8ef", + "file_size_bytes": 19497941 + }, + { + "name": "Gp0127641_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127641", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_checkm_qa.out", + "md5_checksum": "1785cfe7cf0546dc8702193921a2f566", + "id": "nmdc:1785cfe7cf0546dc8702193921a2f566", + "file_size_bytes": 936 + }, + { + "name": "Gp0127641_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127641", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_hqmq_bin.zip", + "md5_checksum": "0a2a5650358b51ffcd3bbcfc874ac5c9", + "id": "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", + "file_size_bytes": 182 + }, + { + "name": "Gp0127641_metabat2 bins", + "description": "metabat2 bins for Gp0127641", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_metabat_bin.zip", + "md5_checksum": "8f6b89831cabcd1dc7aa5e26d87f5063", + "id": "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063", + "file_size_bytes": 625863 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34719" + }, + "has_input": [ + "nmdc:18f0d53f503c855c0093677df58366e0", + "nmdc:6974d394df454501e0515b31a2415367", + "nmdc:2477ce1de68bdb1322eec1ffad5c74ac" + ], + "too_short_contig_num": 179152, + "part_of": [ + "nmdc:mga0822t33" + ], + "binned_contig_num": 464, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:024b6771e169aeaf57a3b10acc6045a1", + "nmdc:545cd253ad26116236dec9937b32d8ef", + "nmdc:1785cfe7cf0546dc8702193921a2f566", + "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", + "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063" + ], + "was_informed_by": "gold:Gp0127641", + "input_contig_num": 191906, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0822t33", "mags_list": [ { - "number_of_contig": 85, - "completeness": 18.1, + "number_of_contig": 142, + "completeness": 24.43, "bin_name": "bins.1", - "gene_count": 437, + "gene_count": 832, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", - "num_16s": 0, + "num_16s": 1, "gtdbtk_family": "", "gtdbtk_domain": "", "contamination": 0.0, @@ -25540,70 +24389,13 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 8 + "num_t_rna": 15 }, { - "number_of_contig": 59, - "completeness": 15.92, + "number_of_contig": 322, + "completeness": 46.21, "bin_name": "bins.2", - "gene_count": 343, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.84, - 
"gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 8 - }, - { - "number_of_contig": 258, - "completeness": 21.26, - "bin_name": "bins.3", - "gene_count": 1440, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 19 - }, - { - "number_of_contig": 101, - "completeness": 29.13, - "bin_name": "bins.4", - "gene_count": 560, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 116, - "completeness": 1.53, - "bin_name": "bins.5", - "gene_count": 763, + "gene_count": 1652, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -25613,122 +24405,2855 @@ "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, + "num_5s": 1, + "num_23s": 1, "gtdbtk_genus": "", - "num_t_rna": 10 - }, + "num_t_rna": 21 + } + ], + "unbinned_contig_num": 12290, + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b9" + }, + "has_input": [ + "nmdc:18f0d53f503c855c0093677df58366e0" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", + "nmdc:9aba4a0c78cb073609b129c4bb65fe2d", + "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", + 
"nmdc:65768fea44cbd0183b286ab8f9883394", + "nmdc:b8ac75e77d2bc2607877e33ab692c43b", + "nmdc:31018e605b1569eb64006f2108b9d7d4", + "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", + "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", + "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", + "nmdc:b836f94d526c1936d080a4aa7c0646c9", + "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", + "nmdc:64b87140003d1a5a3d9ac939be55e57d" + ], + "was_informed_by": "gold:Gp0127641", + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0822t33", + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa2" + }, + "has_input": [ + "nmdc:a2700afe93abad6f004a3701348622a2" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "ctg_logsum": 224925, + "scaf_logsum": 225846, + "gap_pct": 0.00137, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:18f0d53f503c855c0093677df58366e0", + "nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", + "nmdc:b89858508c524a03011cd5191f7589fa", + "nmdc:6974d394df454501e0515b31a2415367" + ], + "asm_score": 3.367, + "was_informed_by": "gold:Gp0127641", + "ctg_powsum": 24264, + "scaf_max": 18020, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "scaf_powsum": 24365, + "execution_resource": "NERSC-Cori", + "contigs": 191907, + "name": "Assembly Activity for nmdc:mga0822t33", + "ctg_max": 18020, + "gc_std": 0.10192, + "contig_bp": 94878155, + "gc_avg": 0.61857, + "started_at_time": "2021-10-11T02:27:18Z", + "scaf_bp": 94879455, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 191777, + "ended_at_time": "2021-10-11T04:05:47+00:00", + "ctg_l50": 489, + "ctg_l90": 290, + "ctg_n50": 53038, + "ctg_n90": 159679, + "scaf_l50": 489, + "scaf_l90": 290, + "scaf_n50": 53021, + "scaf_n90": 159560 + 
} + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b55" + }, + "id": "nmdc:omprc-11-p0jdew93", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-fgtanh42" + ], + "has_output": [ + "jgi:574fde697ded5e3df1ee140a" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127641" + ] + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c872" + }, + "has_input": [ + "nmdc:c59690f54a7afb65869c9c683e3eef7f" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a2700afe93abad6f004a3701348622a2", + "nmdc:aaa9a8a3d8e147116953394a8755742d" + ], + "was_informed_by": "gold:Gp0127641", + "input_read_count": 24261468, + "output_read_bases": 3340338011, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3663481668, + "name": "Read QC Activity for nmdc:mga0822t33", + "output_read_count": 22362924, + "started_at_time": 
"2021-10-11T02:27:18Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf37" + }, + "has_input": [ + "nmdc:a2700afe93abad6f004a3701348622a2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "nmdc:a42312841b816448d8bd5d3adfa65f58", + "nmdc:f473f4a99336a49105d2722888ae0510", + "nmdc:ae51ea50660f44fa3b317a45f3015556", + "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "nmdc:dc193d1a1693589003f992c820606bab", + "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" + ], + "was_informed_by": "gold:Gp0127641", + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0822t33", + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e7199c833bcf838a700e42" + }, + "has_input": [ + "nmdc:a2700afe93abad6f004a3701348622a2" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "nmdc:a42312841b816448d8bd5d3adfa65f58", + "nmdc:f473f4a99336a49105d2722888ae0510", + "nmdc:ae51ea50660f44fa3b317a45f3015556", + "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "nmdc:dc193d1a1693589003f992c820606bab", + "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" + ], + "was_informed_by": "gold:Gp0127641", + 
"id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0822t33", + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127643_Filtered Reads", + "description": "Filtered Reads for Gp0127643", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", + "md5_checksum": "2ef23543e3064ca73c3034713d87c026", + "id": "nmdc:2ef23543e3064ca73c3034713d87c026", + "file_size_bytes": 1891088172 + }, + { + "name": "Gp0127643_Filtered Stats", + "description": "Filtered Stats for Gp0127643", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", + "md5_checksum": "87b172ead58a37be8d199c0acfc96759", + "id": "nmdc:87b172ead58a37be8d199c0acfc96759", + "file_size_bytes": 289 + }, + { + "name": "Gp0127643_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", + "md5_checksum": "e8f825653e5736e29b73de55bd11a270", + "id": "nmdc:e8f825653e5736e29b73de55bd11a270", + "file_size_bytes": 1326 + }, + { + "name": "Gp0127643_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", + "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", + "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "file_size_bytes": 664131 + }, + { + "name": "Gp0127643_Gottcha2 Krona HTML 
report", + "description": "Gottcha2 Krona HTML report for Gp0127643", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", + "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", + "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "file_size_bytes": 229630 + }, + { + "name": "Gp0127643_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127643", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", + "md5_checksum": "c9074b2e05765afd68463dc301b87995", + "id": "nmdc:c9074b2e05765afd68463dc301b87995", + "file_size_bytes": 1726867547 + }, + { + "name": "Gp0127643_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127643", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", + "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", + "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "file_size_bytes": 254021 + }, + { + "name": "Gp0127643_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127643", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", + "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", + "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", + "file_size_bytes": 2331702 + }, + { + "name": "Gp0127643_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127643", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", + 
"md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", + "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "file_size_bytes": 1376409913 + }, + { + "name": "Gp0127643_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127643", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", + "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", + "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "file_size_bytes": 640506 + }, + { + "name": "Gp0127643_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127643", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", + "md5_checksum": "f98bae155bced880c058ecde7d539c18", + "id": "nmdc:f98bae155bced880c058ecde7d539c18", + "file_size_bytes": 3998448 + }, + { + "name": "Gp0127643_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", + "md5_checksum": "e8f825653e5736e29b73de55bd11a270", + "id": "nmdc:e8f825653e5736e29b73de55bd11a270", + "file_size_bytes": 1326 + }, + { + "name": "Gp0127643_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", + "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", + "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "file_size_bytes": 664131 + }, + { + "name": "Gp0127643_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127643", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", + "md5_checksum": 
"5c97bc15d4d5999f140664b3b2777c6d", + "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "file_size_bytes": 229630 + }, + { + "name": "Gp0127643_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127643", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", + "md5_checksum": "c9074b2e05765afd68463dc301b87995", + "id": "nmdc:c9074b2e05765afd68463dc301b87995", + "file_size_bytes": 1726867547 + }, + { + "name": "Gp0127643_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127643", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", + "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", + "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "file_size_bytes": 254021 + }, + { + "name": "Gp0127643_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127643", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", + "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", + "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", + "file_size_bytes": 2331702 + }, + { + "name": "Gp0127643_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127643", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", + "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", + "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "file_size_bytes": 1376409913 + }, + { + "name": "Gp0127643_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127643", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", + "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", + "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "file_size_bytes": 640506 + }, + { + "name": "Gp0127643_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127643", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", + "md5_checksum": "f98bae155bced880c058ecde7d539c18", + "id": "nmdc:f98bae155bced880c058ecde7d539c18", + "file_size_bytes": 3998448 + }, + { + "name": "Gp0127643_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127643", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_contigs.fna", + "md5_checksum": "a3a85f9f946ff34f28dfd4b5f8590f23", + "id": "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "file_size_bytes": 112772885 + }, + { + "name": "Gp0127643_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127643", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_scaffolds.fna", + "md5_checksum": "001fd34d98a73eee6be5a41004e67469", + "id": "nmdc:001fd34d98a73eee6be5a41004e67469", + "file_size_bytes": 112143079 + }, + { + "name": "Gp0127643_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_covstats.txt", + "md5_checksum": "9b45294f72cb55b2f039366d33183fa3", + "id": "nmdc:9b45294f72cb55b2f039366d33183fa3", + "file_size_bytes": 16563197 + }, + { + "name": "Gp0127643_Assembled AGP file", + "description": "Assembled AGP file for Gp0127643", + "data_object_type": "Assembly AGP", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_assembly.agp", + "md5_checksum": "b2ec4f5a3f02869684bdfaf065d75c54", + "id": "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", + "file_size_bytes": 15493398 + }, + { + "name": "Gp0127643_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127643", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_pairedMapped_sorted.bam", + "md5_checksum": "fa61e18d49a2012f115d970f0a195986", + "id": "nmdc:fa61e18d49a2012f115d970f0a195986", + "file_size_bytes": 2085429752 + }, + { + "name": "Gp0127643_Protein FAA", + "description": "Protein FAA for Gp0127643", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_proteins.faa", + "md5_checksum": "b2cd0d1a024094fd4e308c21d439ed5f", + "id": "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", + "file_size_bytes": 63917762 + }, + { + "name": "Gp0127643_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127643", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_structural_annotation.gff", + "md5_checksum": "6151bacd37618698c28b00151b4998f8", + "id": "nmdc:6151bacd37618698c28b00151b4998f8", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127643_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127643", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_functional_annotation.gff", + "md5_checksum": "744277086ab01222a91233536d5e8976", + "id": "nmdc:744277086ab01222a91233536d5e8976", + "file_size_bytes": 71811800 + }, + { + "name": "Gp0127643_KO TSV file", + "description": "KO TSV file for Gp0127643", + 
"data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko.tsv", + "md5_checksum": "9c8a359c69bcb1179241f9a3c727fa23", + "id": "nmdc:9c8a359c69bcb1179241f9a3c727fa23", + "file_size_bytes": 7959243 + }, + { + "name": "Gp0127643_EC TSV file", + "description": "EC TSV file for Gp0127643", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ec.tsv", + "md5_checksum": "027b72af172d078f88471d932cf6d473", + "id": "nmdc:027b72af172d078f88471d932cf6d473", + "file_size_bytes": 5202338 + }, + { + "name": "Gp0127643_COG GFF file", + "description": "COG GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cog.gff", + "md5_checksum": "ff24990735aa002e828ff7204a456ad2", + "id": "nmdc:ff24990735aa002e828ff7204a456ad2", + "file_size_bytes": 41649279 + }, + { + "name": "Gp0127643_PFAM GFF file", + "description": "PFAM GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_pfam.gff", + "md5_checksum": "e884ad501d1bb3bcf006f0999020ce0f", + "id": "nmdc:e884ad501d1bb3bcf006f0999020ce0f", + "file_size_bytes": 31529168 + }, + { + "name": "Gp0127643_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_tigrfam.gff", + "md5_checksum": "8321f818f53371491a7a80ef7e063ca6", + "id": "nmdc:8321f818f53371491a7a80ef7e063ca6", + "file_size_bytes": 3378599 + }, + { + "name": "Gp0127643_SMART GFF file", + "description": "SMART GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_smart.gff", + "md5_checksum": "6f799842fe74ebff7942a026dbf9b1bf", + "id": "nmdc:6f799842fe74ebff7942a026dbf9b1bf", + "file_size_bytes": 9132037 + }, + { + "name": 
"Gp0127643_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_supfam.gff", + "md5_checksum": "8ee84a629a5899c25e0fbd0f07084530", + "id": "nmdc:8ee84a629a5899c25e0fbd0f07084530", + "file_size_bytes": 52720037 + }, + { + "name": "Gp0127643_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cath_funfam.gff", + "md5_checksum": "6697cdb0b1dcf83e7ecb8fcefa0703ef", + "id": "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", + "file_size_bytes": 39643020 + }, + { + "name": "Gp0127643_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko_ec.gff", + "md5_checksum": "d2990b0bd86e50209dcada6fa6b09510", + "id": "nmdc:d2990b0bd86e50209dcada6fa6b09510", + "file_size_bytes": 25272687 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127643_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.tooShort.fa", + "md5_checksum": "ed8acb6d21b14da131350d9c52aa7041", + "id": "nmdc:ed8acb6d21b14da131350d9c52aa7041", + "file_size_bytes": 87917684 + }, + { + "name": "Gp0127643_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127643", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.unbinned.fa", + "md5_checksum": "d81e3cc17fa762a717dcf324a0aa3d45", + "id": "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", + "file_size_bytes": 22746526 + }, + { + "name": "Gp0127643_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127643", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_checkm_qa.out", + "md5_checksum": "bd388cba93a77cde2f5791fa0f580865", + "id": "nmdc:bd388cba93a77cde2f5791fa0f580865", + "file_size_bytes": 785 + }, + { + "name": "Gp0127643_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127643", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_hqmq_bin.zip", + "md5_checksum": "30695aca02693c6aba316db3e9f565a8", + "id": "nmdc:30695aca02693c6aba316db3e9f565a8", + "file_size_bytes": 182 + }, + { + "name": "Gp0127643_metabat2 bins", + "description": "metabat2 bins for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_metabat_bin.zip", + "md5_checksum": "79de6d81848956e1c06a811bc9bdab81", + "id": "nmdc:79de6d81848956e1c06a811bc9bdab81", + "file_size_bytes": 614113 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3471e" + }, + "has_input": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "nmdc:fa61e18d49a2012f115d970f0a195986", + "nmdc:744277086ab01222a91233536d5e8976" + ], + "too_short_contig_num": 194066, + "part_of": [ + "nmdc:mga0evc178" + ], + "binned_contig_num": 470, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"nmdc:ed8acb6d21b14da131350d9c52aa7041", + "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", + "nmdc:bd388cba93a77cde2f5791fa0f580865", + "nmdc:30695aca02693c6aba316db3e9f565a8", + "nmdc:79de6d81848956e1c06a811bc9bdab81" + ], + "was_informed_by": "gold:Gp0127643", + "input_contig_num": 208967, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0evc178", + "mags_list": [ + { + "number_of_contig": 470, + "completeness": 30.73, + "bin_name": "bins.1", + "gene_count": 2501, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.71, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 + } + ], + "unbinned_contig_num": 14431, + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9bd" + }, + "has_input": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", + "nmdc:6151bacd37618698c28b00151b4998f8", + "nmdc:744277086ab01222a91233536d5e8976", + "nmdc:9c8a359c69bcb1179241f9a3c727fa23", + "nmdc:027b72af172d078f88471d932cf6d473", + "nmdc:ff24990735aa002e828ff7204a456ad2", + "nmdc:e884ad501d1bb3bcf006f0999020ce0f", + "nmdc:8321f818f53371491a7a80ef7e063ca6", + "nmdc:6f799842fe74ebff7942a026dbf9b1bf", + "nmdc:8ee84a629a5899c25e0fbd0f07084530", + "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", + "nmdc:d2990b0bd86e50209dcada6fa6b09510" + ], + "was_informed_by": "gold:Gp0127643", + "id": 
"nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0evc178", + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa9" + }, + "has_input": [ + "nmdc:2ef23543e3064ca73c3034713d87c026" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "ctg_logsum": 258957, + "scaf_logsum": 260132, + "gap_pct": 0.00166, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "nmdc:001fd34d98a73eee6be5a41004e67469", + "nmdc:9b45294f72cb55b2f039366d33183fa3", + "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", + "nmdc:fa61e18d49a2012f115d970f0a195986" + ], + "asm_score": 3.329, + "was_informed_by": "gold:Gp0127643", + "ctg_powsum": 27868, + "scaf_max": 12873, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "scaf_powsum": 27998, + "execution_resource": "NERSC-Cori", + "contigs": 208967, + "name": "Assembly Activity for nmdc:mga0evc178", + "ctg_max": 12873, + "gc_std": 0.09438, + "contig_bp": 104567589, + "gc_avg": 0.63102, + "started_at_time": "2021-10-11T02:27:00Z", + "scaf_bp": 104569329, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 208793, + "ended_at_time": "2021-10-11T04:04:16+00:00", + "ctg_l50": 497, + "ctg_l90": 292, + "ctg_n50": 57164, + "ctg_n90": 172414, + "scaf_l50": 498, + "scaf_l90": 292, + "scaf_n50": 56935, + "scaf_n90": 172256 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b56" + }, + "id": "nmdc:omprc-11-dtsr6z90", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", + 
"description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-g079t498" + ], + "has_output": [ + "jgi:574fde6c7ded5e3df1ee140c" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127643" + ] + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c879" + }, + "has_input": [ + "nmdc:8b553dbdd47b90ed7f55d5747822f5d5" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2ef23543e3064ca73c3034713d87c026", + "nmdc:87b172ead58a37be8d199c0acfc96759" + ], + "was_informed_by": "gold:Gp0127643", + "input_read_count": 25305566, + "output_read_bases": 3510483777, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3821140466, + "name": "Read QC Activity for nmdc:mga0evc178", + "output_read_count": 23508042, + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf36" + }, + "has_input": [ + "nmdc:2ef23543e3064ca73c3034713d87c026" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e8f825653e5736e29b73de55bd11a270", + "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + 
"nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "nmdc:c9074b2e05765afd68463dc301b87995", + "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "nmdc:6465fe59472b111ead1f0414ccf39f62", + "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "nmdc:f98bae155bced880c058ecde7d539c18" + ], + "was_informed_by": "gold:Gp0127643", + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0evc178", + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e719b6833bcf838a70116b" + }, + "has_input": [ + "nmdc:2ef23543e3064ca73c3034713d87c026" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e8f825653e5736e29b73de55bd11a270", + "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "nmdc:c9074b2e05765afd68463dc301b87995", + "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "nmdc:6465fe59472b111ead1f0414ccf39f62", + "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "nmdc:f98bae155bced880c058ecde7d539c18" + ], + "was_informed_by": "gold:Gp0127643", + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0evc178", + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + 
"data_object_set": [ + { + "name": "Gp0127644_Filtered Reads", + "description": "Filtered Reads for Gp0127644", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filtered.fastq.gz", + "md5_checksum": "98da35678c59689ce738b2a6bc708692", + "id": "nmdc:98da35678c59689ce738b2a6bc708692", + "file_size_bytes": 694199131 + }, + { + "name": "Gp0127644_Filtered Stats", + "description": "Filtered Stats for Gp0127644", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filterStats.txt", + "md5_checksum": "ff08ea52254e0cc1011c56656505b27b", + "id": "nmdc:ff08ea52254e0cc1011c56656505b27b", + "file_size_bytes": 280 + }, + { + "name": "gold:Gp0452677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for gold:Gp0452677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", + "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", + "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "file_size_bytes": 109 + }, + { + "name": "Gp0127644_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", + "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", + "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "file_size_bytes": 426075 + }, + { + "name": "gold:Gp0452677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for gold:Gp0452677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", + "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", + "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "file_size_bytes": 226638 + }, + { + "name": "Gp0127644_Centrifuge classification TSV report", + 
"description": "Centrifuge classification TSV report for Gp0127644", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", + "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", + "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "file_size_bytes": 610862986 + }, + { + "name": "Gp0127644_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127644", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", + "md5_checksum": "9baa708296f62334e099cf61711b5e16", + "id": "nmdc:9baa708296f62334e099cf61711b5e16", + "file_size_bytes": 243322 + }, + { + "name": "Gp0127644_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127644", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", + "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", + "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "file_size_bytes": 2294995 + }, + { + "name": "Gp0127644_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127644", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", + "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", + "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "file_size_bytes": 487178087 + }, + { + "name": "Gp0127644_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127644", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", + "md5_checksum": 
"8983fa1acb03f2905bbec3a6ccee2854", + "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "file_size_bytes": 557688 + }, + { + "name": "Gp0127644_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127644", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", + "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", + "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", + "file_size_bytes": 3567307 + }, + { + "name": "gold:Gp0452677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for gold:Gp0452677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", + "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", + "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "file_size_bytes": 109 + }, + { + "name": "Gp0127644_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", + "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", + "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "file_size_bytes": 426075 + }, + { + "name": "gold:Gp0452677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for gold:Gp0452677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", + "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", + "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "file_size_bytes": 226638 + }, + { + "name": "Gp0127644_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127644", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", + "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", + "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "file_size_bytes": 610862986 + }, + { + "name": "Gp0127644_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127644", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", + "md5_checksum": "9baa708296f62334e099cf61711b5e16", + "id": "nmdc:9baa708296f62334e099cf61711b5e16", + "file_size_bytes": 243322 + }, + { + "name": "Gp0127644_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127644", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", + "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", + "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "file_size_bytes": 2294995 + }, + { + "name": "Gp0127644_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127644", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", + "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", + "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "file_size_bytes": 487178087 + }, + { + "name": "Gp0127644_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127644", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", + "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", + "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "file_size_bytes": 557688 + }, + { + "name": "Gp0127644_Kraken2 Krona 
HTML report", + "description": "Kraken2 Krona HTML report for Gp0127644", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", + "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", + "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", + "file_size_bytes": 3567307 + }, + { + "name": "Gp0127644_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127644", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_contigs.fna", + "md5_checksum": "16f77f4aaed29f3acc31646e1ce06b2d", + "id": "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + "file_size_bytes": 21881611 + }, + { + "name": "Gp0127644_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127644", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_scaffolds.fna", + "md5_checksum": "b6afa25cadc614083204383bbad06f48", + "id": "nmdc:b6afa25cadc614083204383bbad06f48", + "file_size_bytes": 21742982 + }, + { + "name": "Gp0127644_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_covstats.txt", + "md5_checksum": "87b1ea13d41499eeb5eb67932db01423", + "id": "nmdc:87b1ea13d41499eeb5eb67932db01423", + "file_size_bytes": 3612085 + }, + { + "name": "Gp0127644_Assembled AGP file", + "description": "Assembled AGP file for Gp0127644", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_assembly.agp", + "md5_checksum": "72a38c353753abcb6d046385bf2950f6", + "id": "nmdc:72a38c353753abcb6d046385bf2950f6", + "file_size_bytes": 3350598 + }, + { + "name": "Gp0127644_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0127644", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_pairedMapped_sorted.bam", + "md5_checksum": "ecf498b9aa15f9d000845ffdfa7eb521", + "id": "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", + "file_size_bytes": 746781339 + }, + { + "name": "Gp0127644_Protein FAA", + "description": "Protein FAA for Gp0127644", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_proteins.faa", + "md5_checksum": "9d960cad4d88795aba8bb1acbe415fc9", + "id": "nmdc:9d960cad4d88795aba8bb1acbe415fc9", + "file_size_bytes": 12848136 + }, + { + "name": "Gp0127644_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127644", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_structural_annotation.gff", + "md5_checksum": "cb5d98ee6e459ce1cc2d14295424eef1", + "id": "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", + "file_size_bytes": 2488 + }, + { + "name": "Gp0127644_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127644", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_functional_annotation.gff", + "md5_checksum": "349838000a53b6655a5b12edf6351c50", + "id": "nmdc:349838000a53b6655a5b12edf6351c50", + "file_size_bytes": 15112193 + }, + { + "name": "Gp0127644_KO TSV file", + "description": "KO TSV file for Gp0127644", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko.tsv", + "md5_checksum": "7bb072409221978dbea8ff5cb0bdba1e", + "id": "nmdc:7bb072409221978dbea8ff5cb0bdba1e", + "file_size_bytes": 1814299 + }, + { + "name": "Gp0127644_EC TSV file", + "description": 
"EC TSV file for Gp0127644", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ec.tsv", + "md5_checksum": "3d69ade973d1652bd6f061b2122ffe36", + "id": "nmdc:3d69ade973d1652bd6f061b2122ffe36", + "file_size_bytes": 1233948 + }, + { + "name": "Gp0127644_COG GFF file", + "description": "COG GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cog.gff", + "md5_checksum": "2a9b9a21fe5fb84219e0be5f153665be", + "id": "nmdc:2a9b9a21fe5fb84219e0be5f153665be", + "file_size_bytes": 9028987 + }, + { + "name": "Gp0127644_PFAM GFF file", + "description": "PFAM GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_pfam.gff", + "md5_checksum": "83e64b9fc9406a72d18e8dd4742bac1a", + "id": "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", + "file_size_bytes": 6574998 + }, + { + "name": "Gp0127644_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_tigrfam.gff", + "md5_checksum": "cdc4cc8629b7c61f1708f654aaaa9932", + "id": "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", + "file_size_bytes": 783908 + }, + { + "name": "Gp0127644_SMART GFF file", + "description": "SMART GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_smart.gff", + "md5_checksum": "f8d79375a2bf82f257e0015efeee6f26", + "id": "nmdc:f8d79375a2bf82f257e0015efeee6f26", + "file_size_bytes": 2030043 + }, + { + "name": "Gp0127644_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_supfam.gff", + "md5_checksum": "c9b4806132d19e740822b1a84bc4f07d", + "id": "nmdc:c9b4806132d19e740822b1a84bc4f07d", + "file_size_bytes": 11227652 + }, + { + "name": 
"Gp0127644_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cath_funfam.gff", + "md5_checksum": "e304e10eb60423c23486e140594d1a7b", + "id": "nmdc:e304e10eb60423c23486e140594d1a7b", + "file_size_bytes": 8555821 + }, + { + "name": "Gp0127644_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko_ec.gff", + "md5_checksum": "9b78f0ac527ee7287ae532a896582948", + "id": "nmdc:9b78f0ac527ee7287ae532a896582948", + "file_size_bytes": 5791094 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127644_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.tooShort.fa", + "md5_checksum": "4857d71459f50147c8ae97ffce40caa5", + "id": "nmdc:4857d71459f50147c8ae97ffce40caa5", + "file_size_bytes": 18310651 + }, + { + "name": "Gp0127644_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.unbinned.fa", + "md5_checksum": "65522bf77241109a74354d0e294597f9", + "id": "nmdc:65522bf77241109a74354d0e294597f9", + "file_size_bytes": 2858628 + }, + { + "name": "Gp0127644_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm 
quality assessment result for Gp0127644", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_checkm_qa.out", + "md5_checksum": "30d6c9fb23abb0849991fad01e0393f1", + "id": "nmdc:30d6c9fb23abb0849991fad01e0393f1", + "file_size_bytes": 760 + }, + { + "name": "Gp0127644_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127644", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_hqmq_bin.zip", + "md5_checksum": "a76c8c9034b877334a75e7c0b7c2c830", + "id": "nmdc:a76c8c9034b877334a75e7c0b7c2c830", + "file_size_bytes": 182 + }, + { + "name": "Gp0127644_metabat2 bins", + "description": "metabat2 bins for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_metabat_bin.zip", + "md5_checksum": "9d712c5924d6d0ee6d7305918e69302d", + "id": "nmdc:9d712c5924d6d0ee6d7305918e69302d", + "file_size_bytes": 218004 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34714" + }, + "has_input": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", + "nmdc:349838000a53b6655a5b12edf6351c50" + ], + "too_short_contig_num": 44192, + "part_of": [ + "nmdc:mga0bpf635" + ], + "binned_contig_num": 157, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:4857d71459f50147c8ae97ffce40caa5", + "nmdc:65522bf77241109a74354d0e294597f9", + "nmdc:30d6c9fb23abb0849991fad01e0393f1", + "nmdc:a76c8c9034b877334a75e7c0b7c2c830", + "nmdc:9d712c5924d6d0ee6d7305918e69302d" + ], + "was_informed_by": "gold:Gp0127644", + "input_contig_num": 46135, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + 
"name": "MAGs Analysis Activity for nmdc:mga0bpf635", + "mags_list": [ + { + "number_of_contig": 157, + "completeness": 39.0, + "bin_name": "bins.1", + "gene_count": 891, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + } + ], + "unbinned_contig_num": 1786, + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b4" + }, + "has_input": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9d960cad4d88795aba8bb1acbe415fc9", + "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", + "nmdc:349838000a53b6655a5b12edf6351c50", + "nmdc:7bb072409221978dbea8ff5cb0bdba1e", + "nmdc:3d69ade973d1652bd6f061b2122ffe36", + "nmdc:2a9b9a21fe5fb84219e0be5f153665be", + "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", + "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", + "nmdc:f8d79375a2bf82f257e0015efeee6f26", + "nmdc:c9b4806132d19e740822b1a84bc4f07d", + "nmdc:e304e10eb60423c23486e140594d1a7b", + "nmdc:9b78f0ac527ee7287ae532a896582948" + ], + "was_informed_by": "gold:Gp0127644", + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0bpf635", + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa1" + }, + "has_input": [ + 
"nmdc:98da35678c59689ce738b2a6bc708692" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "ctg_logsum": 37962, + "scaf_logsum": 38062, + "gap_pct": 0.00069, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + "nmdc:b6afa25cadc614083204383bbad06f48", + "nmdc:87b1ea13d41499eeb5eb67932db01423", + "nmdc:72a38c353753abcb6d046385bf2950f6", + "nmdc:ecf498b9aa15f9d000845ffdfa7eb521" + ], + "asm_score": 3.712, + "was_informed_by": "gold:Gp0127644", + "ctg_powsum": 4162.045, + "scaf_max": 11252, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "scaf_powsum": 4172.955, + "execution_resource": "NERSC-Cori", + "contigs": 46135, + "name": "Assembly Activity for nmdc:mga0bpf635", + "ctg_max": 11252, + "gc_std": 0.09328, + "contig_bp": 20152503, + "gc_avg": 0.6086, + "started_at_time": "2021-10-11T02:26:47Z", + "scaf_bp": 20152643, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 46121, + "ended_at_time": "2021-10-11T02:55:00+00:00", + "ctg_l50": 394, + "ctg_l90": 285, + "ctg_n50": 14034, + "ctg_n90": 39639, + "scaf_l50": 395, + "scaf_l90": 285, + "scaf_n50": 13959, + "scaf_n90": 39626 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b57" + }, + "id": "nmdc:omprc-11-hwadfm25", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-n80sx618" + ], + "has_output": [ + "jgi:574fe0a87ded5e3df1ee148e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + 
"ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127644" + ] + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86b" + }, + "has_input": [ + "nmdc:a1d8fff4b02719c4d0f9c442cf052f69" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:98da35678c59689ce738b2a6bc708692", + "nmdc:ff08ea52254e0cc1011c56656505b27b" + ], + "was_informed_by": "gold:Gp0127644", + "input_read_count": 11431762, + "output_read_bases": 1245433047, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "input_read_bases": 1726196062, + "name": "Read QC Activity for nmdc:mga0bpf635", + "output_read_count": 8322164, + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf38" + }, + "has_input": [ + "nmdc:98da35678c59689ce738b2a6bc708692" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "nmdc:9baa708296f62334e099cf61711b5e16", + "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" + ], + "was_informed_by": "gold:Gp0127644", + "id": 
"nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e7197c833bcf838a700966" + }, + "has_input": [ + "nmdc:98da35678c59689ce738b2a6bc708692" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "nmdc:9baa708296f62334e099cf61711b5e16", + "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" + ], + "was_informed_by": "gold:Gp0127644", + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127639_Filtered Reads", + "description": "Filtered Reads for Gp0127639", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", + "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", + "id": 
"nmdc:833077b40372c6daa20beaed04ed0ae1", + "file_size_bytes": 1585232805 + }, + { + "name": "Gp0127639_Filtered Stats", + "description": "Filtered Stats for Gp0127639", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", + "md5_checksum": "b68178eebde030fad0850797adbb2624", + "id": "nmdc:b68178eebde030fad0850797adbb2624", + "file_size_bytes": 289 + }, + { + "name": "Gp0127639_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", + "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", + "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "file_size_bytes": 648 + }, + { + "name": "Gp0127639_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", + "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", + "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", + "file_size_bytes": 588644 + }, + { + "name": "Gp0127639_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127639", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", + "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", + "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "file_size_bytes": 228175 + }, + { + "name": "Gp0127639_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127639", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", + "md5_checksum": "81281fef2c0778516a84b3a672cc0230", + "id": 
"nmdc:81281fef2c0778516a84b3a672cc0230", + "file_size_bytes": 1468498728 + }, + { + "name": "Gp0127639_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127639", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", + "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", + "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", + "file_size_bytes": 251338 + }, + { + "name": "Gp0127639_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127639", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", + "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", + "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "file_size_bytes": 2322720 + }, + { + "name": "Gp0127639_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127639", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", + "md5_checksum": "848fc10ed4365047cb139a4b40303808", + "id": "nmdc:848fc10ed4365047cb139a4b40303808", + "file_size_bytes": 1168015909 + }, + { + "name": "Gp0127639_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127639", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", + "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", + "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", + "file_size_bytes": 616202 + }, + { + "name": "Gp0127639_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127639", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", + "md5_checksum": "c6eb85143a2489921c53f8184d536129", + "id": "nmdc:c6eb85143a2489921c53f8184d536129", + "file_size_bytes": 3863456 + }, + { + "name": "Gp0127639_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", + "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", + "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "file_size_bytes": 648 + }, + { + "name": "Gp0127639_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", + "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", + "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", + "file_size_bytes": 588644 + }, + { + "name": "Gp0127639_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127639", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", + "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", + "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "file_size_bytes": 228175 + }, + { + "name": "Gp0127639_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127639", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", + "md5_checksum": "81281fef2c0778516a84b3a672cc0230", + "id": "nmdc:81281fef2c0778516a84b3a672cc0230", + "file_size_bytes": 1468498728 + }, + { + "name": "Gp0127639_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127639", + "data_object_type": "Centrifuge 
Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", + "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", + "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", + "file_size_bytes": 251338 + }, + { + "name": "Gp0127639_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127639", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", + "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", + "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "file_size_bytes": 2322720 + }, + { + "name": "Gp0127639_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127639", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", + "md5_checksum": "848fc10ed4365047cb139a4b40303808", + "id": "nmdc:848fc10ed4365047cb139a4b40303808", + "file_size_bytes": 1168015909 + }, + { + "name": "Gp0127639_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127639", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", + "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", + "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", + "file_size_bytes": 616202 + }, + { + "name": "Gp0127639_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127639", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", + "md5_checksum": "c6eb85143a2489921c53f8184d536129", + "id": "nmdc:c6eb85143a2489921c53f8184d536129", + "file_size_bytes": 3863456 + }, + { + "name": 
"Gp0127639_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127639", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_contigs.fna", + "md5_checksum": "2b73310c6eef1ece5bb01f235b22fdbd", + "id": "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "file_size_bytes": 120497476 + }, + { + "name": "Gp0127639_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127639", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_scaffolds.fna", + "md5_checksum": "8f14c016997dd96f70f547df930717be", + "id": "nmdc:8f14c016997dd96f70f547df930717be", + "file_size_bytes": 119857107 + }, + { + "name": "Gp0127639_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_covstats.txt", + "md5_checksum": "5966e5e32744a14549b19b4c92a606a5", + "id": "nmdc:5966e5e32744a14549b19b4c92a606a5", + "file_size_bytes": 16872665 + }, + { + "name": "Gp0127639_Assembled AGP file", + "description": "Assembled AGP file for Gp0127639", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_assembly.agp", + "md5_checksum": "1fcd489b3ae86a76bf297cc19b50392d", + "id": "nmdc:1fcd489b3ae86a76bf297cc19b50392d", + "file_size_bytes": 15768901 + }, + { + "name": "Gp0127639_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127639", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_pairedMapped_sorted.bam", + "md5_checksum": "5b90d13539ce840980db101fa7c1df96", + "id": "nmdc:5b90d13539ce840980db101fa7c1df96", + "file_size_bytes": 1779135536 + }, + { + "name": "Gp0127639_Protein FAA", + "description": 
"Protein FAA for Gp0127639", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_proteins.faa", + "md5_checksum": "6c09d55cfb8872b30eb1832394f80beb", + "id": "nmdc:6c09d55cfb8872b30eb1832394f80beb", + "file_size_bytes": 67573912 + }, + { + "name": "Gp0127639_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127639", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_structural_annotation.gff", + "md5_checksum": "2e3cc72d21590667259f6356882ce63b", + "id": "nmdc:2e3cc72d21590667259f6356882ce63b", + "file_size_bytes": 2526 + }, + { + "name": "Gp0127639_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127639", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_functional_annotation.gff", + "md5_checksum": "2dee5eaa50c8eeb6e3bc8471501d9964", + "id": "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", + "file_size_bytes": 75196016 + }, + { + "name": "Gp0127639_KO TSV file", + "description": "KO TSV file for Gp0127639", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko.tsv", + "md5_checksum": "7ec4cfdd88352d703a2bb64b99bd56c5", + "id": "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", + "file_size_bytes": 8707597 + }, + { + "name": "Gp0127639_EC TSV file", + "description": "EC TSV file for Gp0127639", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ec.tsv", + "md5_checksum": "16bedd944e5e836924c28b006026c348", + "id": "nmdc:16bedd944e5e836924c28b006026c348", + "file_size_bytes": 5769544 + }, + { + "name": "Gp0127639_COG GFF file", + "description": "COG GFF file 
for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cog.gff", + "md5_checksum": "8764070f565c50998968e0739420f5cc", + "id": "nmdc:8764070f565c50998968e0739420f5cc", + "file_size_bytes": 45648468 + }, + { + "name": "Gp0127639_PFAM GFF file", + "description": "PFAM GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_pfam.gff", + "md5_checksum": "9e6accc90d61ea572819dcdb591e41a7", + "id": "nmdc:9e6accc90d61ea572819dcdb591e41a7", + "file_size_bytes": 34995151 + }, + { + "name": "Gp0127639_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_tigrfam.gff", + "md5_checksum": "32b9518ee41cadb157f3c0f9ec91476c", + "id": "nmdc:32b9518ee41cadb157f3c0f9ec91476c", + "file_size_bytes": 4060116 + }, + { + "name": "Gp0127639_SMART GFF file", + "description": "SMART GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_smart.gff", + "md5_checksum": "432d591bd525ae429e837431d44954f7", + "id": "nmdc:432d591bd525ae429e837431d44954f7", + "file_size_bytes": 10056742 + }, + { + "name": "Gp0127639_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_supfam.gff", + "md5_checksum": "3120d5d5d27d142f898f70a8cc1b076e", + "id": "nmdc:3120d5d5d27d142f898f70a8cc1b076e", + "file_size_bytes": 56435804 + }, + { + "name": "Gp0127639_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cath_funfam.gff", + "md5_checksum": "d37ff61fdae942030a1b07e855cf1abd", + "id": "nmdc:d37ff61fdae942030a1b07e855cf1abd", + "file_size_bytes": 43456195 + }, + { + "name": "Gp0127639_KO_EC GFF file", + "description": 
"KO_EC GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko_ec.gff", + "md5_checksum": "56995366ba4186639a8ff4fd4defbd5e", + "id": "nmdc:56995366ba4186639a8ff4fd4defbd5e", + "file_size_bytes": 27657123 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127639_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.tooShort.fa", + "md5_checksum": "820dbad1b0ddd3c728e77aceee09ea28", + "id": "nmdc:820dbad1b0ddd3c728e77aceee09ea28", + "file_size_bytes": 90173016 + }, + { + "name": "Gp0127639_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.unbinned.fa", + "md5_checksum": "24fbfc69ded61dffff95ba2f8475239c", + "id": "nmdc:24fbfc69ded61dffff95ba2f8475239c", + "file_size_bytes": 27021291 + }, + { + "name": "Gp0127639_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127639", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_checkm_qa.out", + "md5_checksum": "1837710887027f94b0f25208edb35cbe", + "id": "nmdc:1837710887027f94b0f25208edb35cbe", + "file_size_bytes": 1570 + }, + { + "name": "Gp0127639_high-quality and medium-quality bins", + 
"description": "high-quality and medium-quality bins for Gp0127639", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_hqmq_bin.zip", + "md5_checksum": "7072cfd6665082a95b2c09a4bc88760c", + "id": "nmdc:7072cfd6665082a95b2c09a4bc88760c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127639_metabat2 bins", + "description": "metabat2 bins for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_metabat_bin.zip", + "md5_checksum": "b0db190d9d1093ef87a5efb8a600e9ef", + "id": "nmdc:b0db190d9d1093ef87a5efb8a600e9ef", + "file_size_bytes": 1000457 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3471b" + }, + "has_input": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "nmdc:5b90d13539ce840980db101fa7c1df96", + "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964" + ], + "too_short_contig_num": 194918, + "part_of": [ + "nmdc:mga09wpw60" + ], + "binned_contig_num": 732, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:820dbad1b0ddd3c728e77aceee09ea28", + "nmdc:24fbfc69ded61dffff95ba2f8475239c", + "nmdc:1837710887027f94b0f25208edb35cbe", + "nmdc:7072cfd6665082a95b2c09a4bc88760c", + "nmdc:b0db190d9d1093ef87a5efb8a600e9ef" + ], + "was_informed_by": "gold:Gp0127639", + "input_contig_num": 212559, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga09wpw60", + "mags_list": [ + { + "number_of_contig": 85, + "completeness": 18.1, + "bin_name": "bins.1", + "gene_count": 437, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, 
+ "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 59, + "completeness": 15.92, + "bin_name": "bins.2", + "gene_count": 343, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.84, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 258, + "completeness": 21.26, + "bin_name": "bins.3", + "gene_count": 1440, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + }, + { + "number_of_contig": 101, + "completeness": 29.13, + "bin_name": "bins.4", + "gene_count": 560, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 116, + "completeness": 1.53, + "bin_name": "bins.5", + "gene_count": 763, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 113, + "completeness": 9.72, + "bin_name": "bins.6", + "gene_count": 531, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 16909, + "started_at_time": 
"2021-10-11T02:27:08Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b7" + }, + "has_input": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6c09d55cfb8872b30eb1832394f80beb", + "nmdc:2e3cc72d21590667259f6356882ce63b", + "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", + "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", + "nmdc:16bedd944e5e836924c28b006026c348", + "nmdc:8764070f565c50998968e0739420f5cc", + "nmdc:9e6accc90d61ea572819dcdb591e41a7", + "nmdc:32b9518ee41cadb157f3c0f9ec91476c", + "nmdc:432d591bd525ae429e837431d44954f7", + "nmdc:3120d5d5d27d142f898f70a8cc1b076e", + "nmdc:d37ff61fdae942030a1b07e855cf1abd", + "nmdc:56995366ba4186639a8ff4fd4defbd5e" + ], + "was_informed_by": "gold:Gp0127639", + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga09wpw60", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa3" + }, + "has_input": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "ctg_logsum": 317684, + "scaf_logsum": 318786, + "gap_pct": 0.0017, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "nmdc:8f14c016997dd96f70f547df930717be", + "nmdc:5966e5e32744a14549b19b4c92a606a5", + "nmdc:1fcd489b3ae86a76bf297cc19b50392d", + "nmdc:5b90d13539ce840980db101fa7c1df96" + ], + "asm_score": 3.397, + "was_informed_by": 
"gold:Gp0127639", + "ctg_powsum": 34356, + "scaf_max": 19860, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "scaf_powsum": 34485, + "execution_resource": "NERSC-Cori", + "contigs": 212560, + "name": "Assembly Activity for nmdc:mga09wpw60", + "ctg_max": 19860, + "gc_std": 0.09375, + "contig_bp": 112053293, + "gc_avg": 0.63186, + "started_at_time": "2021-10-11T02:27:08Z", + "scaf_bp": 112055193, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 212379, + "ended_at_time": "2021-10-11T03:27:12+00:00", + "ctg_l50": 538, + "ctg_l90": 298, + "ctg_n50": 55584, + "ctg_n90": 173977, + "scaf_l50": 539, + "scaf_l90": 298, + "scaf_n50": 55395, + "scaf_n90": 173826 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b58" + }, + "id": "nmdc:omprc-11-vnnn4722", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-tzp60785" + ], + "has_output": [ + "jgi:574fde667ded5e3df1ee1407" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127639" + ] + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": 
"649b009d6bdd4fd20273c871" + }, + "has_input": [ + "nmdc:ae9087ed8e1ead2407bca45a47725633" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1", + "nmdc:b68178eebde030fad0850797adbb2624" + ], + "was_informed_by": "gold:Gp0127639", + "input_read_count": 23535784, + "output_read_bases": 2989527376, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3553903384, + "name": "Read QC Activity for nmdc:mga09wpw60", + "output_read_count": 20011156, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf46" + }, + "has_input": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "nmdc:82f072d1931154fbc722531d3d0dc41c", + "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "nmdc:81281fef2c0778516a84b3a672cc0230", + "nmdc:86ae054ba9def1126579c8f76db8a07a", + "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "nmdc:848fc10ed4365047cb139a4b40303808", + "nmdc:94e422e0bae86c608fba1c3815e08e92", + "nmdc:c6eb85143a2489921c53f8184d536129" + ], + "was_informed_by": "gold:Gp0127639", + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": 
"61e7199a833bcf838a700d65" + }, + "has_input": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "nmdc:82f072d1931154fbc722531d3d0dc41c", + "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "nmdc:81281fef2c0778516a84b3a672cc0230", + "nmdc:86ae054ba9def1126579c8f76db8a07a", + "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "nmdc:848fc10ed4365047cb139a4b40303808", + "nmdc:94e422e0bae86c608fba1c3815e08e92", + "nmdc:c6eb85143a2489921c53f8184d536129" + ], + "was_informed_by": "gold:Gp0127639", + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127642_Filtered Reads", + "description": "Filtered Reads for Gp0127642", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filtered.fastq.gz", + "md5_checksum": "603166d1e0da357d356a2029215d76ea", + "id": "nmdc:603166d1e0da357d356a2029215d76ea", + "file_size_bytes": 2304174057 + }, + { + "name": "Gp0127642_Filtered Stats", + "description": "Filtered Stats for Gp0127642", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filterStats.txt", + "md5_checksum": "639d9630c859c9b2f6f7a2eff1e1a863", + "id": "nmdc:639d9630c859c9b2f6f7a2eff1e1a863", + "file_size_bytes": 284 + }, + { + "name": "Gp0127642_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127642", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", + "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", + "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", + "file_size_bytes": 5303 + }, + { + "name": "Gp0127642_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", + "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", + "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", + "file_size_bytes": 948120 + }, + { + "name": "Gp0127642_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127642", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", + "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", + "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", + "file_size_bytes": 241990 + }, + { + "name": "Gp0127642_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127642", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", + "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", + "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", + "file_size_bytes": 2023464022 + }, + { + "name": "Gp0127642_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127642", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", + "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", + "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", + "file_size_bytes": 257700 + }, + { + "name": "Gp0127642_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127642", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", + "md5_checksum": "bb92f0d18280f32aacf482a43a841372", + "id": "nmdc:bb92f0d18280f32aacf482a43a841372", + "file_size_bytes": 2339227 + }, + { + "name": "Gp0127642_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127642", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", + "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", + "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", + "file_size_bytes": 1630988221 + }, + { + "name": "Gp0127642_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127642", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", + "md5_checksum": "272e3daee292c6e284026ee95b72d290", + "id": "nmdc:272e3daee292c6e284026ee95b72d290", + "file_size_bytes": 659136 + }, + { + "name": "Gp0127642_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127642", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", + "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", + "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", + "file_size_bytes": 4013188 + }, + { + "name": "Gp0127642_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", + "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", + "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", + "file_size_bytes": 5303 + }, + { + "name": "Gp0127642_Gottcha2 
full TSV report", + "description": "Gottcha2 full TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", + "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", + "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", + "file_size_bytes": 948120 + }, + { + "name": "Gp0127642_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127642", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", + "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", + "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", + "file_size_bytes": 241990 + }, + { + "name": "Gp0127642_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127642", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", + "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", + "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", + "file_size_bytes": 2023464022 + }, + { + "name": "Gp0127642_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127642", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", + "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", + "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", + "file_size_bytes": 257700 + }, + { + "name": "Gp0127642_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127642", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", + "md5_checksum": "bb92f0d18280f32aacf482a43a841372", + "id": 
"nmdc:bb92f0d18280f32aacf482a43a841372", + "file_size_bytes": 2339227 + }, + { + "name": "Gp0127642_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127642", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", + "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", + "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", + "file_size_bytes": 1630988221 + }, + { + "name": "Gp0127642_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127642", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", + "md5_checksum": "272e3daee292c6e284026ee95b72d290", + "id": "nmdc:272e3daee292c6e284026ee95b72d290", + "file_size_bytes": 659136 + }, + { + "name": "Gp0127642_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127642", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", + "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", + "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", + "file_size_bytes": 4013188 + }, + { + "name": "Gp0127642_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127642", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_contigs.fna", + "md5_checksum": "9c2c077dd8f43350b83c1c1ba853bbbc", + "id": "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "file_size_bytes": 44374790 + }, + { + "name": "Gp0127642_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127642", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_scaffolds.fna", + 
"md5_checksum": "9a3dfedede65ba1253a84264492e909c", + "id": "nmdc:9a3dfedede65ba1253a84264492e909c", + "file_size_bytes": 44064962 + }, + { + "name": "Gp0127642_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_covstats.txt", + "md5_checksum": "0772cb4473177c4e0046c7fd9cb65b27", + "id": "nmdc:0772cb4473177c4e0046c7fd9cb65b27", + "file_size_bytes": 8090415 + }, + { + "name": "Gp0127642_Assembled AGP file", + "description": "Assembled AGP file for Gp0127642", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_assembly.agp", + "md5_checksum": "7d0ccfaeac8981d1300b8c17abed052b", + "id": "nmdc:7d0ccfaeac8981d1300b8c17abed052b", + "file_size_bytes": 7524067 + }, + { + "name": "Gp0127642_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127642", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_pairedMapped_sorted.bam", + "md5_checksum": "a5b5801b13f062bc09a1405d0a01e6ac", + "id": "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", + "file_size_bytes": 2461892983 + }, + { + "name": "Gp0127642_Protein FAA", + "description": "Protein FAA for Gp0127642", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_proteins.faa", + "md5_checksum": "e6270776fe3cb9f4e8e2958f9d8d6151", + "id": "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", + "file_size_bytes": 26699570 + }, + { + "name": "Gp0127642_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127642", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_structural_annotation.gff", + "md5_checksum": 
"f442172aba544a550f1e294bc615fd1d", + "id": "nmdc:f442172aba544a550f1e294bc615fd1d", + "file_size_bytes": 2505 + }, + { + "name": "Gp0127642_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127642", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_functional_annotation.gff", + "md5_checksum": "c0f7ac45facbbb7b74bb7ce11af11910", + "id": "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", + "file_size_bytes": 32011364 + }, + { + "name": "Gp0127642_KO TSV file", + "description": "KO TSV file for Gp0127642", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko.tsv", + "md5_checksum": "63db41425c31ceda578a9e2a801dcb98", + "id": "nmdc:63db41425c31ceda578a9e2a801dcb98", + "file_size_bytes": 3660508 + }, + { + "name": "Gp0127642_EC TSV file", + "description": "EC TSV file for Gp0127642", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ec.tsv", + "md5_checksum": "1cf9336281454b1747a86f9877f47ce8", + "id": "nmdc:1cf9336281454b1747a86f9877f47ce8", + "file_size_bytes": 2451794 + }, + { + "name": "Gp0127642_COG GFF file", + "description": "COG GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cog.gff", + "md5_checksum": "1cb7ab56a921ed80d21dad5b2d41c139", + "id": "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", + "file_size_bytes": 18356139 + }, + { + "name": "Gp0127642_PFAM GFF file", + "description": "PFAM GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_pfam.gff", + "md5_checksum": "157326e95b92fa83ab5755c22acf5837", + "id": "nmdc:157326e95b92fa83ab5755c22acf5837", + "file_size_bytes": 13044512 + }, + { + "name": "Gp0127642_TigrFam GFF file", + 
"description": "TigrFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_tigrfam.gff", + "md5_checksum": "f001a06864e30347885e5a76ae89ae92", + "id": "nmdc:f001a06864e30347885e5a76ae89ae92", + "file_size_bytes": 1280537 + }, + { + "name": "Gp0127642_SMART GFF file", + "description": "SMART GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_smart.gff", + "md5_checksum": "aa1e3207b62ca31a87da28ad4c3e6e92", + "id": "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", + "file_size_bytes": 4029242 + }, + { + "name": "Gp0127642_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_supfam.gff", + "md5_checksum": "5119eebdfebd43b4af243a61cc8e45eb", + "id": "nmdc:5119eebdfebd43b4af243a61cc8e45eb", + "file_size_bytes": 23011352 + }, + { + "name": "Gp0127642_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cath_funfam.gff", + "md5_checksum": "4e6178de376e5e228c8b5c17ce3d0621", + "id": "nmdc:4e6178de376e5e228c8b5c17ce3d0621", + "file_size_bytes": 17039992 + }, + { + "name": "Gp0127642_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko_ec.gff", + "md5_checksum": "d89f026da3dfb4ee7d4884a47ce5739d", + "id": "nmdc:d89f026da3dfb4ee7d4884a47ce5739d", + "file_size_bytes": 11677748 + }, + { + "name": "Gp0127642_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127642", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_checkm_qa.out", + "md5_checksum": "ac59797a394f8e4aa971e5c1d016e23e", + "id": 
"nmdc:ac59797a394f8e4aa971e5c1d016e23e", + "file_size_bytes": 765 + }, + { + "name": "Gp0127642_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127642", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_hqmq_bin.zip", + "md5_checksum": "46858bd4b45bdaa4e4344820f3c54b3b", + "id": "nmdc:46858bd4b45bdaa4e4344820f3c54b3b", + "file_size_bytes": 472684 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3471c" + }, + "has_input": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", + "nmdc:c0f7ac45facbbb7b74bb7ce11af11910" + ], + "too_short_contig_num": 101249, + "part_of": [ + "nmdc:mga0cvxk30" + ], + "binned_contig_num": 213, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac59797a394f8e4aa971e5c1d016e23e", + "nmdc:46858bd4b45bdaa4e4344820f3c54b3b" + ], + "was_informed_by": "gold:Gp0127642", + "input_contig_num": 103206, + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0cvxk30", + "mags_list": [ { - "number_of_contig": 113, - "completeness": 9.72, - "bin_name": "bins.6", - "gene_count": 531, - "bin_quality": "LQ", + "number_of_contig": 213, + "completeness": 71.17, + "bin_name": "bins.1", + "gene_count": 1914, + "bin_quality": "MQ", "gtdbtk_species": "", - "gtdbtk_order": "", + "gtdbtk_order": "Nitrososphaerales", "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, "num_23s": 0, 
"gtdbtk_genus": "", - "num_t_rna": 4 + "num_t_rna": 31 } ], - "unbinned_contig_num": 16909, - "started_at_time": "2021-10-11T02:27:08Z", + "unbinned_contig_num": 1744, + "started_at_time": "2021-12-01T21:30:33Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:27:12+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127639_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.tooShort.fa", - "md5_checksum": "820dbad1b0ddd3c728e77aceee09ea28", - "id": "nmdc:820dbad1b0ddd3c728e77aceee09ea28", - "file_size_bytes": 90173016 - }, - { - "name": "Gp0127639_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.unbinned.fa", - "md5_checksum": "24fbfc69ded61dffff95ba2f8475239c", - "id": "nmdc:24fbfc69ded61dffff95ba2f8475239c", - "file_size_bytes": 27021291 - }, - { - "name": "Gp0127639_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127639", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_checkm_qa.out", - "md5_checksum": "1837710887027f94b0f25208edb35cbe", - "id": "nmdc:1837710887027f94b0f25208edb35cbe", - "file_size_bytes": 1570 - }, - { - "name": 
"Gp0127639_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127639", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_hqmq_bin.zip", - "md5_checksum": "7072cfd6665082a95b2c09a4bc88760c", - "id": "nmdc:7072cfd6665082a95b2c09a4bc88760c", - "file_size_bytes": 182 - }, - { - "name": "Gp0127639_metabat2 bins", - "description": "metabat2 bins for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_metabat_bin.zip", - "md5_checksum": "b0db190d9d1093ef87a5efb8a600e9ef", - "id": "nmdc:b0db190d9d1093ef87a5efb8a600e9ef", - "file_size_bytes": 1000457 - } + "ended_at_time": "2021-12-02T20:50:24+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ba" + }, + "has_input": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", + "nmdc:f442172aba544a550f1e294bc615fd1d", + "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", + "nmdc:63db41425c31ceda578a9e2a801dcb98", + "nmdc:1cf9336281454b1747a86f9877f47ce8", + "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", + "nmdc:157326e95b92fa83ab5755c22acf5837", + "nmdc:f001a06864e30347885e5a76ae89ae92", + "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", + "nmdc:5119eebdfebd43b4af243a61cc8e45eb", + "nmdc:4e6178de376e5e228c8b5c17ce3d0621", + "nmdc:d89f026da3dfb4ee7d4884a47ce5739d" + ], + "was_informed_by": "gold:Gp0127642", + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0cvxk30", + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": 
"2021-12-02T20:50:24+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa7" + }, + "has_input": [ + "nmdc:603166d1e0da357d356a2029215d76ea" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "ctg_logsum": 50653, + "scaf_logsum": 50816, + "gap_pct": 0.00106, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "nmdc:9a3dfedede65ba1253a84264492e909c", + "nmdc:0772cb4473177c4e0046c7fd9cb65b27", + "nmdc:7d0ccfaeac8981d1300b8c17abed052b", + "nmdc:a5b5801b13f062bc09a1405d0a01e6ac" + ], + "asm_score": 7.947, + "was_informed_by": "gold:Gp0127642", + "ctg_powsum": 5974.26, + "scaf_max": 27286, + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "scaf_powsum": 5993.216, + "execution_resource": "NERSC-Cori", + "contigs": 103206, + "name": "Assembly Activity for nmdc:mga0cvxk30", + "ctg_max": 27286, + "gc_std": 0.1028, + "gc_avg": 0.60377, + "contig_bp": 40567169, + "started_at_time": "2021-12-01T21:30:33Z", + "scaf_bp": 40567599, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 103181, + "ended_at_time": "2021-12-02T20:50:24+00:00", + "ctg_l50": 348, + "ctg_l90": 283, + "ctg_n50": 35487, + "ctg_n90": 88775, + "scaf_l50": 348, + "scaf_l90": 283, + "scaf_n50": 35472, + "scaf_n90": 88751 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b59" + }, + "id": "nmdc:omprc-11-p21wp875", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-qpve9v25" + ], + "has_output": [ + 
"jgi:574fe0a67ded5e3df1ee148d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127642" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b59" - }, - "id": "nmdc:omprc-11-p21wp875", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-qpve9v25" - ], - "has_output": [ - "jgi:574fe0a67ded5e3df1ee148d" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127642" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { "$oid": "649b009d6bdd4fd20273c874" @@ -25754,28 +27279,10 @@ "output_read_count": 27378404, "started_at_time": "2021-12-01T21:30:33Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-12-02T20:50:24+00:00", - "output_data_objects": [ - { - "name": "Gp0127642_Filtered Reads", - "description": "Filtered 
Reads for Gp0127642", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filtered.fastq.gz", - "md5_checksum": "603166d1e0da357d356a2029215d76ea", - "id": "nmdc:603166d1e0da357d356a2029215d76ea", - "file_size_bytes": 2304174057 - }, - { - "name": "Gp0127642_Filtered Stats", - "description": "Filtered Stats for Gp0127642", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filterStats.txt", - "md5_checksum": "639d9630c859c9b2f6f7a2eff1e1a863", - "id": "nmdc:639d9630c859c9b2f6f7a2eff1e1a863", - "file_size_bytes": 284 - } - ] - }, + "ended_at_time": "2021-12-02T20:50:24+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { "$oid": "649b009bff710ae353f8cf3b" @@ -25801,89 +27308,16 @@ "name": "ReadBased Analysis Activity for nmdc:mga0cvxk30", "started_at_time": "2021-12-01T21:30:33Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:50:24+00:00", - "output_data_objects": [ - { - "name": "Gp0127642_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", - "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", - "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", - "file_size_bytes": 5303 - }, - { - "name": "Gp0127642_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", - "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", - "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", - "file_size_bytes": 948120 - }, - { - "name": "Gp0127642_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127642", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", - "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", - "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", - "file_size_bytes": 241990 - }, - { - "name": "Gp0127642_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127642", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", - "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", - "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", - "file_size_bytes": 2023464022 - }, - { - "name": "Gp0127642_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127642", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", - "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", - "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", - "file_size_bytes": 257700 - }, - { - "name": "Gp0127642_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127642", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", - "md5_checksum": "bb92f0d18280f32aacf482a43a841372", - "id": "nmdc:bb92f0d18280f32aacf482a43a841372", - "file_size_bytes": 2339227 - }, - { - "name": "Gp0127642_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127642", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", - "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", - "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", - "file_size_bytes": 
1630988221 - }, - { - "name": "Gp0127642_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127642", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", - "md5_checksum": "272e3daee292c6e284026ee95b72d290", - "id": "nmdc:272e3daee292c6e284026ee95b72d290", - "file_size_bytes": 659136 - }, - { - "name": "Gp0127642_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127642", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", - "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", - "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", - "file_size_bytes": 4013188 - } - ] - }, + "ended_at_time": "2021-12-02T20:50:24+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { "$oid": "61e7199f833bcf838a700f38" @@ -25912,422 +27346,580 @@ "name": "ReadBased Analysis Activity for nmdc:mga0cvxk30", "started_at_time": "2021-12-01T21:30:33Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:50:24+00:00", - "output_data_objects": [ - { - "name": "Gp0127642_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", - "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", - "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", - "file_size_bytes": 5303 - }, - { - "name": "Gp0127642_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", - 
"md5_checksum": "0a079e34648ce23b0837dff31e2be5df", - "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", - "file_size_bytes": 948120 - }, - { - "name": "Gp0127642_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127642", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", - "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", - "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", - "file_size_bytes": 241990 - }, - { - "name": "Gp0127642_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127642", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", - "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", - "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", - "file_size_bytes": 2023464022 - }, - { - "name": "Gp0127642_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127642", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", - "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", - "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", - "file_size_bytes": 257700 - }, - { - "name": "Gp0127642_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127642", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", - "md5_checksum": "bb92f0d18280f32aacf482a43a841372", - "id": "nmdc:bb92f0d18280f32aacf482a43a841372", - "file_size_bytes": 2339227 - }, - { - "name": "Gp0127642_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127642", - 
"data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", - "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", - "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", - "file_size_bytes": 1630988221 - }, - { - "name": "Gp0127642_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127642", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", - "md5_checksum": "272e3daee292c6e284026ee95b72d290", - "id": "nmdc:272e3daee292c6e284026ee95b72d290", - "file_size_bytes": 659136 - }, - { - "name": "Gp0127642_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127642", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", - "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", - "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", - "file_size_bytes": 4013188 - } - ] + "ended_at_time": "2021-12-02T20:50:24+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127646_Filtered Reads", + "description": "Filtered Reads for Gp0127646", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", + "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", + "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", + "file_size_bytes": 2209739723 + }, + { + "name": "Gp0127646_Filtered Stats", + "description": "Filtered Stats for Gp0127646", + "data_object_type": "QC Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", + "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", + "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", + "file_size_bytes": 291 + }, + { + "name": "Gp0127646_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", + "md5_checksum": "3e0598df41941463bac0fdec5df29f55", + "id": "nmdc:3e0598df41941463bac0fdec5df29f55", + "file_size_bytes": 4650 + }, + { + "name": "Gp0127646_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", + "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", + "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", + "file_size_bytes": 877659 + }, + { + "name": "Gp0127646_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127646", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", + "md5_checksum": "bc8e157195d042d7207d67b4982fea96", + "id": "nmdc:bc8e157195d042d7207d67b4982fea96", + "file_size_bytes": 236676 + }, + { + "name": "Gp0127646_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127646", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", + "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", + "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", + "file_size_bytes": 1901493736 + }, + { + "name": "Gp0127646_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127646", + "data_object_type": "Centrifuge Classification Report", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", + "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", + "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", + "file_size_bytes": 256274 + }, + { + "name": "Gp0127646_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127646", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", + "md5_checksum": "cd10cca62774e66f60d60380ee18132e", + "id": "nmdc:cd10cca62774e66f60d60380ee18132e", + "file_size_bytes": 2333722 + }, + { + "name": "Gp0127646_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127646", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", + "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", + "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", + "file_size_bytes": 1534616616 + }, + { + "name": "Gp0127646_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127646", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", + "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", + "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", + "file_size_bytes": 663507 + }, + { + "name": "Gp0127646_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127646", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", + "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", + "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", + "file_size_bytes": 4031909 + }, + { + "name": "Gp0127646_Gottcha2 TSV report", + 
"description": "Gottcha2 TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", + "md5_checksum": "3e0598df41941463bac0fdec5df29f55", + "id": "nmdc:3e0598df41941463bac0fdec5df29f55", + "file_size_bytes": 4650 + }, + { + "name": "Gp0127646_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", + "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", + "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", + "file_size_bytes": 877659 + }, + { + "name": "Gp0127646_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127646", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", + "md5_checksum": "bc8e157195d042d7207d67b4982fea96", + "id": "nmdc:bc8e157195d042d7207d67b4982fea96", + "file_size_bytes": 236676 + }, + { + "name": "Gp0127646_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127646", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", + "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", + "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", + "file_size_bytes": 1901493736 + }, + { + "name": "Gp0127646_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127646", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", + "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", + "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", + "file_size_bytes": 256274 + }, + { + "name": 
"Gp0127646_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127646", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", + "md5_checksum": "cd10cca62774e66f60d60380ee18132e", + "id": "nmdc:cd10cca62774e66f60d60380ee18132e", + "file_size_bytes": 2333722 + }, + { + "name": "Gp0127646_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127646", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", + "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", + "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", + "file_size_bytes": 1534616616 + }, + { + "name": "Gp0127646_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127646", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", + "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", + "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", + "file_size_bytes": 663507 + }, + { + "name": "Gp0127646_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127646", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", + "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", + "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", + "file_size_bytes": 4031909 + }, + { + "name": "Gp0127646_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127646", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_contigs.fna", + "md5_checksum": "c2301a45b987661e5e6f32eaf6928003", + "id": 
"nmdc:c2301a45b987661e5e6f32eaf6928003", + "file_size_bytes": 33070670 + }, + { + "name": "Gp0127646_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127646", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_scaffolds.fna", + "md5_checksum": "6233a266773aa722d6a3c2556b0c1cb5", + "id": "nmdc:6233a266773aa722d6a3c2556b0c1cb5", + "file_size_bytes": 32825592 + }, + { + "name": "Gp0127646_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_covstats.txt", + "md5_checksum": "c5460716df8c1d47e081837c8cc5d281", + "id": "nmdc:c5460716df8c1d47e081837c8cc5d281", + "file_size_bytes": 6393678 + }, + { + "name": "Gp0127646_Assembled AGP file", + "description": "Assembled AGP file for Gp0127646", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_assembly.agp", + "md5_checksum": "9437132a95f356e7cc6513f862f38f81", + "id": "nmdc:9437132a95f356e7cc6513f862f38f81", + "file_size_bytes": 5942403 + }, + { + "name": "Gp0127646_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127646", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_pairedMapped_sorted.bam", + "md5_checksum": "0d0ee85be3a079b0eba5bb872c842f7d", + "id": "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", + "file_size_bytes": 2346665933 + }, + { + "name": "Gp0127646_Protein FAA", + "description": "Protein FAA for Gp0127646", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_proteins.faa", + "md5_checksum": "b907352a805a209c5b7e10f6ce9e3ceb", + "id": "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", + 
"file_size_bytes": 18886480 + }, + { + "name": "Gp0127646_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127646", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_structural_annotation.gff", + "md5_checksum": "769c049c4b3301900de0c62666e8c297", + "id": "nmdc:769c049c4b3301900de0c62666e8c297", + "file_size_bytes": 2883 + }, + { + "name": "Gp0127646_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127646", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_functional_annotation.gff", + "md5_checksum": "3dcb9f83f3921fc7f3e7a2050584cc77", + "id": "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", + "file_size_bytes": 23048582 + }, + { + "name": "Gp0127646_KO TSV file", + "description": "KO TSV file for Gp0127646", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko.tsv", + "md5_checksum": "1b81cc955690e81f18c2bc1533e7ee89", + "id": "nmdc:1b81cc955690e81f18c2bc1533e7ee89", + "file_size_bytes": 2643070 + }, + { + "name": "Gp0127646_EC TSV file", + "description": "EC TSV file for Gp0127646", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ec.tsv", + "md5_checksum": "dd94ee1dbd107bf14e8be72b8f546290", + "id": "nmdc:dd94ee1dbd107bf14e8be72b8f546290", + "file_size_bytes": 1742846 + }, + { + "name": "Gp0127646_COG GFF file", + "description": "COG GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cog.gff", + "md5_checksum": "e271f0ef1c44b514304c35a7913751e3", + "id": "nmdc:e271f0ef1c44b514304c35a7913751e3", + "file_size_bytes": 12090733 + }, + { + "name": "Gp0127646_PFAM GFF 
file", + "description": "PFAM GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_pfam.gff", + "md5_checksum": "b3d3f1ef308b7555cbea077cc00dbc95", + "id": "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", + "file_size_bytes": 8631888 + }, + { + "name": "Gp0127646_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_tigrfam.gff", + "md5_checksum": "d18d6a67ad7e17514b0c4b502ea69ac0", + "id": "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", + "file_size_bytes": 840759 + }, + { + "name": "Gp0127646_SMART GFF file", + "description": "SMART GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_smart.gff", + "md5_checksum": "62920faf364dea6a1d028878d49a2989", + "id": "nmdc:62920faf364dea6a1d028878d49a2989", + "file_size_bytes": 2684392 + }, + { + "name": "Gp0127646_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_supfam.gff", + "md5_checksum": "757bd3295026410cb03690d4dae95935", + "id": "nmdc:757bd3295026410cb03690d4dae95935", + "file_size_bytes": 15569120 + }, + { + "name": "Gp0127646_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cath_funfam.gff", + "md5_checksum": "19eef79eefc81cbe6d7d4586d8be5d23", + "id": "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", + "file_size_bytes": 11479737 + }, + { + "name": "Gp0127646_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko_ec.gff", + "md5_checksum": "fc8598d9d6926e6ac8bb9c488016734a", + "id": "nmdc:fc8598d9d6926e6ac8bb9c488016734a", + "file_size_bytes": 8425263 + }, + { + "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127646_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.tooShort.fa", + "md5_checksum": "de605dd3ecac26d6a35740c09448b171", + "id": "nmdc:de605dd3ecac26d6a35740c09448b171", + "file_size_bytes": 31210054 + }, + { + "name": "Gp0127646_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.unbinned.fa", + "md5_checksum": "9392ab9668a1c347f010004c2f0cc8db", + "id": "nmdc:9392ab9668a1c347f010004c2f0cc8db", + "file_size_bytes": 1595698 + }, + { + "name": "Gp0127647_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127647", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", + "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", + "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "file_size_bytes": 775 + }, + { + "name": "Gp0127646_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127646", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_hqmq_bin.zip", + "md5_checksum": "d75d0006d0009e7e14f2ad8044a3cbfb", + "id": 
"nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", + "file_size_bytes": 182 }, + { + "name": "Gp0127646_metabat2 bins", + "description": "metabat2 bins for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_metabat_bin.zip", + "md5_checksum": "17e9a7763327f2b5d3f841079c2f68d8", + "id": "nmdc:17e9a7763327f2b5d3f841079c2f68d8", + "file_size_bytes": 82006 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fa7" + "$oid": "649b0052ec087f6bbab34715" }, "has_input": [ - "nmdc:603166d1e0da357d356a2029215d76ea" + "nmdc:c2301a45b987661e5e6f32eaf6928003", + "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", + "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77" ], + "too_short_contig_num": 80674, "part_of": [ - "nmdc:mga0cvxk30" + "nmdc:mga0dm4q17" ], - "ctg_logsum": 50653, - "scaf_logsum": 50816, - "gap_pct": 0.00106, + "binned_contig_num": 20, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", - "nmdc:9a3dfedede65ba1253a84264492e909c", - "nmdc:0772cb4473177c4e0046c7fd9cb65b27", - "nmdc:7d0ccfaeac8981d1300b8c17abed052b", - "nmdc:a5b5801b13f062bc09a1405d0a01e6ac" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:de605dd3ecac26d6a35740c09448b171", + "nmdc:9392ab9668a1c347f010004c2f0cc8db", + "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", + "nmdc:17e9a7763327f2b5d3f841079c2f68d8" ], - "asm_score": 7.947, - "was_informed_by": "gold:Gp0127642", - "ctg_powsum": 5974.26, - "scaf_max": 27286, - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", - "scaf_powsum": 5993.216, + "was_informed_by": "gold:Gp0127646", + "input_contig_num": 81652, + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", "execution_resource": "NERSC-Cori", - "contigs": 103206, - "name": "Assembly Activity for nmdc:mga0cvxk30", - "ctg_max": 27286, - "gc_std": 0.1028, - 
"gc_avg": 0.60377, - "contig_bp": 40567169, - "started_at_time": "2021-12-01T21:30:33Z", - "scaf_bp": 40567599, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 103181, - "ended_at_time": "2021-12-02T20:50:24+00:00", - "ctg_l50": 348, - "ctg_l90": 283, - "ctg_n50": 35487, - "ctg_n90": 88775, - "scaf_l50": 348, - "scaf_l90": 283, - "scaf_n50": 35472, - "scaf_n90": 88751, - "output_data_objects": [ - { - "name": "Gp0127642_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127642", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_contigs.fna", - "md5_checksum": "9c2c077dd8f43350b83c1c1ba853bbbc", - "id": "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", - "file_size_bytes": 44374790 - }, - { - "name": "Gp0127642_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127642", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_scaffolds.fna", - "md5_checksum": "9a3dfedede65ba1253a84264492e909c", - "id": "nmdc:9a3dfedede65ba1253a84264492e909c", - "file_size_bytes": 44064962 - }, - { - "name": "Gp0127642_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_covstats.txt", - "md5_checksum": "0772cb4473177c4e0046c7fd9cb65b27", - "id": "nmdc:0772cb4473177c4e0046c7fd9cb65b27", - "file_size_bytes": 8090415 - }, - { - "name": "Gp0127642_Assembled AGP file", - "description": "Assembled AGP file for Gp0127642", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_assembly.agp", - "md5_checksum": "7d0ccfaeac8981d1300b8c17abed052b", - "id": "nmdc:7d0ccfaeac8981d1300b8c17abed052b", - "file_size_bytes": 7524067 - }, + "name": "MAGs Analysis Activity for nmdc:mga0dm4q17", + 
"mags_list": [ { - "name": "Gp0127642_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127642", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_pairedMapped_sorted.bam", - "md5_checksum": "a5b5801b13f062bc09a1405d0a01e6ac", - "id": "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", - "file_size_bytes": 2461892983 + "number_of_contig": 20, + "completeness": 1.36, + "bin_name": "bins.1", + "gene_count": 275, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 10 } - ] - }, + ], + "unbinned_contig_num": 958, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9ba" + "$oid": "649b005bbf2caae0415ef9b2" }, "has_input": [ - "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc" + "nmdc:c2301a45b987661e5e6f32eaf6928003" ], "part_of": [ - "nmdc:mga0cvxk30" + "nmdc:mga0dm4q17" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", - "nmdc:f442172aba544a550f1e294bc615fd1d", - "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", - "nmdc:63db41425c31ceda578a9e2a801dcb98", - "nmdc:1cf9336281454b1747a86f9877f47ce8", - "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", - "nmdc:157326e95b92fa83ab5755c22acf5837", - "nmdc:f001a06864e30347885e5a76ae89ae92", - "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", - "nmdc:5119eebdfebd43b4af243a61cc8e45eb", - "nmdc:4e6178de376e5e228c8b5c17ce3d0621", - "nmdc:d89f026da3dfb4ee7d4884a47ce5739d" + 
"nmdc:b907352a805a209c5b7e10f6ce9e3ceb", + "nmdc:769c049c4b3301900de0c62666e8c297", + "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", + "nmdc:1b81cc955690e81f18c2bc1533e7ee89", + "nmdc:dd94ee1dbd107bf14e8be72b8f546290", + "nmdc:e271f0ef1c44b514304c35a7913751e3", + "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", + "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", + "nmdc:62920faf364dea6a1d028878d49a2989", + "nmdc:757bd3295026410cb03690d4dae95935", + "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", + "nmdc:fc8598d9d6926e6ac8bb9c488016734a" ], - "was_informed_by": "gold:Gp0127642", - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "was_informed_by": "gold:Gp0127646", + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0cvxk30", - "started_at_time": "2021-12-01T21:30:33Z", + "name": "Annotation Activity for nmdc:mga0dm4q17", + "started_at_time": "2021-10-11T02:23:42Z", "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-12-02T20:50:24+00:00", - "output_data_objects": [ - { - "name": "Gp0127642_Protein FAA", - "description": "Protein FAA for Gp0127642", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_proteins.faa", - "md5_checksum": "e6270776fe3cb9f4e8e2958f9d8d6151", - "id": "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", - "file_size_bytes": 26699570 - }, - { - "name": "Gp0127642_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127642", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_structural_annotation.gff", - "md5_checksum": "f442172aba544a550f1e294bc615fd1d", - "id": "nmdc:f442172aba544a550f1e294bc615fd1d", - "file_size_bytes": 2505 - }, - { - "name": "Gp0127642_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127642", - "data_object_type": 
"Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_functional_annotation.gff", - "md5_checksum": "c0f7ac45facbbb7b74bb7ce11af11910", - "id": "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", - "file_size_bytes": 32011364 - }, - { - "name": "Gp0127642_KO TSV file", - "description": "KO TSV file for Gp0127642", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko.tsv", - "md5_checksum": "63db41425c31ceda578a9e2a801dcb98", - "id": "nmdc:63db41425c31ceda578a9e2a801dcb98", - "file_size_bytes": 3660508 - }, - { - "name": "Gp0127642_EC TSV file", - "description": "EC TSV file for Gp0127642", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ec.tsv", - "md5_checksum": "1cf9336281454b1747a86f9877f47ce8", - "id": "nmdc:1cf9336281454b1747a86f9877f47ce8", - "file_size_bytes": 2451794 - }, - { - "name": "Gp0127642_COG GFF file", - "description": "COG GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cog.gff", - "md5_checksum": "1cb7ab56a921ed80d21dad5b2d41c139", - "id": "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", - "file_size_bytes": 18356139 - }, - { - "name": "Gp0127642_PFAM GFF file", - "description": "PFAM GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_pfam.gff", - "md5_checksum": "157326e95b92fa83ab5755c22acf5837", - "id": "nmdc:157326e95b92fa83ab5755c22acf5837", - "file_size_bytes": 13044512 - }, - { - "name": "Gp0127642_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_tigrfam.gff", - "md5_checksum": "f001a06864e30347885e5a76ae89ae92", - "id": "nmdc:f001a06864e30347885e5a76ae89ae92", - 
"file_size_bytes": 1280537 - }, - { - "name": "Gp0127642_SMART GFF file", - "description": "SMART GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_smart.gff", - "md5_checksum": "aa1e3207b62ca31a87da28ad4c3e6e92", - "id": "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", - "file_size_bytes": 4029242 - }, - { - "name": "Gp0127642_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_supfam.gff", - "md5_checksum": "5119eebdfebd43b4af243a61cc8e45eb", - "id": "nmdc:5119eebdfebd43b4af243a61cc8e45eb", - "file_size_bytes": 23011352 - }, - { - "name": "Gp0127642_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cath_funfam.gff", - "md5_checksum": "4e6178de376e5e228c8b5c17ce3d0621", - "id": "nmdc:4e6178de376e5e228c8b5c17ce3d0621", - "file_size_bytes": 17039992 - }, - { - "name": "Gp0127642_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko_ec.gff", - "md5_checksum": "d89f026da3dfb4ee7d4884a47ce5739d", - "id": "nmdc:d89f026da3dfb4ee7d4884a47ce5739d", - "file_size_bytes": 11677748 - } - ] - }, + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab3471c" + "$oid": "649b005f2ca5ee4adb139f9d" }, "has_input": [ - "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", - "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", - "nmdc:c0f7ac45facbbb7b74bb7ce11af11910" + "nmdc:208a3777ef0b99408f0d5832dee576e0" ], - "too_short_contig_num": 101249, "part_of": [ - "nmdc:mga0cvxk30" + "nmdc:mga0dm4q17" ], - "binned_contig_num": 213, + "ctg_logsum": 20856, + "scaf_logsum": 20954, + "gap_pct": 0.00116, "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ac59797a394f8e4aa971e5c1d016e23e", - "nmdc:46858bd4b45bdaa4e4344820f3c54b3b" + "nmdc:c2301a45b987661e5e6f32eaf6928003", + "nmdc:6233a266773aa722d6a3c2556b0c1cb5", + "nmdc:c5460716df8c1d47e081837c8cc5d281", + "nmdc:9437132a95f356e7cc6513f862f38f81", + "nmdc:0d0ee85be3a079b0eba5bb872c842f7d" ], - "was_informed_by": "gold:Gp0127642", - "input_contig_num": 103206, - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "asm_score": 17.863, + "was_informed_by": "gold:Gp0127646", + "ctg_powsum": 2534.931, + "scaf_max": 88400, + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "scaf_powsum": 2545.156, "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0cvxk30", - "mags_list": [ - { - "number_of_contig": 213, - "completeness": 71.17, - "bin_name": "bins.1", - "gene_count": 1914, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 0.97, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 31 - } + "contigs": 81653, + "name": "Assembly Activity for nmdc:mga0dm4q17", + "ctg_max": 88400, + "gc_std": 0.13273, + "contig_bp": 30097213, + "gc_avg": 0.55961, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 30097563, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 81627, + "ended_at_time": "2021-10-11T04:05:12+00:00", + "ctg_l50": 332, + "ctg_l90": 282, + "ctg_n50": 30532, + "ctg_n90": 71638, + "scaf_l50": 332, + "scaf_l90": 282, + "scaf_n50": 30518, + "scaf_n90": 71614, + "scaf_l_gt50k": 150260, + "scaf_n_gt50k": 2, + "scaf_pct_gt50k": 0.49924305 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + 
"omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5a" + }, + "id": "nmdc:omprc-11-vs67yj43", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-0n5nks24" ], - "unbinned_contig_num": 1744, - "started_at_time": "2021-12-01T21:30:33Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-12-02T20:50:24+00:00", - "output_data_objects": [ - { - "name": "Gp0127642_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127642", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_checkm_qa.out", - "md5_checksum": "ac59797a394f8e4aa971e5c1d016e23e", - "id": "nmdc:ac59797a394f8e4aa971e5c1d016e23e", - "file_size_bytes": 765 - }, - { - "name": "Gp0127642_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127642", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_hqmq_bin.zip", - "md5_checksum": "46858bd4b45bdaa4e4344820f3c54b3b", - "id": "nmdc:46858bd4b45bdaa4e4344820f3c54b3b", - "file_size_bytes": 472684 - } + "has_output": [ + "jgi:574fde867ded5e3df1ee1420" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + 
"gold_sequencing_project_identifiers": [ + "gold:Gp0127646" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b5a" - }, - "id": "nmdc:omprc-11-vs67yj43", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-0n5nks24" - ], - "has_output": [ - "jgi:574fde867ded5e3df1ee1420" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127646" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { "$oid": "649b009d6bdd4fd20273c86f" @@ -26353,28 +27945,10 @@ "output_read_count": 25862834, "started_at_time": "2021-10-11T02:23:42Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:05:12+00:00", - "output_data_objects": [ - { - "name": "Gp0127646_Filtered Reads", - "description": "Filtered Reads for Gp0127646", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", - "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", - "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", - "file_size_bytes": 2209739723 - }, - { - "name": "Gp0127646_Filtered Stats", - "description": "Filtered Stats for Gp0127646", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", - "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", - "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", - "file_size_bytes": 291 - } - ] - }, + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { "$oid": "649b009bff710ae353f8cf33" @@ -26400,89 +27974,16 @@ "name": "ReadBased Analysis Activity for nmdc:mga0dm4q17", "started_at_time": "2021-10-11T02:23:42Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:12+00:00", - "output_data_objects": [ - { - "name": "Gp0127646_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", - "md5_checksum": "3e0598df41941463bac0fdec5df29f55", - "id": "nmdc:3e0598df41941463bac0fdec5df29f55", - "file_size_bytes": 4650 - }, - { - "name": "Gp0127646_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", - "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", - "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", - "file_size_bytes": 877659 - }, - { - "name": "Gp0127646_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127646", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", - "md5_checksum": "bc8e157195d042d7207d67b4982fea96", - "id": "nmdc:bc8e157195d042d7207d67b4982fea96", - "file_size_bytes": 236676 - }, - { - "name": "Gp0127646_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127646", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", - "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", - "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", - "file_size_bytes": 1901493736 - }, - { - "name": "Gp0127646_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127646", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", - "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", - "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", - "file_size_bytes": 256274 - }, - { - "name": "Gp0127646_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127646", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", - "md5_checksum": "cd10cca62774e66f60d60380ee18132e", - "id": "nmdc:cd10cca62774e66f60d60380ee18132e", - "file_size_bytes": 2333722 - }, - { - "name": "Gp0127646_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127646", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", - "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", - "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", - "file_size_bytes": 1534616616 - }, - { - "name": "Gp0127646_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127646", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", - "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", - "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", - "file_size_bytes": 663507 - }, - { - "name": "Gp0127646_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127646", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", - "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", - "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", - "file_size_bytes": 4031909 - } - ] - }, + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { "$oid": "61e7197d833bcf838a7009e9" @@ -26511,1117 +28012,1262 @@ "name": "ReadBased Analysis Activity for nmdc:mga0dm4q17", "started_at_time": "2021-10-11T02:23:42Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:12+00:00", - "output_data_objects": [ - { - "name": "Gp0127646_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", - "md5_checksum": "3e0598df41941463bac0fdec5df29f55", - "id": "nmdc:3e0598df41941463bac0fdec5df29f55", - "file_size_bytes": 4650 - }, - { - "name": "Gp0127646_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", - "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", - "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", - "file_size_bytes": 877659 - }, - { - "name": "Gp0127646_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127646", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", - "md5_checksum": 
"bc8e157195d042d7207d67b4982fea96", - "id": "nmdc:bc8e157195d042d7207d67b4982fea96", - "file_size_bytes": 236676 - }, - { - "name": "Gp0127646_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127646", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", - "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", - "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", - "file_size_bytes": 1901493736 - }, - { - "name": "Gp0127646_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127646", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", - "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", - "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", - "file_size_bytes": 256274 - }, - { - "name": "Gp0127646_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127646", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", - "md5_checksum": "cd10cca62774e66f60d60380ee18132e", - "id": "nmdc:cd10cca62774e66f60d60380ee18132e", - "file_size_bytes": 2333722 - }, - { - "name": "Gp0127646_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127646", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", - "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", - "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", - "file_size_bytes": 1534616616 - }, - { - "name": "Gp0127646_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127646", - "data_object_type": 
"Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", - "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", - "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", - "file_size_bytes": 663507 - }, - { - "name": "Gp0127646_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127646", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", - "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", - "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", - "file_size_bytes": 4031909 - } - ] + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127648_Filtered Reads", + "description": "Filtered Reads for Gp0127648", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filtered.fastq.gz", + "md5_checksum": "fcc3a92dd2b6ab6045f4be27da6f2cdd", + "id": "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", + "file_size_bytes": 2191252492 }, { - "_id": { - "$oid": "649b005f2ca5ee4adb139f9d" - }, - "has_input": [ - "nmdc:208a3777ef0b99408f0d5832dee576e0" - ], - "part_of": [ - "nmdc:mga0dm4q17" - ], - "ctg_logsum": 20856, - "scaf_logsum": 20954, - "gap_pct": 0.00116, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c2301a45b987661e5e6f32eaf6928003", - "nmdc:6233a266773aa722d6a3c2556b0c1cb5", - "nmdc:c5460716df8c1d47e081837c8cc5d281", - "nmdc:9437132a95f356e7cc6513f862f38f81", - "nmdc:0d0ee85be3a079b0eba5bb872c842f7d" - ], - "asm_score": 17.863, - "was_informed_by": "gold:Gp0127646", - "ctg_powsum": 2534.931, - "scaf_max": 88400, - "id": 
"nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "scaf_powsum": 2545.156, - "execution_resource": "NERSC-Cori", - "contigs": 81653, - "name": "Assembly Activity for nmdc:mga0dm4q17", - "ctg_max": 88400, - "gc_std": 0.13273, - "contig_bp": 30097213, - "gc_avg": 0.55961, - "started_at_time": "2021-10-11T02:23:42Z", - "scaf_bp": 30097563, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 81627, - "ended_at_time": "2021-10-11T04:05:12+00:00", - "ctg_l50": 332, - "ctg_l90": 282, - "ctg_n50": 30532, - "ctg_n90": 71638, - "scaf_l50": 332, - "scaf_l90": 282, - "scaf_n50": 30518, - "scaf_n90": 71614, - "scaf_l_gt50k": 150260, - "scaf_n_gt50k": 2, - "scaf_pct_gt50k": 0.49924305, - "output_data_objects": [ - { - "name": "Gp0127646_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127646", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_contigs.fna", - "md5_checksum": "c2301a45b987661e5e6f32eaf6928003", - "id": "nmdc:c2301a45b987661e5e6f32eaf6928003", - "file_size_bytes": 33070670 - }, - { - "name": "Gp0127646_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127646", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_scaffolds.fna", - "md5_checksum": "6233a266773aa722d6a3c2556b0c1cb5", - "id": "nmdc:6233a266773aa722d6a3c2556b0c1cb5", - "file_size_bytes": 32825592 - }, - { - "name": "Gp0127646_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_covstats.txt", - "md5_checksum": "c5460716df8c1d47e081837c8cc5d281", - "id": "nmdc:c5460716df8c1d47e081837c8cc5d281", - "file_size_bytes": 6393678 - }, - { - "name": "Gp0127646_Assembled AGP file", - "description": "Assembled AGP file for Gp0127646", - "data_object_type": "Assembly AGP", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_assembly.agp", - "md5_checksum": "9437132a95f356e7cc6513f862f38f81", - "id": "nmdc:9437132a95f356e7cc6513f862f38f81", - "file_size_bytes": 5942403 - }, - { - "name": "Gp0127646_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127646", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_pairedMapped_sorted.bam", - "md5_checksum": "0d0ee85be3a079b0eba5bb872c842f7d", - "id": "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", - "file_size_bytes": 2346665933 - } - ] + "name": "Gp0127648_Filtered Stats", + "description": "Filtered Stats for Gp0127648", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filterStats.txt", + "md5_checksum": "2208c88cac6b941799d4492dbf5f0887", + "id": "nmdc:2208c88cac6b941799d4492dbf5f0887", + "file_size_bytes": 289 }, { - "_id": { - "$oid": "649b005bbf2caae0415ef9b2" - }, - "has_input": [ - "nmdc:c2301a45b987661e5e6f32eaf6928003" - ], - "part_of": [ - "nmdc:mga0dm4q17" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", - "nmdc:769c049c4b3301900de0c62666e8c297", - "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", - "nmdc:1b81cc955690e81f18c2bc1533e7ee89", - "nmdc:dd94ee1dbd107bf14e8be72b8f546290", - "nmdc:e271f0ef1c44b514304c35a7913751e3", - "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", - "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", - "nmdc:62920faf364dea6a1d028878d49a2989", - "nmdc:757bd3295026410cb03690d4dae95935", - "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", - "nmdc:fc8598d9d6926e6ac8bb9c488016734a" - ], - "was_informed_by": "gold:Gp0127646", - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0dm4q17", - "started_at_time": 
"2021-10-11T02:23:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:05:12+00:00", - "output_data_objects": [ - { - "name": "Gp0127646_Protein FAA", - "description": "Protein FAA for Gp0127646", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_proteins.faa", - "md5_checksum": "b907352a805a209c5b7e10f6ce9e3ceb", - "id": "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", - "file_size_bytes": 18886480 - }, - { - "name": "Gp0127646_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127646", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_structural_annotation.gff", - "md5_checksum": "769c049c4b3301900de0c62666e8c297", - "id": "nmdc:769c049c4b3301900de0c62666e8c297", - "file_size_bytes": 2883 - }, - { - "name": "Gp0127646_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127646", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_functional_annotation.gff", - "md5_checksum": "3dcb9f83f3921fc7f3e7a2050584cc77", - "id": "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", - "file_size_bytes": 23048582 - }, - { - "name": "Gp0127646_KO TSV file", - "description": "KO TSV file for Gp0127646", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko.tsv", - "md5_checksum": "1b81cc955690e81f18c2bc1533e7ee89", - "id": "nmdc:1b81cc955690e81f18c2bc1533e7ee89", - "file_size_bytes": 2643070 - }, - { - "name": "Gp0127646_EC TSV file", - "description": "EC TSV file for Gp0127646", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ec.tsv", - 
"md5_checksum": "dd94ee1dbd107bf14e8be72b8f546290", - "id": "nmdc:dd94ee1dbd107bf14e8be72b8f546290", - "file_size_bytes": 1742846 - }, - { - "name": "Gp0127646_COG GFF file", - "description": "COG GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cog.gff", - "md5_checksum": "e271f0ef1c44b514304c35a7913751e3", - "id": "nmdc:e271f0ef1c44b514304c35a7913751e3", - "file_size_bytes": 12090733 - }, - { - "name": "Gp0127646_PFAM GFF file", - "description": "PFAM GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_pfam.gff", - "md5_checksum": "b3d3f1ef308b7555cbea077cc00dbc95", - "id": "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", - "file_size_bytes": 8631888 - }, - { - "name": "Gp0127646_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_tigrfam.gff", - "md5_checksum": "d18d6a67ad7e17514b0c4b502ea69ac0", - "id": "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", - "file_size_bytes": 840759 - }, - { - "name": "Gp0127646_SMART GFF file", - "description": "SMART GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_smart.gff", - "md5_checksum": "62920faf364dea6a1d028878d49a2989", - "id": "nmdc:62920faf364dea6a1d028878d49a2989", - "file_size_bytes": 2684392 - }, - { - "name": "Gp0127646_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_supfam.gff", - "md5_checksum": "757bd3295026410cb03690d4dae95935", - "id": "nmdc:757bd3295026410cb03690d4dae95935", - "file_size_bytes": 15569120 - }, - { - "name": "Gp0127646_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127646", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cath_funfam.gff", - "md5_checksum": "19eef79eefc81cbe6d7d4586d8be5d23", - "id": "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", - "file_size_bytes": 11479737 - }, - { - "name": "Gp0127646_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko_ec.gff", - "md5_checksum": "fc8598d9d6926e6ac8bb9c488016734a", - "id": "nmdc:fc8598d9d6926e6ac8bb9c488016734a", - "file_size_bytes": 8425263 - } - ] + "name": "Gp0127648_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", + "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", + "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "file_size_bytes": 3323 + }, + { + "name": "Gp0127648_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", + "md5_checksum": "1357df297d8d8a872b335e0c3222d102", + "id": "nmdc:1357df297d8d8a872b335e0c3222d102", + "file_size_bytes": 782039 + }, + { + "name": "Gp0127648_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127648", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", + "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", + "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", + "file_size_bytes": 236971 + }, + { + "name": "Gp0127648_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127648", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", + "md5_checksum": "33bf814280051c220e0c4a06f7935728", + "id": "nmdc:33bf814280051c220e0c4a06f7935728", + "file_size_bytes": 1945479328 + }, + { + "name": "Gp0127648_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127648", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", + "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", + "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "file_size_bytes": 255338 + }, + { + "name": "Gp0127648_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127648", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", + "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", + "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", + "file_size_bytes": 2333371 + }, + { + "name": "Gp0127648_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127648", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", + "md5_checksum": "222bac312efdd6c86d2475ad224b7907", + "id": "nmdc:222bac312efdd6c86d2475ad224b7907", + "file_size_bytes": 1562011343 + }, + { + "name": "Gp0127648_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127648", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", + "md5_checksum": "baaca868b1fed932b463e489708dd741", + "id": "nmdc:baaca868b1fed932b463e489708dd741", + "file_size_bytes": 647859 + }, + { + "name": "Gp0127648_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127648", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", + "md5_checksum": "b549d169e5b0693152555373a6d8ee75", + "id": "nmdc:b549d169e5b0693152555373a6d8ee75", + "file_size_bytes": 3952548 + }, + { + "name": "Gp0127648_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", + "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", + "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "file_size_bytes": 3323 + }, + { + "name": "Gp0127648_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", + "md5_checksum": "1357df297d8d8a872b335e0c3222d102", + "id": "nmdc:1357df297d8d8a872b335e0c3222d102", + "file_size_bytes": 782039 + }, + { + "name": "Gp0127648_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127648", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", + "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", + "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", + "file_size_bytes": 236971 + }, + { + "name": "Gp0127648_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127648", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", + "md5_checksum": "33bf814280051c220e0c4a06f7935728", + "id": "nmdc:33bf814280051c220e0c4a06f7935728", + "file_size_bytes": 1945479328 + }, + { + "name": 
"Gp0127648_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127648", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", + "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", + "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "file_size_bytes": 255338 + }, + { + "name": "Gp0127648_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127648", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", + "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", + "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", + "file_size_bytes": 2333371 + }, + { + "name": "Gp0127648_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127648", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", + "md5_checksum": "222bac312efdd6c86d2475ad224b7907", + "id": "nmdc:222bac312efdd6c86d2475ad224b7907", + "file_size_bytes": 1562011343 + }, + { + "name": "Gp0127648_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127648", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", + "md5_checksum": "baaca868b1fed932b463e489708dd741", + "id": "nmdc:baaca868b1fed932b463e489708dd741", + "file_size_bytes": 647859 + }, + { + "name": "Gp0127648_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127648", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", + "md5_checksum": 
"b549d169e5b0693152555373a6d8ee75", + "id": "nmdc:b549d169e5b0693152555373a6d8ee75", + "file_size_bytes": 3952548 + }, + { + "name": "Gp0127648_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127648", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_contigs.fna", + "md5_checksum": "ca10f7bae0565946414188c9121ee338", + "id": "nmdc:ca10f7bae0565946414188c9121ee338", + "file_size_bytes": 67439267 + }, + { + "name": "Gp0127648_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127648", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_scaffolds.fna", + "md5_checksum": "cf23062373806986b70244b1fabbd17b", + "id": "nmdc:cf23062373806986b70244b1fabbd17b", + "file_size_bytes": 66996134 + }, + { + "name": "Gp0127648_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_covstats.txt", + "md5_checksum": "99b2c3c91b299b9426cca9dfb10b0cea", + "id": "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", + "file_size_bytes": 11610674 + }, + { + "name": "Gp0127648_Assembled AGP file", + "description": "Assembled AGP file for Gp0127648", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_assembly.agp", + "md5_checksum": "303d7282e6f91afaa9564c65107d4086", + "id": "nmdc:303d7282e6f91afaa9564c65107d4086", + "file_size_bytes": 10842402 + }, + { + "name": "Gp0127648_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127648", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_pairedMapped_sorted.bam", + "md5_checksum": "4a6ffadb01b62dd73278429808c1a39a", + "id": 
"nmdc:4a6ffadb01b62dd73278429808c1a39a", + "file_size_bytes": 2362185094 + }, + { + "name": "Gp0127648_Protein FAA", + "description": "Protein FAA for Gp0127648", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_proteins.faa", + "md5_checksum": "c4a719f3a899f7aa760f627f7b1ae6e7", + "id": "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", + "file_size_bytes": 40118426 + }, + { + "name": "Gp0127648_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127648", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_structural_annotation.gff", + "md5_checksum": "80ab4116b1cdfbc3e4c4d06e5990d735", + "id": "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", + "file_size_bytes": 2507 + }, + { + "name": "Gp0127648_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127648", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_functional_annotation.gff", + "md5_checksum": "600011ab7e39465d3f9f28d5d93a4248", + "id": "nmdc:600011ab7e39465d3f9f28d5d93a4248", + "file_size_bytes": 47178055 + }, + { + "name": "Gp0127648_KO TSV file", + "description": "KO TSV file for Gp0127648", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko.tsv", + "md5_checksum": "0c8d98b369900cd19da39235e3eae6db", + "id": "nmdc:0c8d98b369900cd19da39235e3eae6db", + "file_size_bytes": 5498487 + }, + { + "name": "Gp0127648_EC TSV file", + "description": "EC TSV file for Gp0127648", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ec.tsv", + "md5_checksum": "16c37f8c4f74e7e81b7900536da55e39", + "id": 
"nmdc:16c37f8c4f74e7e81b7900536da55e39", + "file_size_bytes": 3650457 + }, + { + "name": "Gp0127648_COG GFF file", + "description": "COG GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cog.gff", + "md5_checksum": "a7fc228cd8d224bbf2843ba6a6648480", + "id": "nmdc:a7fc228cd8d224bbf2843ba6a6648480", + "file_size_bytes": 27226505 + }, + { + "name": "Gp0127648_PFAM GFF file", + "description": "PFAM GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_pfam.gff", + "md5_checksum": "a57d9d86c20cfd13ddc56027110485ba", + "id": "nmdc:a57d9d86c20cfd13ddc56027110485ba", + "file_size_bytes": 19896169 + }, + { + "name": "Gp0127648_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_tigrfam.gff", + "md5_checksum": "6a4be27e2e7454941b73aa843471f25d", + "id": "nmdc:6a4be27e2e7454941b73aa843471f25d", + "file_size_bytes": 2105656 + }, + { + "name": "Gp0127648_SMART GFF file", + "description": "SMART GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_smart.gff", + "md5_checksum": "be3d2a77be3ccd810d679f03204f8bac", + "id": "nmdc:be3d2a77be3ccd810d679f03204f8bac", + "file_size_bytes": 6062323 + }, + { + "name": "Gp0127648_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_supfam.gff", + "md5_checksum": "eb5ac02ce17f687c5ccf5a64548c559e", + "id": "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", + "file_size_bytes": 33896425 + }, + { + "name": "Gp0127648_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cath_funfam.gff", + "md5_checksum": "81ff9f257ffe63ca5d04db9e767620b1", + 
"id": "nmdc:81ff9f257ffe63ca5d04db9e767620b1", + "file_size_bytes": 25515156 + }, + { + "name": "Gp0127648_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko_ec.gff", + "md5_checksum": "8768f37ff001a86a25ae34c7deee9d9a", + "id": "nmdc:8768f37ff001a86a25ae34c7deee9d9a", + "file_size_bytes": 17491444 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127648_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.tooShort.fa", + "md5_checksum": "8b67e5038c55083e2aa8e19c5d05fef8", + "id": "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", + "file_size_bytes": 58962192 + }, + { + "name": "Gp0127648_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.unbinned.fa", + "md5_checksum": "fc8454a790709b36d7ca96cd99359d26", + "id": "nmdc:fc8454a790709b36d7ca96cd99359d26", + "file_size_bytes": 6656731 + }, + { + "name": "Gp0127648_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127648", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_checkm_qa.out", + "md5_checksum": "942bd7c28c52e6301bf97dab0ea2852a", + "id": 
"nmdc:942bd7c28c52e6301bf97dab0ea2852a", + "file_size_bytes": 930 + }, + { + "name": "Gp0127648_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127648", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_hqmq_bin.zip", + "md5_checksum": "82ebf9065be9715e1230a50bf7a02197", + "id": "nmdc:82ebf9065be9715e1230a50bf7a02197", + "file_size_bytes": 466157 }, + { + "name": "Gp0127648_metabat2 bins", + "description": "metabat2 bins for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_metabat_bin.zip", + "md5_checksum": "897536007e7e3525457df5d3baddd593", + "id": "nmdc:897536007e7e3525457df5d3baddd593", + "file_size_bytes": 90255 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34715" + "$oid": "649b0052ec087f6bbab34718" }, "has_input": [ - "nmdc:c2301a45b987661e5e6f32eaf6928003", - "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", - "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77" + "nmdc:ca10f7bae0565946414188c9121ee338", + "nmdc:4a6ffadb01b62dd73278429808c1a39a", + "nmdc:600011ab7e39465d3f9f28d5d93a4248" ], - "too_short_contig_num": 80674, + "too_short_contig_num": 142847, "part_of": [ - "nmdc:mga0dm4q17" + "nmdc:mga0andh11" ], - "binned_contig_num": 20, + "binned_contig_num": 329, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:de605dd3ecac26d6a35740c09448b171", - "nmdc:9392ab9668a1c347f010004c2f0cc8db", - "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", - "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", - "nmdc:17e9a7763327f2b5d3f841079c2f68d8" + "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", + "nmdc:fc8454a790709b36d7ca96cd99359d26", + "nmdc:942bd7c28c52e6301bf97dab0ea2852a", + "nmdc:82ebf9065be9715e1230a50bf7a02197", + 
"nmdc:897536007e7e3525457df5d3baddd593" ], - "was_informed_by": "gold:Gp0127646", - "input_contig_num": 81652, - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "was_informed_by": "gold:Gp0127648", + "input_contig_num": 147340, + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0dm4q17", + "name": "MAGs Analysis Activity for nmdc:mga0andh11", "mags_list": [ { - "number_of_contig": 20, - "completeness": 1.36, + "number_of_contig": 255, + "completeness": 70.91, "bin_name": "bins.1", - "gene_count": 275, + "gene_count": 1857, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 1, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.94, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 28 + }, + { + "number_of_contig": 74, + "completeness": 19.91, + "bin_name": "bins.2", + "gene_count": 380, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", - "num_16s": 1, + "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, - "num_23s": 2, + "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 10 + "num_t_rna": 5 } ], - "unbinned_contig_num": 958, - "started_at_time": "2021-10-11T02:23:42Z", + "unbinned_contig_num": 4164, + "started_at_time": "2021-10-11T02:23:29Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:05:12+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127646_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.tooShort.fa", - "md5_checksum": "de605dd3ecac26d6a35740c09448b171", - "id": "nmdc:de605dd3ecac26d6a35740c09448b171", - "file_size_bytes": 31210054 - }, - { - "name": "Gp0127646_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.unbinned.fa", - "md5_checksum": "9392ab9668a1c347f010004c2f0cc8db", - "id": "nmdc:9392ab9668a1c347f010004c2f0cc8db", - "file_size_bytes": 1595698 - }, - { - "name": "Gp0127647_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127647", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", - "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", - "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", - "file_size_bytes": 775 - }, - { - "name": "Gp0127646_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127646", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_hqmq_bin.zip", - "md5_checksum": "d75d0006d0009e7e14f2ad8044a3cbfb", - "id": "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", - "file_size_bytes": 182 - }, - { - "name": "Gp0127646_metabat2 bins", - "description": "metabat2 bins for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_metabat_bin.zip", - "md5_checksum": "17e9a7763327f2b5d3f841079c2f68d8", - "id": "nmdc:17e9a7763327f2b5d3f841079c2f68d8", - 
"file_size_bytes": 82006 - } - ] + "ended_at_time": "2021-10-11T04:13:04+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b5b" - }, - "id": "nmdc:omprc-11-nhf5m035", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-b7nrtg75" - ], - "has_output": [ - "jgi:574fe0ac7ded5e3df1ee1491" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127648" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c870" + "$oid": "649b005bbf2caae0415ef9b8" }, "has_input": [ - "nmdc:22bf7ba401619da2a191e7b30544a8ac" + "nmdc:ca10f7bae0565946414188c9121ee338" ], "part_of": [ "nmdc:mga0andh11" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", - "nmdc:2208c88cac6b941799d4492dbf5f0887" + "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", + "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", + "nmdc:600011ab7e39465d3f9f28d5d93a4248", + "nmdc:0c8d98b369900cd19da39235e3eae6db", + "nmdc:16c37f8c4f74e7e81b7900536da55e39", + "nmdc:a7fc228cd8d224bbf2843ba6a6648480", + 
"nmdc:a57d9d86c20cfd13ddc56027110485ba", + "nmdc:6a4be27e2e7454941b73aa843471f25d", + "nmdc:be3d2a77be3ccd810d679f03204f8bac", + "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", + "nmdc:81ff9f257ffe63ca5d04db9e767620b1", + "nmdc:8768f37ff001a86a25ae34c7deee9d9a" ], "was_informed_by": "gold:Gp0127648", - "input_read_count": 28064750, - "output_read_bases": 3953713958, "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", "execution_resource": "NERSC-Cori", - "input_read_bases": 4237777250, - "name": "Read QC Activity for nmdc:mga0andh11", - "output_read_count": 26438892, + "name": "Annotation Activity for nmdc:mga0andh11", "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:13:04+00:00", - "output_data_objects": [ - { - "name": "Gp0127648_Filtered Reads", - "description": "Filtered Reads for Gp0127648", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filtered.fastq.gz", - "md5_checksum": "fcc3a92dd2b6ab6045f4be27da6f2cdd", - "id": "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", - "file_size_bytes": 2191252492 - }, - { - "name": "Gp0127648_Filtered Stats", - "description": "Filtered Stats for Gp0127648", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filterStats.txt", - "md5_checksum": "2208c88cac6b941799d4492dbf5f0887", - "id": "nmdc:2208c88cac6b941799d4492dbf5f0887", - "file_size_bytes": 289 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:13:04+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf39" + "$oid": "649b005f2ca5ee4adb139fa5" }, "has_input": [ "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" ], + "part_of": [ + "nmdc:mga0andh11" + ], + "ctg_logsum": 91193, + "scaf_logsum": 91521, + "gap_pct": 0.0011, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", 
"has_output": [ - "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "nmdc:1357df297d8d8a872b335e0c3222d102", - "nmdc:5b510e336e60b6120b43e9b6420a074e", - "nmdc:33bf814280051c220e0c4a06f7935728", - "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "nmdc:0efb0ad19234056d7e2e3726dead3622", - "nmdc:222bac312efdd6c86d2475ad224b7907", - "nmdc:baaca868b1fed932b463e489708dd741", - "nmdc:b549d169e5b0693152555373a6d8ee75" + "nmdc:ca10f7bae0565946414188c9121ee338", + "nmdc:cf23062373806986b70244b1fabbd17b", + "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", + "nmdc:303d7282e6f91afaa9564c65107d4086", + "nmdc:4a6ffadb01b62dd73278429808c1a39a" ], + "asm_score": 4.996, "was_informed_by": "gold:Gp0127648", + "ctg_powsum": 10170, + "scaf_max": 23974, "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "scaf_powsum": 10208, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0andh11", + "contigs": 147340, + "name": "Assembly Activity for nmdc:mga0andh11", + "ctg_max": 23974, + "gc_std": 0.0855, + "contig_bp": 61886959, + "gc_avg": 0.61759, "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", + "scaf_bp": 61887639, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 147272, "ended_at_time": "2021-10-11T04:13:04+00:00", - "output_data_objects": [ - { - "name": "Gp0127648_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", - "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", - "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "file_size_bytes": 3323 - }, - { - "name": "Gp0127648_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", - "md5_checksum": "1357df297d8d8a872b335e0c3222d102", - "id": "nmdc:1357df297d8d8a872b335e0c3222d102", - "file_size_bytes": 
782039 - }, - { - "name": "Gp0127648_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127648", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", - "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", - "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", - "file_size_bytes": 236971 - }, - { - "name": "Gp0127648_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127648", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", - "md5_checksum": "33bf814280051c220e0c4a06f7935728", - "id": "nmdc:33bf814280051c220e0c4a06f7935728", - "file_size_bytes": 1945479328 - }, - { - "name": "Gp0127648_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127648", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", - "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", - "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "file_size_bytes": 255338 - }, - { - "name": "Gp0127648_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127648", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", - "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", - "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", - "file_size_bytes": 2333371 - }, - { - "name": "Gp0127648_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127648", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", - "md5_checksum": "222bac312efdd6c86d2475ad224b7907", - "id": "nmdc:222bac312efdd6c86d2475ad224b7907", - "file_size_bytes": 1562011343 - }, - { - "name": "Gp0127648_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127648", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", - "md5_checksum": "baaca868b1fed932b463e489708dd741", - "id": "nmdc:baaca868b1fed932b463e489708dd741", - "file_size_bytes": 647859 - }, - { - "name": "Gp0127648_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127648", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", - "md5_checksum": "b549d169e5b0693152555373a6d8ee75", - "id": "nmdc:b549d169e5b0693152555373a6d8ee75", - "file_size_bytes": 3952548 - } + "ctg_l50": 381, + "ctg_l90": 285, + "ctg_n50": 47493, + "ctg_n90": 126039, + "scaf_l50": 381, + "scaf_l90": 285, + "scaf_n50": 47464, + "scaf_n90": 125972 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5b" + }, + "id": "nmdc:omprc-11-nhf5m035", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-b7nrtg75" + ], + "has_output": [ + "jgi:574fe0ac7ded5e3df1ee1491" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + 
"add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127648" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e7199b833bcf838a700dd2" + "$oid": "649b009d6bdd4fd20273c870" }, "has_input": [ - "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" + "nmdc:22bf7ba401619da2a191e7b30544a8ac" ], "part_of": [ "nmdc:mga0andh11" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "nmdc:1357df297d8d8a872b335e0c3222d102", - "nmdc:5b510e336e60b6120b43e9b6420a074e", - "nmdc:33bf814280051c220e0c4a06f7935728", - "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "nmdc:0efb0ad19234056d7e2e3726dead3622", - "nmdc:222bac312efdd6c86d2475ad224b7907", - "nmdc:baaca868b1fed932b463e489708dd741", - "nmdc:b549d169e5b0693152555373a6d8ee75" + "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", + "nmdc:2208c88cac6b941799d4492dbf5f0887" ], "was_informed_by": "gold:Gp0127648", + "input_read_count": 28064750, + "output_read_bases": 3953713958, "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0andh11", + "input_read_bases": 4237777250, + "name": "Read QC Activity for nmdc:mga0andh11", + "output_read_count": 26438892, "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:13:04+00:00", - "output_data_objects": [ - { - "name": "Gp0127648_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127648", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", - "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", - "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "file_size_bytes": 3323 - }, - { - "name": "Gp0127648_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", - "md5_checksum": "1357df297d8d8a872b335e0c3222d102", - "id": "nmdc:1357df297d8d8a872b335e0c3222d102", - "file_size_bytes": 782039 - }, - { - "name": "Gp0127648_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127648", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", - "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", - "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", - "file_size_bytes": 236971 - }, - { - "name": "Gp0127648_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127648", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", - "md5_checksum": "33bf814280051c220e0c4a06f7935728", - "id": "nmdc:33bf814280051c220e0c4a06f7935728", - "file_size_bytes": 1945479328 - }, - { - "name": "Gp0127648_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127648", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", - "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", - "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "file_size_bytes": 255338 - }, - { - "name": "Gp0127648_Centrifuge Krona HTML report", - "description": "Centrifuge 
Krona HTML report for Gp0127648", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", - "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", - "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", - "file_size_bytes": 2333371 - }, - { - "name": "Gp0127648_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127648", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", - "md5_checksum": "222bac312efdd6c86d2475ad224b7907", - "id": "nmdc:222bac312efdd6c86d2475ad224b7907", - "file_size_bytes": 1562011343 - }, - { - "name": "Gp0127648_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127648", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", - "md5_checksum": "baaca868b1fed932b463e489708dd741", - "id": "nmdc:baaca868b1fed932b463e489708dd741", - "file_size_bytes": 647859 - }, - { - "name": "Gp0127648_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127648", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", - "md5_checksum": "b549d169e5b0693152555373a6d8ee75", - "id": "nmdc:b549d169e5b0693152555373a6d8ee75", - "file_size_bytes": 3952548 - } - ] - }, + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:13:04+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fa5" + "$oid": "649b009bff710ae353f8cf39" }, "has_input": [ "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" ], - "part_of": [ - "nmdc:mga0andh11" - ], - "ctg_logsum": 91193, - "scaf_logsum": 91521, - 
"gap_pct": 0.0011, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ca10f7bae0565946414188c9121ee338", - "nmdc:cf23062373806986b70244b1fabbd17b", - "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", - "nmdc:303d7282e6f91afaa9564c65107d4086", - "nmdc:4a6ffadb01b62dd73278429808c1a39a" + "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "nmdc:1357df297d8d8a872b335e0c3222d102", + "nmdc:5b510e336e60b6120b43e9b6420a074e", + "nmdc:33bf814280051c220e0c4a06f7935728", + "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "nmdc:0efb0ad19234056d7e2e3726dead3622", + "nmdc:222bac312efdd6c86d2475ad224b7907", + "nmdc:baaca868b1fed932b463e489708dd741", + "nmdc:b549d169e5b0693152555373a6d8ee75" ], - "asm_score": 4.996, "was_informed_by": "gold:Gp0127648", - "ctg_powsum": 10170, - "scaf_max": 23974, "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "scaf_powsum": 10208, "execution_resource": "NERSC-Cori", - "contigs": 147340, - "name": "Assembly Activity for nmdc:mga0andh11", - "ctg_max": 23974, - "gc_std": 0.0855, - "contig_bp": 61886959, - "gc_avg": 0.61759, + "name": "ReadBased Analysis Activity for nmdc:mga0andh11", "started_at_time": "2021-10-11T02:23:29Z", - "scaf_bp": 61887639, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 147272, - "ended_at_time": "2021-10-11T04:13:04+00:00", - "ctg_l50": 381, - "ctg_l90": 285, - "ctg_n50": 47493, - "ctg_n90": 126039, - "scaf_l50": 381, - "scaf_l90": 285, - "scaf_n50": 47464, - "scaf_n90": 125972, - "output_data_objects": [ - { - "name": "Gp0127648_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127648", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_contigs.fna", - "md5_checksum": "ca10f7bae0565946414188c9121ee338", - "id": "nmdc:ca10f7bae0565946414188c9121ee338", - "file_size_bytes": 67439267 - }, - { - "name": "Gp0127648_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127648", 
- "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_scaffolds.fna", - "md5_checksum": "cf23062373806986b70244b1fabbd17b", - "id": "nmdc:cf23062373806986b70244b1fabbd17b", - "file_size_bytes": 66996134 - }, - { - "name": "Gp0127648_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_covstats.txt", - "md5_checksum": "99b2c3c91b299b9426cca9dfb10b0cea", - "id": "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", - "file_size_bytes": 11610674 - }, - { - "name": "Gp0127648_Assembled AGP file", - "description": "Assembled AGP file for Gp0127648", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_assembly.agp", - "md5_checksum": "303d7282e6f91afaa9564c65107d4086", - "id": "nmdc:303d7282e6f91afaa9564c65107d4086", - "file_size_bytes": 10842402 - }, - { - "name": "Gp0127648_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127648", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_pairedMapped_sorted.bam", - "md5_checksum": "4a6ffadb01b62dd73278429808c1a39a", - "id": "nmdc:4a6ffadb01b62dd73278429808c1a39a", - "file_size_bytes": 2362185094 - } - ] - }, + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:13:04+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9b8" + "$oid": "61e7199b833bcf838a700dd2" }, "has_input": [ - "nmdc:ca10f7bae0565946414188c9121ee338" + "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" ], "part_of": [ "nmdc:mga0andh11" ], 
"git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", - "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", - "nmdc:600011ab7e39465d3f9f28d5d93a4248", - "nmdc:0c8d98b369900cd19da39235e3eae6db", - "nmdc:16c37f8c4f74e7e81b7900536da55e39", - "nmdc:a7fc228cd8d224bbf2843ba6a6648480", - "nmdc:a57d9d86c20cfd13ddc56027110485ba", - "nmdc:6a4be27e2e7454941b73aa843471f25d", - "nmdc:be3d2a77be3ccd810d679f03204f8bac", - "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", - "nmdc:81ff9f257ffe63ca5d04db9e767620b1", - "nmdc:8768f37ff001a86a25ae34c7deee9d9a" + "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "nmdc:1357df297d8d8a872b335e0c3222d102", + "nmdc:5b510e336e60b6120b43e9b6420a074e", + "nmdc:33bf814280051c220e0c4a06f7935728", + "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "nmdc:0efb0ad19234056d7e2e3726dead3622", + "nmdc:222bac312efdd6c86d2475ad224b7907", + "nmdc:baaca868b1fed932b463e489708dd741", + "nmdc:b549d169e5b0693152555373a6d8ee75" ], "was_informed_by": "gold:Gp0127648", "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0andh11", + "name": "ReadBased Analysis Activity for nmdc:mga0andh11", "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:13:04+00:00", - "output_data_objects": [ - { - "name": "Gp0127648_Protein FAA", - "description": "Protein FAA for Gp0127648", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_proteins.faa", - "md5_checksum": "c4a719f3a899f7aa760f627f7b1ae6e7", - "id": "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", - "file_size_bytes": 40118426 - }, - { - "name": "Gp0127648_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127648", - "data_object_type": "Structural Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_structural_annotation.gff", - "md5_checksum": "80ab4116b1cdfbc3e4c4d06e5990d735", - "id": "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", - "file_size_bytes": 2507 - }, - { - "name": "Gp0127648_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127648", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_functional_annotation.gff", - "md5_checksum": "600011ab7e39465d3f9f28d5d93a4248", - "id": "nmdc:600011ab7e39465d3f9f28d5d93a4248", - "file_size_bytes": 47178055 - }, - { - "name": "Gp0127648_KO TSV file", - "description": "KO TSV file for Gp0127648", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko.tsv", - "md5_checksum": "0c8d98b369900cd19da39235e3eae6db", - "id": "nmdc:0c8d98b369900cd19da39235e3eae6db", - "file_size_bytes": 5498487 - }, - { - "name": "Gp0127648_EC TSV file", - "description": "EC TSV file for Gp0127648", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ec.tsv", - "md5_checksum": "16c37f8c4f74e7e81b7900536da55e39", - "id": "nmdc:16c37f8c4f74e7e81b7900536da55e39", - "file_size_bytes": 3650457 - }, - { - "name": "Gp0127648_COG GFF file", - "description": "COG GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cog.gff", - "md5_checksum": "a7fc228cd8d224bbf2843ba6a6648480", - "id": "nmdc:a7fc228cd8d224bbf2843ba6a6648480", - "file_size_bytes": 27226505 - }, - { - "name": "Gp0127648_PFAM GFF file", - "description": "PFAM GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_pfam.gff", - "md5_checksum": "a57d9d86c20cfd13ddc56027110485ba", - "id": 
"nmdc:a57d9d86c20cfd13ddc56027110485ba", - "file_size_bytes": 19896169 - }, - { - "name": "Gp0127648_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_tigrfam.gff", - "md5_checksum": "6a4be27e2e7454941b73aa843471f25d", - "id": "nmdc:6a4be27e2e7454941b73aa843471f25d", - "file_size_bytes": 2105656 - }, - { - "name": "Gp0127648_SMART GFF file", - "description": "SMART GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_smart.gff", - "md5_checksum": "be3d2a77be3ccd810d679f03204f8bac", - "id": "nmdc:be3d2a77be3ccd810d679f03204f8bac", - "file_size_bytes": 6062323 - }, - { - "name": "Gp0127648_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_supfam.gff", - "md5_checksum": "eb5ac02ce17f687c5ccf5a64548c559e", - "id": "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", - "file_size_bytes": 33896425 - }, - { - "name": "Gp0127648_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cath_funfam.gff", - "md5_checksum": "81ff9f257ffe63ca5d04db9e767620b1", - "id": "nmdc:81ff9f257ffe63ca5d04db9e767620b1", - "file_size_bytes": 25515156 - }, - { - "name": "Gp0127648_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko_ec.gff", - "md5_checksum": "8768f37ff001a86a25ae34c7deee9d9a", - "id": "nmdc:8768f37ff001a86a25ae34c7deee9d9a", - "file_size_bytes": 17491444 - } - ] + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:13:04+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + 
"activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127647_Filtered Reads", + "description": "Filtered Reads for Gp0127647", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", + "md5_checksum": "c082eff434fe4863c0e29c79b759d100", + "id": "nmdc:c082eff434fe4863c0e29c79b759d100", + "file_size_bytes": 2052448806 + }, + { + "name": "Gp0127647_Filtered Stats", + "description": "Filtered Stats for Gp0127647", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", + "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", + "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", + "file_size_bytes": 282 + }, + { + "name": "Gp0127647_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", + "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", + "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", + "file_size_bytes": 4666 + }, + { + "name": "Gp0127647_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", + "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", + "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", + "file_size_bytes": 786018 + }, + { + "name": "Gp0127647_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127647", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", + "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", + "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", + "file_size_bytes": 237895 }, + { + "name": "Gp0127647_Centrifuge 
classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127647", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", + "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", + "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", + "file_size_bytes": 1767305277 + }, + { + "name": "Gp0127647_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127647", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", + "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", + "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", + "file_size_bytes": 254858 + }, + { + "name": "Gp0127647_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127647", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", + "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", + "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", + "file_size_bytes": 2332396 + }, + { + "name": "Gp0127647_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127647", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", + "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", + "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", + "file_size_bytes": 1419938277 + }, + { + "name": "Gp0127647_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127647", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", + 
"md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", + "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", + "file_size_bytes": 661837 + }, + { + "name": "Gp0127647_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127647", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", + "md5_checksum": "ab80fc324c9206a41a66d64227a97179", + "id": "nmdc:ab80fc324c9206a41a66d64227a97179", + "file_size_bytes": 4028822 + }, + { + "name": "Gp0127647_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", + "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", + "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", + "file_size_bytes": 4666 + }, + { + "name": "Gp0127647_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", + "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", + "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", + "file_size_bytes": 786018 + }, + { + "name": "Gp0127647_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127647", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", + "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", + "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", + "file_size_bytes": 237895 + }, + { + "name": "Gp0127647_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127647", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", + "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", + "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", + "file_size_bytes": 1767305277 + }, + { + "name": "Gp0127647_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127647", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", + "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", + "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", + "file_size_bytes": 254858 + }, + { + "name": "Gp0127647_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127647", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", + "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", + "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", + "file_size_bytes": 2332396 + }, + { + "name": "Gp0127647_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127647", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", + "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", + "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", + "file_size_bytes": 1419938277 + }, + { + "name": "Gp0127647_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127647", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", + "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", + "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", + "file_size_bytes": 661837 + }, + { + "name": "Gp0127647_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127647", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", + "md5_checksum": "ab80fc324c9206a41a66d64227a97179", + "id": "nmdc:ab80fc324c9206a41a66d64227a97179", + "file_size_bytes": 4028822 + }, + { + "name": "Gp0127647_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127647", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_contigs.fna", + "md5_checksum": "05952c056a6db782ba77c6369206838a", + "id": "nmdc:05952c056a6db782ba77c6369206838a", + "file_size_bytes": 41696500 + }, + { + "name": "Gp0127647_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127647", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_scaffolds.fna", + "md5_checksum": "6fa8f2d4236fda4f628436ed85094e3b", + "id": "nmdc:6fa8f2d4236fda4f628436ed85094e3b", + "file_size_bytes": 41403892 + }, + { + "name": "Gp0127647_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_covstats.txt", + "md5_checksum": "82be5b6248eb4b0bfef1c9afa5c5c0bc", + "id": "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", + "file_size_bytes": 7629542 + }, + { + "name": "Gp0127647_Assembled AGP file", + "description": "Assembled AGP file for Gp0127647", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_assembly.agp", + "md5_checksum": "fee22437c76dc343846f41e1be538b9d", + "id": "nmdc:fee22437c76dc343846f41e1be538b9d", + "file_size_bytes": 7091204 + }, + { + "name": "Gp0127647_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0127647", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_pairedMapped_sorted.bam", + "md5_checksum": "7fc9fd7844b6ce48869a0ad5216da4dc", + "id": "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", + "file_size_bytes": 2190560397 + }, + { + "name": "Gp0127647_Protein FAA", + "description": "Protein FAA for Gp0127647", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_proteins.faa", + "md5_checksum": "b95b8538748c921fac6c93ba55d43e2c", + "id": "nmdc:b95b8538748c921fac6c93ba55d43e2c", + "file_size_bytes": 23580407 + }, + { + "name": "Gp0127647_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127647", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_structural_annotation.gff", + "md5_checksum": "9c63632766a4946bc76829a7dafe49c0", + "id": "nmdc:9c63632766a4946bc76829a7dafe49c0", + "file_size_bytes": 2925 + }, + { + "name": "Gp0127647_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127647", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_functional_annotation.gff", + "md5_checksum": "0c5e791c8170181aa3e43d710e7c55eb", + "id": "nmdc:0c5e791c8170181aa3e43d710e7c55eb", + "file_size_bytes": 28355659 + }, + { + "name": "Gp0127647_KO TSV file", + "description": "KO TSV file for Gp0127647", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko.tsv", + "md5_checksum": "358cb8682dd2d5c1b7a691e9f7734acc", + "id": "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", + "file_size_bytes": 3251676 + }, + { + "name": "Gp0127647_EC TSV file", + 
"description": "EC TSV file for Gp0127647", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ec.tsv", + "md5_checksum": "d770a8c872a3a359bf3482e564c56988", + "id": "nmdc:d770a8c872a3a359bf3482e564c56988", + "file_size_bytes": 2134531 + }, + { + "name": "Gp0127647_COG GFF file", + "description": "COG GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cog.gff", + "md5_checksum": "cdecaf6cff3fc2d559cc3313599b137b", + "id": "nmdc:cdecaf6cff3fc2d559cc3313599b137b", + "file_size_bytes": 15119260 + }, + { + "name": "Gp0127647_PFAM GFF file", + "description": "PFAM GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_pfam.gff", + "md5_checksum": "7dedc14d5645ae32f913d8f823ba5aa3", + "id": "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", + "file_size_bytes": 11013734 + }, + { + "name": "Gp0127647_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_tigrfam.gff", + "md5_checksum": "809e6d246bd10968d4da074db08216d9", + "id": "nmdc:809e6d246bd10968d4da074db08216d9", + "file_size_bytes": 1131416 + }, + { + "name": "Gp0127647_SMART GFF file", + "description": "SMART GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_smart.gff", + "md5_checksum": "546d11411d30ab337a215d0094fc36b6", + "id": "nmdc:546d11411d30ab337a215d0094fc36b6", + "file_size_bytes": 3424877 + }, + { + "name": "Gp0127647_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_supfam.gff", + "md5_checksum": "6eb654de91a99eb4e01e1bf9513a6208", + "id": "nmdc:6eb654de91a99eb4e01e1bf9513a6208", + "file_size_bytes": 19463761 + }, + { + 
"name": "Gp0127647_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cath_funfam.gff", + "md5_checksum": "a8ae7ed318e7c170aeed508f331ce5b2", + "id": "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", + "file_size_bytes": 14536820 + }, + { + "name": "Gp0127647_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko_ec.gff", + "md5_checksum": "455f95c7c15739b2fddc6f62b03253ed", + "id": "nmdc:455f95c7c15739b2fddc6f62b03253ed", + "file_size_bytes": 10367039 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127647_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.tooShort.fa", + "md5_checksum": "8ec4227eca7ea06fed4e866c4de4a5c9", + "id": "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", + "file_size_bytes": 38197270 + }, + { + "name": "Gp0127647_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.unbinned.fa", + "md5_checksum": "40c0cbc75e2b698572b8b94d91fdc236", + "id": "nmdc:40c0cbc75e2b698572b8b94d91fdc236", + "file_size_bytes": 3202231 + }, + { + "name": "Gp0127647_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin 
checkm quality assessment result for Gp0127647", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", + "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", + "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "file_size_bytes": 775 + }, + { + "name": "Gp0127647_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127647", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_hqmq_bin.zip", + "md5_checksum": "03b448db547a556e988a0d4948dab424", + "id": "nmdc:03b448db547a556e988a0d4948dab424", + "file_size_bytes": 182 + }, + { + "name": "Gp0127647_metabat2 bins", + "description": "metabat2 bins for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_metabat_bin.zip", + "md5_checksum": "6e92868d1912cb8f5b32fbf507721d16", + "id": "nmdc:6e92868d1912cb8f5b32fbf507721d16", + "file_size_bytes": 91931 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34718" + "$oid": "649b0052ec087f6bbab34716" }, "has_input": [ - "nmdc:ca10f7bae0565946414188c9121ee338", - "nmdc:4a6ffadb01b62dd73278429808c1a39a", - "nmdc:600011ab7e39465d3f9f28d5d93a4248" + "nmdc:05952c056a6db782ba77c6369206838a", + "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", + "nmdc:0c5e791c8170181aa3e43d710e7c55eb" ], - "too_short_contig_num": 142847, + "too_short_contig_num": 95291, "part_of": [ - "nmdc:mga0andh11" + "nmdc:mga0g0e588" ], - "binned_contig_num": 329, + "binned_contig_num": 20, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", - "nmdc:fc8454a790709b36d7ca96cd99359d26", - "nmdc:942bd7c28c52e6301bf97dab0ea2852a", - 
"nmdc:82ebf9065be9715e1230a50bf7a02197", - "nmdc:897536007e7e3525457df5d3baddd593" + "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", + "nmdc:40c0cbc75e2b698572b8b94d91fdc236", + "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "nmdc:03b448db547a556e988a0d4948dab424", + "nmdc:6e92868d1912cb8f5b32fbf507721d16" ], - "was_informed_by": "gold:Gp0127648", - "input_contig_num": 147340, - "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0andh11", - "mags_list": [ - { - "number_of_contig": 255, - "completeness": 70.91, - "bin_name": "bins.1", - "gene_count": 1857, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 1, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 1.94, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 28 - }, + "was_informed_by": "gold:Gp0127647", + "input_contig_num": 97351, + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0g0e588", + "mags_list": [ { - "number_of_contig": 74, - "completeness": 19.91, - "bin_name": "bins.2", - "gene_count": 380, + "number_of_contig": 20, + "completeness": 1.36, + "bin_name": "bins.1", + "gene_count": 310, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", - "num_16s": 0, + "num_16s": 2, "gtdbtk_family": "", "gtdbtk_domain": "", "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, - "num_23s": 0, + "num_23s": 2, "gtdbtk_genus": "", - "num_t_rna": 5 + "num_t_rna": 14 } ], - "unbinned_contig_num": 4164, - "started_at_time": "2021-10-11T02:23:29Z", + "unbinned_contig_num": 2040, + "started_at_time": "2021-10-11T02:24:27Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:13:04+00:00", - "output_data_objects": [ - { - "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127648_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.tooShort.fa", - "md5_checksum": "8b67e5038c55083e2aa8e19c5d05fef8", - "id": "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", - "file_size_bytes": 58962192 - }, - { - "name": "Gp0127648_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.unbinned.fa", - "md5_checksum": "fc8454a790709b36d7ca96cd99359d26", - "id": "nmdc:fc8454a790709b36d7ca96cd99359d26", - "file_size_bytes": 6656731 - }, - { - "name": "Gp0127648_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127648", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_checkm_qa.out", - "md5_checksum": "942bd7c28c52e6301bf97dab0ea2852a", - "id": "nmdc:942bd7c28c52e6301bf97dab0ea2852a", - "file_size_bytes": 930 - }, - { - "name": "Gp0127648_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127648", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_hqmq_bin.zip", - "md5_checksum": "82ebf9065be9715e1230a50bf7a02197", - "id": 
"nmdc:82ebf9065be9715e1230a50bf7a02197", - "file_size_bytes": 466157 - }, - { - "name": "Gp0127648_metabat2 bins", - "description": "metabat2 bins for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_metabat_bin.zip", - "md5_checksum": "897536007e7e3525457df5d3baddd593", - "id": "nmdc:897536007e7e3525457df5d3baddd593", - "file_size_bytes": 90255 - } + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b5" + }, + "has_input": [ + "nmdc:05952c056a6db782ba77c6369206838a" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b95b8538748c921fac6c93ba55d43e2c", + "nmdc:9c63632766a4946bc76829a7dafe49c0", + "nmdc:0c5e791c8170181aa3e43d710e7c55eb", + "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", + "nmdc:d770a8c872a3a359bf3482e564c56988", + "nmdc:cdecaf6cff3fc2d559cc3313599b137b", + "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", + "nmdc:809e6d246bd10968d4da074db08216d9", + "nmdc:546d11411d30ab337a215d0094fc36b6", + "nmdc:6eb654de91a99eb4e01e1bf9513a6208", + "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", + "nmdc:455f95c7c15739b2fddc6f62b03253ed" + ], + "was_informed_by": "gold:Gp0127647", + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0g0e588", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa0" + }, + "has_input": [ + "nmdc:c082eff434fe4863c0e29c79b759d100" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "ctg_logsum": 37666, + "scaf_logsum": 37899, + "gap_pct": 0.00092, + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05952c056a6db782ba77c6369206838a", + "nmdc:6fa8f2d4236fda4f628436ed85094e3b", + "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", + "nmdc:fee22437c76dc343846f41e1be538b9d", + "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc" + ], + "asm_score": 14.664, + "was_informed_by": "gold:Gp0127647", + "ctg_powsum": 4336.355, + "scaf_max": 96788, + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "scaf_powsum": 4362.772, + "execution_resource": "NERSC-Cori", + "contigs": 97351, + "name": "Assembly Activity for nmdc:mga0g0e588", + "ctg_max": 96788, + "gc_std": 0.13435, + "contig_bp": 38110297, + "gc_avg": 0.5552, + "started_at_time": "2021-10-11T02:24:27Z", + "scaf_bp": 38110647, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 97316, + "ended_at_time": "2021-10-11T03:38:33+00:00", + "ctg_l50": 353, + "ctg_l90": 283, + "ctg_n50": 34144, + "ctg_n90": 85387, + "scaf_l50": 353, + "scaf_l90": 283, + "scaf_n50": 34125, + "scaf_n90": 85353, + "scaf_l_gt50k": 153917, + "scaf_n_gt50k": 2, + "scaf_pct_gt50k": 0.40386876 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5c" + }, + "id": "nmdc:omprc-11-w3v30q48", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-q44pjf87" + ], + "has_output": [ + "jgi:574fde8a7ded5e3df1ee1422" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation 
nearby in Columbia River, Washington, USA - GW-RW S2_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127647" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b5c" - }, - "id": "nmdc:omprc-11-w3v30q48", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-q44pjf87" - ], - "has_output": [ - "jgi:574fde8a7ded5e3df1ee1422" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127647" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { "$oid": "649b009d6bdd4fd20273c86e" @@ -27647,28 +29293,10 @@ "output_read_count": 24128544, "started_at_time": "2021-10-11T02:24:27Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:38:33+00:00", - "output_data_objects": [ - { - "name": "Gp0127647_Filtered Reads", - "description": "Filtered Reads for Gp0127647", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", - "md5_checksum": 
"c082eff434fe4863c0e29c79b759d100", - "id": "nmdc:c082eff434fe4863c0e29c79b759d100", - "file_size_bytes": 2052448806 - }, - { - "name": "Gp0127647_Filtered Stats", - "description": "Filtered Stats for Gp0127647", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", - "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", - "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", - "file_size_bytes": 282 - } - ] - }, + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { "$oid": "649b009bff710ae353f8cf32" @@ -27694,89 +29322,16 @@ "name": "ReadBased Analysis Activity for nmdc:mga0g0e588", "started_at_time": "2021-10-11T02:24:27Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:33+00:00", - "output_data_objects": [ - { - "name": "Gp0127647_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", - "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", - "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", - "file_size_bytes": 4666 - }, - { - "name": "Gp0127647_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", - "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", - "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", - "file_size_bytes": 786018 - }, - { - "name": "Gp0127647_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127647", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", - "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", - "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", - 
"file_size_bytes": 237895 - }, - { - "name": "Gp0127647_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127647", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", - "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", - "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", - "file_size_bytes": 1767305277 - }, - { - "name": "Gp0127647_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127647", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", - "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", - "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", - "file_size_bytes": 254858 - }, - { - "name": "Gp0127647_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127647", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", - "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", - "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", - "file_size_bytes": 2332396 - }, - { - "name": "Gp0127647_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127647", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", - "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", - "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", - "file_size_bytes": 1419938277 - }, - { - "name": "Gp0127647_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127647", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", - "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", - "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", - "file_size_bytes": 661837 - }, - { - "name": "Gp0127647_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127647", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", - "md5_checksum": "ab80fc324c9206a41a66d64227a97179", - "id": "nmdc:ab80fc324c9206a41a66d64227a97179", - "file_size_bytes": 4028822 - } - ] - }, + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { "$oid": "61e7197e833bcf838a700a51" @@ -27805,462 +29360,577 @@ "name": "ReadBased Analysis Activity for nmdc:mga0g0e588", "started_at_time": "2021-10-11T02:24:27Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:33+00:00", - "output_data_objects": [ - { - "name": "Gp0127647_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", - "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", - "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", - "file_size_bytes": 4666 - }, - { - "name": "Gp0127647_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", - "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", - "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", - "file_size_bytes": 786018 - }, - { - "name": "Gp0127647_Gottcha2 Krona HTML report", 
- "description": "Gottcha2 Krona HTML report for Gp0127647", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", - "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", - "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", - "file_size_bytes": 237895 - }, - { - "name": "Gp0127647_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127647", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", - "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", - "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", - "file_size_bytes": 1767305277 - }, - { - "name": "Gp0127647_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127647", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", - "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", - "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", - "file_size_bytes": 254858 - }, - { - "name": "Gp0127647_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127647", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", - "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", - "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", - "file_size_bytes": 2332396 - }, - { - "name": "Gp0127647_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127647", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", - "md5_checksum": 
"45617f93e5f072fbad25a0308ead6c3d", - "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", - "file_size_bytes": 1419938277 - }, - { - "name": "Gp0127647_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127647", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", - "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", - "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", - "file_size_bytes": 661837 - }, - { - "name": "Gp0127647_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127647", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", - "md5_checksum": "ab80fc324c9206a41a66d64227a97179", - "id": "nmdc:ab80fc324c9206a41a66d64227a97179", - "file_size_bytes": 4028822 - } - ] + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127645_Filtered Reads", + "description": "Filtered Reads for Gp0127645", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filtered.fastq.gz", + "md5_checksum": "034df323b47f010f27e7c032d445a891", + "id": "nmdc:034df323b47f010f27e7c032d445a891", + "file_size_bytes": 1909192845 + }, + { + "name": "Gp0127645_Filtered Stats", + "description": "Filtered Stats for Gp0127645", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filterStats.txt", + "md5_checksum": "ca137bf5e2df6541425f22b5d1fec492", + "id": "nmdc:ca137bf5e2df6541425f22b5d1fec492", + "file_size_bytes": 283 + }, + { + "name": "Gp0127645_Gottcha2 TSV report", + 
"description": "Gottcha2 TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", + "md5_checksum": "694374188ba4372344536fa26a2282b8", + "id": "nmdc:694374188ba4372344536fa26a2282b8", + "file_size_bytes": 3780 + }, + { + "name": "Gp0127645_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", + "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", + "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", + "file_size_bytes": 822292 + }, + { + "name": "Gp0127645_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127645", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", + "md5_checksum": "46e203465faf61780fad8f626e9ab623", + "id": "nmdc:46e203465faf61780fad8f626e9ab623", + "file_size_bytes": 236496 + }, + { + "name": "Gp0127645_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127645", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", + "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", + "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", + "file_size_bytes": 1699052782 + }, + { + "name": "Gp0127645_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127645", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", + "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", + "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", + "file_size_bytes": 256209 + }, + { + "name": 
"Gp0127645_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127645", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", + "md5_checksum": "4299b438a815becc8beed40fcb803e9f", + "id": "nmdc:4299b438a815becc8beed40fcb803e9f", + "file_size_bytes": 2336400 + }, + { + "name": "Gp0127645_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127645", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", + "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", + "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", + "file_size_bytes": 1359323947 + }, + { + "name": "Gp0127645_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127645", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", + "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", + "id": "nmdc:2be07eb38d408077a55ecb48e123f7f8", + "file_size_bytes": 651624 + }, + { + "name": "Gp0127645_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127645", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", + "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", + "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", + "file_size_bytes": 3973557 + }, + { + "name": "Gp0127645_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", + "md5_checksum": "694374188ba4372344536fa26a2282b8", + "id": 
"nmdc:694374188ba4372344536fa26a2282b8", + "file_size_bytes": 3780 + }, + { + "name": "Gp0127645_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", + "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", + "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", + "file_size_bytes": 822292 + }, + { + "name": "Gp0127645_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127645", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", + "md5_checksum": "46e203465faf61780fad8f626e9ab623", + "id": "nmdc:46e203465faf61780fad8f626e9ab623", + "file_size_bytes": 236496 + }, + { + "name": "Gp0127645_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127645", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", + "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", + "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", + "file_size_bytes": 1699052782 + }, + { + "name": "Gp0127645_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127645", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", + "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", + "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", + "file_size_bytes": 256209 + }, + { + "name": "Gp0127645_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127645", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", + "md5_checksum": "4299b438a815becc8beed40fcb803e9f", + "id": "nmdc:4299b438a815becc8beed40fcb803e9f", + "file_size_bytes": 2336400 + }, + { + "name": "Gp0127645_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127645", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", + "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", + "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", + "file_size_bytes": 1359323947 + }, + { + "name": "Gp0127645_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127645", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", + "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", + "id": "nmdc:2be07eb38d408077a55ecb48e123f7f8", + "file_size_bytes": 651624 + }, + { + "name": "Gp0127645_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127645", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", + "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", + "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", + "file_size_bytes": 3973557 + }, + { + "name": "Gp0127645_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127645", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_contigs.fna", + "md5_checksum": "3685fdcfffdf34d2802c692dc0515e33", + "id": "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "file_size_bytes": 49479236 + }, + { + "name": "Gp0127645_Assembled scaffold fasta", + "description": "Assembled scaffold 
fasta for Gp0127645", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_scaffolds.fna", + "md5_checksum": "7891adab80c63d98169e3cb7b4331f1e", + "id": "nmdc:7891adab80c63d98169e3cb7b4331f1e", + "file_size_bytes": 49157929 + }, + { + "name": "Gp0127645_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_covstats.txt", + "md5_checksum": "d883460ae5f8cbabc3d437e745935040", + "id": "nmdc:d883460ae5f8cbabc3d437e745935040", + "file_size_bytes": 8394481 + }, + { + "name": "Gp0127645_Assembled AGP file", + "description": "Assembled AGP file for Gp0127645", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_assembly.agp", + "md5_checksum": "f36166196caa529e09f3b93e17db3acc", + "id": "nmdc:f36166196caa529e09f3b93e17db3acc", + "file_size_bytes": 7804199 + }, + { + "name": "Gp0127645_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127645", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_pairedMapped_sorted.bam", + "md5_checksum": "08a13111a5314ec4c8dbaa59790dc2f1", + "id": "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", + "file_size_bytes": 2047004915 + }, + { + "name": "Gp0127645_Protein FAA", + "description": "Protein FAA for Gp0127645", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_proteins.faa", + "md5_checksum": "b14fecfaa99eaad42128e409aa7ae3ec", + "id": "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", + "file_size_bytes": 29015561 + }, + { + "name": "Gp0127645_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127645", + "data_object_type": 
"Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_structural_annotation.gff", + "md5_checksum": "851584f7bcec80cddec4b113fe6cfcea", + "id": "nmdc:851584f7bcec80cddec4b113fe6cfcea", + "file_size_bytes": 2506 + }, + { + "name": "Gp0127645_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127645", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_functional_annotation.gff", + "md5_checksum": "d0280881c70c54946d9b5170e62b904b", + "id": "nmdc:d0280881c70c54946d9b5170e62b904b", + "file_size_bytes": 34124039 + }, + { + "name": "Gp0127645_KO TSV file", + "description": "KO TSV file for Gp0127645", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko.tsv", + "md5_checksum": "7c1894478af7b8205bb4760acb93c353", + "id": "nmdc:7c1894478af7b8205bb4760acb93c353", + "file_size_bytes": 3942110 + }, + { + "name": "Gp0127645_EC TSV file", + "description": "EC TSV file for Gp0127645", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ec.tsv", + "md5_checksum": "ac413560dfdbcea1f0697391b593c552", + "id": "nmdc:ac413560dfdbcea1f0697391b593c552", + "file_size_bytes": 2691460 + }, + { + "name": "Gp0127645_COG GFF file", + "description": "COG GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cog.gff", + "md5_checksum": "80f846ff418e4758f4c6b9a96ba2b8ca", + "id": "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", + "file_size_bytes": 19597211 + }, + { + "name": "Gp0127645_PFAM GFF file", + "description": "PFAM GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_pfam.gff", + "md5_checksum": 
"bbfcd35137b7cb018945a531704805eb", + "id": "nmdc:bbfcd35137b7cb018945a531704805eb", + "file_size_bytes": 14110039 + }, + { + "name": "Gp0127645_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_tigrfam.gff", + "md5_checksum": "c1c10952c472a97fb7de8bc7dbce564b", + "id": "nmdc:c1c10952c472a97fb7de8bc7dbce564b", + "file_size_bytes": 1502814 }, + { + "name": "Gp0127645_SMART GFF file", + "description": "SMART GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_smart.gff", + "md5_checksum": "b86dba5a29f4ca25cec7c0590e0b4771", + "id": "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", + "file_size_bytes": 4354176 + }, + { + "name": "Gp0127645_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_supfam.gff", + "md5_checksum": "a701026580285ca67816cb9a2f272ca6", + "id": "nmdc:a701026580285ca67816cb9a2f272ca6", + "file_size_bytes": 24911282 + }, + { + "name": "Gp0127645_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cath_funfam.gff", + "md5_checksum": "5ce71fa6aebdb4fb9f843e89ab53ca9b", + "id": "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", + "file_size_bytes": 18832113 + }, + { + "name": "Gp0127645_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko_ec.gff", + "md5_checksum": "47c0e39e60bd4d688a29ede2af2cee35", + "id": "nmdc:47c0e39e60bd4d688a29ede2af2cee35", + "file_size_bytes": 12581509 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127645_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.tooShort.fa", + "md5_checksum": "47d1233f5afdd7b00790ac2ca8be778a", + "id": "nmdc:47d1233f5afdd7b00790ac2ca8be778a", + "file_size_bytes": 43078346 + }, + { + "name": "Gp0127645_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.unbinned.fa", + "md5_checksum": "637bc2394dcb4869149370683ccc9e61", + "id": "nmdc:637bc2394dcb4869149370683ccc9e61", + "file_size_bytes": 6153132 + }, + { + "name": "Gp0127645_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127645", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_checkm_qa.out", + "md5_checksum": "a8e49a136701e388199a72f02bb6d288", + "id": "nmdc:a8e49a136701e388199a72f02bb6d288", + "file_size_bytes": 765 + }, + { + "name": "Gp0127645_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127645", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_hqmq_bin.zip", + "md5_checksum": "b0d2597d04809508e9dd0bcb48c7edad", + "id": "nmdc:b0d2597d04809508e9dd0bcb48c7edad", + "file_size_bytes": 182 + }, + { + "name": "Gp0127645_metabat2 bins", + "description": "metabat2 bins for Gp0127645", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_metabat_bin.zip", + "md5_checksum": "106983a66b58a2d07f0592d9379ad635", + "id": "nmdc:106983a66b58a2d07f0592d9379ad635", + "file_size_bytes": 76018 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fa0" + "$oid": "649b0052ec087f6bbab34717" }, "has_input": [ - "nmdc:c082eff434fe4863c0e29c79b759d100" + "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", + "nmdc:d0280881c70c54946d9b5170e62b904b" ], + "too_short_contig_num": 102729, "part_of": [ - "nmdc:mga0g0e588" + "nmdc:mga0jbfx89" ], - "ctg_logsum": 37666, - "scaf_logsum": 37899, - "gap_pct": 0.00092, + "binned_contig_num": 61, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:05952c056a6db782ba77c6369206838a", - "nmdc:6fa8f2d4236fda4f628436ed85094e3b", - "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", - "nmdc:fee22437c76dc343846f41e1be538b9d", - "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:47d1233f5afdd7b00790ac2ca8be778a", + "nmdc:637bc2394dcb4869149370683ccc9e61", + "nmdc:a8e49a136701e388199a72f02bb6d288", + "nmdc:b0d2597d04809508e9dd0bcb48c7edad", + "nmdc:106983a66b58a2d07f0592d9379ad635" ], - "asm_score": 14.664, - "was_informed_by": "gold:Gp0127647", - "ctg_powsum": 4336.355, - "scaf_max": 96788, - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", - "scaf_powsum": 4362.772, + "was_informed_by": "gold:Gp0127645", + "input_contig_num": 106865, + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", "execution_resource": "NERSC-Cori", - "contigs": 97351, - "name": "Assembly Activity for nmdc:mga0g0e588", - "ctg_max": 96788, - "gc_std": 0.13435, - "contig_bp": 38110297, - "gc_avg": 0.5552, - "started_at_time": "2021-10-11T02:24:27Z", - "scaf_bp": 38110647, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 97316, 
- "ended_at_time": "2021-10-11T03:38:33+00:00", - "ctg_l50": 353, - "ctg_l90": 283, - "ctg_n50": 34144, - "ctg_n90": 85387, - "scaf_l50": 353, - "scaf_l90": 283, - "scaf_n50": 34125, - "scaf_n90": 85353, - "scaf_l_gt50k": 153917, - "scaf_n_gt50k": 2, - "scaf_pct_gt50k": 0.40386876, - "output_data_objects": [ - { - "name": "Gp0127647_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127647", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_contigs.fna", - "md5_checksum": "05952c056a6db782ba77c6369206838a", - "id": "nmdc:05952c056a6db782ba77c6369206838a", - "file_size_bytes": 41696500 - }, - { - "name": "Gp0127647_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127647", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_scaffolds.fna", - "md5_checksum": "6fa8f2d4236fda4f628436ed85094e3b", - "id": "nmdc:6fa8f2d4236fda4f628436ed85094e3b", - "file_size_bytes": 41403892 - }, - { - "name": "Gp0127647_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_covstats.txt", - "md5_checksum": "82be5b6248eb4b0bfef1c9afa5c5c0bc", - "id": "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", - "file_size_bytes": 7629542 - }, - { - "name": "Gp0127647_Assembled AGP file", - "description": "Assembled AGP file for Gp0127647", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_assembly.agp", - "md5_checksum": "fee22437c76dc343846f41e1be538b9d", - "id": "nmdc:fee22437c76dc343846f41e1be538b9d", - "file_size_bytes": 7091204 - }, + "name": "MAGs Analysis Activity for nmdc:mga0jbfx89", + "mags_list": [ { - "name": "Gp0127647_Metagenome Alignment BAM file", - "description": "Metagenome 
Alignment BAM file for Gp0127647", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_pairedMapped_sorted.bam", - "md5_checksum": "7fc9fd7844b6ce48869a0ad5216da4dc", - "id": "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", - "file_size_bytes": 2190560397 + "number_of_contig": 61, + "completeness": 18.77, + "bin_name": "bins.1", + "gene_count": 307, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 } - ] - }, + ], + "unbinned_contig_num": 4075, + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9b5" + "$oid": "649b005bbf2caae0415ef9b6" }, "has_input": [ - "nmdc:05952c056a6db782ba77c6369206838a" + "nmdc:3685fdcfffdf34d2802c692dc0515e33" ], "part_of": [ - "nmdc:mga0g0e588" + "nmdc:mga0jbfx89" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:b95b8538748c921fac6c93ba55d43e2c", - "nmdc:9c63632766a4946bc76829a7dafe49c0", - "nmdc:0c5e791c8170181aa3e43d710e7c55eb", - "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", - "nmdc:d770a8c872a3a359bf3482e564c56988", - "nmdc:cdecaf6cff3fc2d559cc3313599b137b", - "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", - "nmdc:809e6d246bd10968d4da074db08216d9", - "nmdc:546d11411d30ab337a215d0094fc36b6", - "nmdc:6eb654de91a99eb4e01e1bf9513a6208", - "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", - "nmdc:455f95c7c15739b2fddc6f62b03253ed" + "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", + "nmdc:851584f7bcec80cddec4b113fe6cfcea", + 
"nmdc:d0280881c70c54946d9b5170e62b904b", + "nmdc:7c1894478af7b8205bb4760acb93c353", + "nmdc:ac413560dfdbcea1f0697391b593c552", + "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", + "nmdc:bbfcd35137b7cb018945a531704805eb", + "nmdc:c1c10952c472a97fb7de8bc7dbce564b", + "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", + "nmdc:a701026580285ca67816cb9a2f272ca6", + "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", + "nmdc:47c0e39e60bd4d688a29ede2af2cee35" ], - "was_informed_by": "gold:Gp0127647", - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "was_informed_by": "gold:Gp0127645", + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0g0e588", - "started_at_time": "2021-10-11T02:24:27Z", + "name": "Annotation Activity for nmdc:mga0jbfx89", + "started_at_time": "2021-10-11T02:24:42Z", "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:38:33+00:00", - "output_data_objects": [ - { - "name": "Gp0127647_Protein FAA", - "description": "Protein FAA for Gp0127647", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_proteins.faa", - "md5_checksum": "b95b8538748c921fac6c93ba55d43e2c", - "id": "nmdc:b95b8538748c921fac6c93ba55d43e2c", - "file_size_bytes": 23580407 - }, - { - "name": "Gp0127647_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127647", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_structural_annotation.gff", - "md5_checksum": "9c63632766a4946bc76829a7dafe49c0", - "id": "nmdc:9c63632766a4946bc76829a7dafe49c0", - "file_size_bytes": 2925 - }, - { - "name": "Gp0127647_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127647", - "data_object_type": "Functional Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_functional_annotation.gff", - "md5_checksum": "0c5e791c8170181aa3e43d710e7c55eb", - "id": "nmdc:0c5e791c8170181aa3e43d710e7c55eb", - "file_size_bytes": 28355659 - }, - { - "name": "Gp0127647_KO TSV file", - "description": "KO TSV file for Gp0127647", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko.tsv", - "md5_checksum": "358cb8682dd2d5c1b7a691e9f7734acc", - "id": "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", - "file_size_bytes": 3251676 - }, - { - "name": "Gp0127647_EC TSV file", - "description": "EC TSV file for Gp0127647", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ec.tsv", - "md5_checksum": "d770a8c872a3a359bf3482e564c56988", - "id": "nmdc:d770a8c872a3a359bf3482e564c56988", - "file_size_bytes": 2134531 - }, - { - "name": "Gp0127647_COG GFF file", - "description": "COG GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cog.gff", - "md5_checksum": "cdecaf6cff3fc2d559cc3313599b137b", - "id": "nmdc:cdecaf6cff3fc2d559cc3313599b137b", - "file_size_bytes": 15119260 - }, - { - "name": "Gp0127647_PFAM GFF file", - "description": "PFAM GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_pfam.gff", - "md5_checksum": "7dedc14d5645ae32f913d8f823ba5aa3", - "id": "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", - "file_size_bytes": 11013734 - }, - { - "name": "Gp0127647_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_tigrfam.gff", - "md5_checksum": "809e6d246bd10968d4da074db08216d9", - "id": "nmdc:809e6d246bd10968d4da074db08216d9", - "file_size_bytes": 1131416 - }, - { - "name": 
"Gp0127647_SMART GFF file", - "description": "SMART GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_smart.gff", - "md5_checksum": "546d11411d30ab337a215d0094fc36b6", - "id": "nmdc:546d11411d30ab337a215d0094fc36b6", - "file_size_bytes": 3424877 - }, - { - "name": "Gp0127647_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_supfam.gff", - "md5_checksum": "6eb654de91a99eb4e01e1bf9513a6208", - "id": "nmdc:6eb654de91a99eb4e01e1bf9513a6208", - "file_size_bytes": 19463761 - }, - { - "name": "Gp0127647_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cath_funfam.gff", - "md5_checksum": "a8ae7ed318e7c170aeed508f331ce5b2", - "id": "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", - "file_size_bytes": 14536820 - }, - { - "name": "Gp0127647_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko_ec.gff", - "md5_checksum": "455f95c7c15739b2fddc6f62b03253ed", - "id": "nmdc:455f95c7c15739b2fddc6f62b03253ed", - "file_size_bytes": 10367039 - } - ] - }, + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34716" + "$oid": "649b005f2ca5ee4adb139fa4" }, "has_input": [ - "nmdc:05952c056a6db782ba77c6369206838a", - "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", - "nmdc:0c5e791c8170181aa3e43d710e7c55eb" + "nmdc:034df323b47f010f27e7c032d445a891" ], - "too_short_contig_num": 95291, "part_of": [ - "nmdc:mga0g0e588" + "nmdc:mga0jbfx89" ], - "binned_contig_num": 20, + "ctg_logsum": 65663, + "scaf_logsum": 65979, + "gap_pct": 0.00097, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", - "nmdc:40c0cbc75e2b698572b8b94d91fdc236", - "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", - "nmdc:03b448db547a556e988a0d4948dab424", - "nmdc:6e92868d1912cb8f5b32fbf507721d16" + "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "nmdc:7891adab80c63d98169e3cb7b4331f1e", + "nmdc:d883460ae5f8cbabc3d437e745935040", + "nmdc:f36166196caa529e09f3b93e17db3acc", + "nmdc:08a13111a5314ec4c8dbaa59790dc2f1" ], - "was_informed_by": "gold:Gp0127647", - "input_contig_num": 97351, - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "asm_score": 2.823, + "was_informed_by": "gold:Gp0127645", + "ctg_powsum": 6960.932, + "scaf_max": 6924, + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "scaf_powsum": 6995.401, "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0g0e588", - "mags_list": [ - { - "number_of_contig": 20, - "completeness": 1.36, - "bin_name": "bins.1", - "gene_count": 310, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 2, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 2, - "gtdbtk_genus": "", - "num_t_rna": 14 - } + "contigs": 106865, + "name": "Assembly Activity for nmdc:mga0jbfx89", + "ctg_max": 6924, + "gc_std": 0.12472, + "contig_bp": 45473855, + "gc_avg": 0.58373, + "started_at_time": "2021-10-11T02:24:42Z", + "scaf_bp": 45474295, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 106821, + "ended_at_time": "2021-10-11T04:07:11+00:00", + "ctg_l50": 395, + "ctg_l90": 284, + "ctg_n50": 33845, + "ctg_n90": 92046, + "scaf_l50": 395, + "scaf_l90": 284, + "scaf_n50": 33825, + "scaf_n90": 92004 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5d" + 
}, + "id": "nmdc:omprc-11-vykcbs96", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-ffqcqd73" + ], + "has_output": [ + "jgi:574fde6e7ded5e3df1ee140d" ], - "unbinned_contig_num": 2040, - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:38:33+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127647_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.tooShort.fa", - "md5_checksum": "8ec4227eca7ea06fed4e866c4de4a5c9", - "id": "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", - "file_size_bytes": 38197270 - }, - { - "name": "Gp0127647_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.unbinned.fa", - "md5_checksum": "40c0cbc75e2b698572b8b94d91fdc236", - "id": "nmdc:40c0cbc75e2b698572b8b94d91fdc236", - "file_size_bytes": 3202231 - }, - { - "name": "Gp0127647_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for 
Gp0127647", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", - "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", - "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", - "file_size_bytes": 775 - }, - { - "name": "Gp0127647_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127647", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_hqmq_bin.zip", - "md5_checksum": "03b448db547a556e988a0d4948dab424", - "id": "nmdc:03b448db547a556e988a0d4948dab424", - "file_size_bytes": 182 - }, - { - "name": "Gp0127647_metabat2 bins", - "description": "metabat2 bins for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_metabat_bin.zip", - "md5_checksum": "6e92868d1912cb8f5b32fbf507721d16", - "id": "nmdc:6e92868d1912cb8f5b32fbf507721d16", - "file_size_bytes": 91931 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127645" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b5d" - }, - "id": "nmdc:omprc-11-vykcbs96", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-ffqcqd73" - ], - "has_output": [ - 
"jgi:574fde6e7ded5e3df1ee140d" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127645" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { "$oid": "649b009d6bdd4fd20273c86d" @@ -28286,28 +29956,10 @@ "output_read_count": 23262948, "started_at_time": "2021-10-11T02:24:42Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:07:11+00:00", - "output_data_objects": [ - { - "name": "Gp0127645_Filtered Reads", - "description": "Filtered Reads for Gp0127645", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filtered.fastq.gz", - "md5_checksum": "034df323b47f010f27e7c032d445a891", - "id": "nmdc:034df323b47f010f27e7c032d445a891", - "file_size_bytes": 1909192845 - }, - { - "name": "Gp0127645_Filtered Stats", - "description": "Filtered Stats for Gp0127645", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filterStats.txt", - "md5_checksum": "ca137bf5e2df6541425f22b5d1fec492", - "id": "nmdc:ca137bf5e2df6541425f22b5d1fec492", - "file_size_bytes": 283 - } - ] - }, + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { "$oid": "649b009bff710ae353f8cf43" @@ -28333,89 +29985,16 @@ "name": "ReadBased Analysis Activity for nmdc:mga0jbfx89", "started_at_time": "2021-10-11T02:24:42Z", "type": "nmdc:ReadbasedAnalysis", - 
"ended_at_time": "2021-10-11T04:07:11+00:00", - "output_data_objects": [ - { - "name": "Gp0127645_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", - "md5_checksum": "694374188ba4372344536fa26a2282b8", - "id": "nmdc:694374188ba4372344536fa26a2282b8", - "file_size_bytes": 3780 - }, - { - "name": "Gp0127645_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", - "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", - "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", - "file_size_bytes": 822292 - }, - { - "name": "Gp0127645_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127645", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", - "md5_checksum": "46e203465faf61780fad8f626e9ab623", - "id": "nmdc:46e203465faf61780fad8f626e9ab623", - "file_size_bytes": 236496 - }, - { - "name": "Gp0127645_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127645", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", - "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", - "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", - "file_size_bytes": 1699052782 - }, - { - "name": "Gp0127645_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127645", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", - "md5_checksum": 
"6f8be89c7aab1c3f392b4f80c7ddf6a5", - "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", - "file_size_bytes": 256209 - }, - { - "name": "Gp0127645_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127645", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", - "md5_checksum": "4299b438a815becc8beed40fcb803e9f", - "id": "nmdc:4299b438a815becc8beed40fcb803e9f", - "file_size_bytes": 2336400 - }, - { - "name": "Gp0127645_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127645", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", - "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", - "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", - "file_size_bytes": 1359323947 - }, - { - "name": "Gp0127645_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127645", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", - "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", - "id": "nmdc:2be07eb38d408077a55ecb48e123f7f8", - "file_size_bytes": 651624 - }, - { - "name": "Gp0127645_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127645", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", - "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", - "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", - "file_size_bytes": 3973557 - } - ] - }, + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + 
"etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { "$oid": "61e7197f833bcf838a700ac3" @@ -28444,353 +30023,448 @@ "name": "ReadBased Analysis Activity for nmdc:mga0jbfx89", "started_at_time": "2021-10-11T02:24:42Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:07:11+00:00", - "output_data_objects": [ - { - "name": "Gp0127645_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", - "md5_checksum": "694374188ba4372344536fa26a2282b8", - "id": "nmdc:694374188ba4372344536fa26a2282b8", - "file_size_bytes": 3780 - }, - { - "name": "Gp0127645_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", - "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", - "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", - "file_size_bytes": 822292 - }, - { - "name": "Gp0127645_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127645", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", - "md5_checksum": "46e203465faf61780fad8f626e9ab623", - "id": "nmdc:46e203465faf61780fad8f626e9ab623", - "file_size_bytes": 236496 - }, - { - "name": "Gp0127645_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127645", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", - "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", - "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", - "file_size_bytes": 1699052782 - }, - { - "name": 
"Gp0127645_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127645", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", - "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", - "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", - "file_size_bytes": 256209 - }, - { - "name": "Gp0127645_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127645", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", - "md5_checksum": "4299b438a815becc8beed40fcb803e9f", - "id": "nmdc:4299b438a815becc8beed40fcb803e9f", - "file_size_bytes": 2336400 - }, - { - "name": "Gp0127645_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127645", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", - "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", - "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", - "file_size_bytes": 1359323947 - }, - { - "name": "Gp0127645_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127645", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", - "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", - "id": "nmdc:2be07eb38d408077a55ecb48e123f7f8", - "file_size_bytes": 651624 - }, - { - "name": "Gp0127645_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127645", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", - "md5_checksum": 
"f318581f0df6e04b7ae2384f9237da06", - "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", - "file_size_bytes": 3973557 - } - ] + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127649_Filtered Reads", + "description": "Filtered Reads for Gp0127649", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", + "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "file_size_bytes": 1967546513 }, { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa4" - }, - "has_input": [ - "nmdc:034df323b47f010f27e7c032d445a891" - ], - "part_of": [ - "nmdc:mga0jbfx89" - ], - "ctg_logsum": 65663, - "scaf_logsum": 65979, - "gap_pct": 0.00097, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3685fdcfffdf34d2802c692dc0515e33", - "nmdc:7891adab80c63d98169e3cb7b4331f1e", - "nmdc:d883460ae5f8cbabc3d437e745935040", - "nmdc:f36166196caa529e09f3b93e17db3acc", - "nmdc:08a13111a5314ec4c8dbaa59790dc2f1" - ], - "asm_score": 2.823, - "was_informed_by": "gold:Gp0127645", - "ctg_powsum": 6960.932, - "scaf_max": 6924, - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", - "scaf_powsum": 6995.401, - "execution_resource": "NERSC-Cori", - "contigs": 106865, - "name": "Assembly Activity for nmdc:mga0jbfx89", - "ctg_max": 6924, - "gc_std": 0.12472, - "contig_bp": 45473855, - "gc_avg": 0.58373, - "started_at_time": "2021-10-11T02:24:42Z", - "scaf_bp": 45474295, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 106821, - "ended_at_time": "2021-10-11T04:07:11+00:00", - "ctg_l50": 395, - "ctg_l90": 284, - "ctg_n50": 33845, - "ctg_n90": 92046, - "scaf_l50": 395, - "scaf_l90": 284, - "scaf_n50": 33825, - "scaf_n90": 
92004, - "output_data_objects": [ - { - "name": "Gp0127645_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127645", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_contigs.fna", - "md5_checksum": "3685fdcfffdf34d2802c692dc0515e33", - "id": "nmdc:3685fdcfffdf34d2802c692dc0515e33", - "file_size_bytes": 49479236 - }, - { - "name": "Gp0127645_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127645", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_scaffolds.fna", - "md5_checksum": "7891adab80c63d98169e3cb7b4331f1e", - "id": "nmdc:7891adab80c63d98169e3cb7b4331f1e", - "file_size_bytes": 49157929 - }, - { - "name": "Gp0127645_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_covstats.txt", - "md5_checksum": "d883460ae5f8cbabc3d437e745935040", - "id": "nmdc:d883460ae5f8cbabc3d437e745935040", - "file_size_bytes": 8394481 - }, - { - "name": "Gp0127645_Assembled AGP file", - "description": "Assembled AGP file for Gp0127645", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_assembly.agp", - "md5_checksum": "f36166196caa529e09f3b93e17db3acc", - "id": "nmdc:f36166196caa529e09f3b93e17db3acc", - "file_size_bytes": 7804199 - }, - { - "name": "Gp0127645_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127645", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_pairedMapped_sorted.bam", - "md5_checksum": "08a13111a5314ec4c8dbaa59790dc2f1", - "id": "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", - "file_size_bytes": 2047004915 - } - ] + 
"name": "Gp0127649_Filtered Stats", + "description": "Filtered Stats for Gp0127649", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", + "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", + "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", + "file_size_bytes": 283 }, { - "_id": { - "$oid": "649b005bbf2caae0415ef9b6" - }, - "has_input": [ - "nmdc:3685fdcfffdf34d2802c692dc0515e33" - ], - "part_of": [ - "nmdc:mga0jbfx89" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", - "nmdc:851584f7bcec80cddec4b113fe6cfcea", - "nmdc:d0280881c70c54946d9b5170e62b904b", - "nmdc:7c1894478af7b8205bb4760acb93c353", - "nmdc:ac413560dfdbcea1f0697391b593c552", - "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", - "nmdc:bbfcd35137b7cb018945a531704805eb", - "nmdc:c1c10952c472a97fb7de8bc7dbce564b", - "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", - "nmdc:a701026580285ca67816cb9a2f272ca6", - "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", - "nmdc:47c0e39e60bd4d688a29ede2af2cee35" - ], - "was_informed_by": "gold:Gp0127645", - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0jbfx89", - "started_at_time": "2021-10-11T02:24:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:07:11+00:00", - "output_data_objects": [ - { - "name": "Gp0127645_Protein FAA", - "description": "Protein FAA for Gp0127645", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_proteins.faa", - "md5_checksum": "b14fecfaa99eaad42128e409aa7ae3ec", - "id": "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", - "file_size_bytes": 29015561 - }, - { - "name": "Gp0127645_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127645", - "data_object_type": 
"Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_structural_annotation.gff", - "md5_checksum": "851584f7bcec80cddec4b113fe6cfcea", - "id": "nmdc:851584f7bcec80cddec4b113fe6cfcea", - "file_size_bytes": 2506 - }, - { - "name": "Gp0127645_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127645", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_functional_annotation.gff", - "md5_checksum": "d0280881c70c54946d9b5170e62b904b", - "id": "nmdc:d0280881c70c54946d9b5170e62b904b", - "file_size_bytes": 34124039 - }, - { - "name": "Gp0127645_KO TSV file", - "description": "KO TSV file for Gp0127645", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko.tsv", - "md5_checksum": "7c1894478af7b8205bb4760acb93c353", - "id": "nmdc:7c1894478af7b8205bb4760acb93c353", - "file_size_bytes": 3942110 - }, - { - "name": "Gp0127645_EC TSV file", - "description": "EC TSV file for Gp0127645", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ec.tsv", - "md5_checksum": "ac413560dfdbcea1f0697391b593c552", - "id": "nmdc:ac413560dfdbcea1f0697391b593c552", - "file_size_bytes": 2691460 - }, - { - "name": "Gp0127645_COG GFF file", - "description": "COG GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cog.gff", - "md5_checksum": "80f846ff418e4758f4c6b9a96ba2b8ca", - "id": "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", - "file_size_bytes": 19597211 - }, - { - "name": "Gp0127645_PFAM GFF file", - "description": "PFAM GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_pfam.gff", - "md5_checksum": 
"bbfcd35137b7cb018945a531704805eb", - "id": "nmdc:bbfcd35137b7cb018945a531704805eb", - "file_size_bytes": 14110039 - }, - { - "name": "Gp0127645_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_tigrfam.gff", - "md5_checksum": "c1c10952c472a97fb7de8bc7dbce564b", - "id": "nmdc:c1c10952c472a97fb7de8bc7dbce564b", - "file_size_bytes": 1502814 - }, - { - "name": "Gp0127645_SMART GFF file", - "description": "SMART GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_smart.gff", - "md5_checksum": "b86dba5a29f4ca25cec7c0590e0b4771", - "id": "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", - "file_size_bytes": 4354176 - }, - { - "name": "Gp0127645_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_supfam.gff", - "md5_checksum": "a701026580285ca67816cb9a2f272ca6", - "id": "nmdc:a701026580285ca67816cb9a2f272ca6", - "file_size_bytes": 24911282 - }, - { - "name": "Gp0127645_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cath_funfam.gff", - "md5_checksum": "5ce71fa6aebdb4fb9f843e89ab53ca9b", - "id": "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", - "file_size_bytes": 18832113 - }, - { - "name": "Gp0127645_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko_ec.gff", - "md5_checksum": "47c0e39e60bd4d688a29ede2af2cee35", - "id": "nmdc:47c0e39e60bd4d688a29ede2af2cee35", - "file_size_bytes": 12581509 - } - ] + "name": "Gp0127649_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127649", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", + "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", + "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "file_size_bytes": 2079 + }, + { + "name": "Gp0127649_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", + "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", + "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "file_size_bytes": 642861 + }, + { + "name": "Gp0127649_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127649", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", + "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", + "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", + "file_size_bytes": 230792 + }, + { + "name": "Gp0127649_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127649", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", + "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", + "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "file_size_bytes": 1743695420 + }, + { + "name": "Gp0127649_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127649", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", + "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", + "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "file_size_bytes": 254036 + }, + { + "name": "Gp0127649_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127649", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", + "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", + "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "file_size_bytes": 2332943 + }, + { + "name": "Gp0127649_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127649", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", + "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", + "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "file_size_bytes": 1387669799 + }, + { + "name": "Gp0127649_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127649", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", + "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", + "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "file_size_bytes": 637131 + }, + { + "name": "Gp0127649_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127649", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", + "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", + "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", + "file_size_bytes": 3976407 + }, + { + "name": "Gp0127649_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", + "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", + "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "file_size_bytes": 2079 + }, + { + "name": "Gp0127649_Gottcha2 
full TSV report", + "description": "Gottcha2 full TSV report for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", + "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", + "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "file_size_bytes": 642861 + }, + { + "name": "Gp0127649_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127649", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", + "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", + "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", + "file_size_bytes": 230792 + }, + { + "name": "Gp0127649_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127649", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", + "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", + "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "file_size_bytes": 1743695420 + }, + { + "name": "Gp0127649_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127649", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", + "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", + "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "file_size_bytes": 254036 + }, + { + "name": "Gp0127649_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127649", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", + "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", + "id": 
"nmdc:4e9ec619c5611cb0166ea127496fadeb", + "file_size_bytes": 2332943 + }, + { + "name": "Gp0127649_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127649", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", + "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", + "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "file_size_bytes": 1387669799 + }, + { + "name": "Gp0127649_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127649", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", + "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", + "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "file_size_bytes": 637131 + }, + { + "name": "Gp0127649_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127649", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", + "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", + "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", + "file_size_bytes": 3976407 + }, + { + "name": "Gp0127649_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127649", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_contigs.fna", + "md5_checksum": "5ada15f24d3de4a96521532a4ced6018", + "id": "nmdc:5ada15f24d3de4a96521532a4ced6018", + "file_size_bytes": 94852732 + }, + { + "name": "Gp0127649_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127649", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_scaffolds.fna", + 
"md5_checksum": "fc32ae27239661670605b59c395dd770", + "id": "nmdc:fc32ae27239661670605b59c395dd770", + "file_size_bytes": 94280572 + }, + { + "name": "Gp0127649_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_covstats.txt", + "md5_checksum": "d6e996af3275c4cdd3e51376517e2b6b", + "id": "nmdc:d6e996af3275c4cdd3e51376517e2b6b", + "file_size_bytes": 15029734 + }, + { + "name": "Gp0127649_Assembled AGP file", + "description": "Assembled AGP file for Gp0127649", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_assembly.agp", + "md5_checksum": "f52600933fc5a09f7cead5c065d6b100", + "id": "nmdc:f52600933fc5a09f7cead5c065d6b100", + "file_size_bytes": 14057243 + }, + { + "name": "Gp0127649_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127649", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_pairedMapped_sorted.bam", + "md5_checksum": "5d9826a5f5164cfe20bfc1343144c96f", + "id": "nmdc:5d9826a5f5164cfe20bfc1343144c96f", + "file_size_bytes": 2147322298 + }, + { + "name": "Gp0127649_Protein FAA", + "description": "Protein FAA for Gp0127649", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_proteins.faa", + "md5_checksum": "4e5d87bb4bb3198f5b9955622a781376", + "id": "nmdc:4e5d87bb4bb3198f5b9955622a781376", + "file_size_bytes": 55301156 + }, + { + "name": "Gp0127649_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127649", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_structural_annotation.gff", + "md5_checksum": 
"40f79a8b021a3de27c464087fad9f092", + "id": "nmdc:40f79a8b021a3de27c464087fad9f092", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127649_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127649", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_functional_annotation.gff", + "md5_checksum": "aba74592cf7aa507179e9544c008a0ec", + "id": "nmdc:aba74592cf7aa507179e9544c008a0ec", + "file_size_bytes": 63464973 + }, + { + "name": "Gp0127649_KO TSV file", + "description": "KO TSV file for Gp0127649", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko.tsv", + "md5_checksum": "29500fc3a86f2767cc3752ba02fa0a05", + "id": "nmdc:29500fc3a86f2767cc3752ba02fa0a05", + "file_size_bytes": 7317450 + }, + { + "name": "Gp0127649_EC TSV file", + "description": "EC TSV file for Gp0127649", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ec.tsv", + "md5_checksum": "ba8fedc9b57d401ad0cc2b329038c5a9", + "id": "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", + "file_size_bytes": 4888576 + }, + { + "name": "Gp0127649_COG GFF file", + "description": "COG GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cog.gff", + "md5_checksum": "66bd5f2b62818742c6df5c39d1952a99", + "id": "nmdc:66bd5f2b62818742c6df5c39d1952a99", + "file_size_bytes": 36960882 + }, + { + "name": "Gp0127649_PFAM GFF file", + "description": "PFAM GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_pfam.gff", + "md5_checksum": "e60c77fb34f71861ceacf988074949af", + "id": "nmdc:e60c77fb34f71861ceacf988074949af", + "file_size_bytes": 27535342 + }, + { + "name": "Gp0127649_TigrFam GFF file", + 
"description": "TigrFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_tigrfam.gff", + "md5_checksum": "3738ab59fb56002a9f38d95b101957bd", + "id": "nmdc:3738ab59fb56002a9f38d95b101957bd", + "file_size_bytes": 2999247 + }, + { + "name": "Gp0127649_SMART GFF file", + "description": "SMART GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_smart.gff", + "md5_checksum": "2f34c5db7846cbf37add471c0dbca951", + "id": "nmdc:2f34c5db7846cbf37add471c0dbca951", + "file_size_bytes": 8199823 + }, + { + "name": "Gp0127649_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_supfam.gff", + "md5_checksum": "fa7f659afca037861ae65e08092f2d83", + "id": "nmdc:fa7f659afca037861ae65e08092f2d83", + "file_size_bytes": 46114509 + }, + { + "name": "Gp0127649_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cath_funfam.gff", + "md5_checksum": "9ee627031c0b425974fa1aa4d695d4ae", + "id": "nmdc:9ee627031c0b425974fa1aa4d695d4ae", + "file_size_bytes": 34807554 + }, + { + "name": "Gp0127649_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko_ec.gff", + "md5_checksum": "2fc423fd55e34d3400c9a6924df67633", + "id": "nmdc:2fc423fd55e34d3400c9a6924df67633", + "file_size_bytes": 23276630 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + 
"id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127649_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.tooShort.fa", + "md5_checksum": "f84d25fee16a4dece54f5580d893ecaa", + "id": "nmdc:f84d25fee16a4dece54f5580d893ecaa", + "file_size_bytes": 79592416 + }, + { + "name": "Gp0127649_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.unbinned.fa", + "md5_checksum": "ed61fb0056b08bc82f4545c49b744c2a", + "id": "nmdc:ed61fb0056b08bc82f4545c49b744c2a", + "file_size_bytes": 14383032 + }, + { + "name": "Gp0127649_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127649", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_checkm_qa.out", + "md5_checksum": "8d4bce832a16affbcc3efeb8364e8eaa", + "id": "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", + "file_size_bytes": 942 + }, + { + "name": "Gp0127649_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127649", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_hqmq_bin.zip", + "md5_checksum": "40273505b8b3dddd3ee5cb5c83871067", + "id": "nmdc:40273505b8b3dddd3ee5cb5c83871067", + "file_size_bytes": 182 }, + { + "name": "Gp0127649_metabat2 bins", + "description": "metabat2 bins for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_metabat_bin.zip", + "md5_checksum": "b767f2b59d0fd9e650914e140cacf104", + "id": "nmdc:b767f2b59d0fd9e650914e140cacf104", + 
"file_size_bytes": 269239 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34717" + "$oid": "649b0052ec087f6bbab34729" }, "has_input": [ - "nmdc:3685fdcfffdf34d2802c692dc0515e33", - "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", - "nmdc:d0280881c70c54946d9b5170e62b904b" + "nmdc:5ada15f24d3de4a96521532a4ced6018", + "nmdc:5d9826a5f5164cfe20bfc1343144c96f", + "nmdc:aba74592cf7aa507179e9544c008a0ec" ], - "too_short_contig_num": 102729, + "too_short_contig_num": 180499, "part_of": [ - "nmdc:mga0jbfx89" + "nmdc:mga0j4fe07" ], - "binned_contig_num": 61, + "binned_contig_num": 211, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:47d1233f5afdd7b00790ac2ca8be778a", - "nmdc:637bc2394dcb4869149370683ccc9e61", - "nmdc:a8e49a136701e388199a72f02bb6d288", - "nmdc:b0d2597d04809508e9dd0bcb48c7edad", - "nmdc:106983a66b58a2d07f0592d9379ad635" + "nmdc:f84d25fee16a4dece54f5580d893ecaa", + "nmdc:ed61fb0056b08bc82f4545c49b744c2a", + "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", + "nmdc:40273505b8b3dddd3ee5cb5c83871067", + "nmdc:b767f2b59d0fd9e650914e140cacf104" ], - "was_informed_by": "gold:Gp0127645", - "input_contig_num": 106865, - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "was_informed_by": "gold:Gp0127649", + "input_contig_num": 190009, + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0jbfx89", + "name": "MAGs Analysis Activity for nmdc:mga0j4fe07", "mags_list": [ { - "number_of_contig": 61, - "completeness": 18.77, + "number_of_contig": 64, + "completeness": 16.46, "bin_name": "bins.1", - "gene_count": 307, + "gene_count": 305, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.47, + "gtdbtk_class": 
"", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 147, + "completeness": 19.16, + "bin_name": "bins.2", + "gene_count": 744, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -28803,649 +30477,714 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 7 + "num_t_rna": 15 } ], - "unbinned_contig_num": 4075, - "started_at_time": "2021-10-11T02:24:42Z", + "unbinned_contig_num": 9299, + "started_at_time": "2021-10-11T02:23:29Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:07:11+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127645_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.tooShort.fa", - "md5_checksum": "47d1233f5afdd7b00790ac2ca8be778a", - "id": "nmdc:47d1233f5afdd7b00790ac2ca8be778a", - "file_size_bytes": 43078346 - }, - { - "name": "Gp0127645_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.unbinned.fa", - "md5_checksum": "637bc2394dcb4869149370683ccc9e61", - "id": "nmdc:637bc2394dcb4869149370683ccc9e61", - "file_size_bytes": 6153132 - }, - { - "name": "Gp0127645_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment 
result for Gp0127645", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_checkm_qa.out", - "md5_checksum": "a8e49a136701e388199a72f02bb6d288", - "id": "nmdc:a8e49a136701e388199a72f02bb6d288", - "file_size_bytes": 765 - }, - { - "name": "Gp0127645_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127645", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_hqmq_bin.zip", - "md5_checksum": "b0d2597d04809508e9dd0bcb48c7edad", - "id": "nmdc:b0d2597d04809508e9dd0bcb48c7edad", - "file_size_bytes": 182 - }, - { - "name": "Gp0127645_metabat2 bins", - "description": "metabat2 bins for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_metabat_bin.zip", - "md5_checksum": "106983a66b58a2d07f0592d9379ad635", - "id": "nmdc:106983a66b58a2d07f0592d9379ad635", - "file_size_bytes": 76018 - } - ] + "ended_at_time": "2021-10-11T03:38:32+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b5e" - }, - "id": "nmdc:omprc-11-dw7shd52", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-7fedgs13" - ], - "has_output": [ - "jgi:574fde8c7ded5e3df1ee1424" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - 
"processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127649" - ], - "downstream_workflow_activity_records": [ + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c885" + "$oid": "649b005bbf2caae0415ef9ca" }, "has_input": [ - "nmdc:5895de3040f750a5ce1b5238158fd51c" + "nmdc:5ada15f24d3de4a96521532a4ced6018" ], "part_of": [ "nmdc:mga0j4fe07" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "nmdc:25a7ff469ffae5906d6ade4d74cab88f" + "nmdc:4e5d87bb4bb3198f5b9955622a781376", + "nmdc:40f79a8b021a3de27c464087fad9f092", + "nmdc:aba74592cf7aa507179e9544c008a0ec", + "nmdc:29500fc3a86f2767cc3752ba02fa0a05", + "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", + "nmdc:66bd5f2b62818742c6df5c39d1952a99", + "nmdc:e60c77fb34f71861ceacf988074949af", + "nmdc:3738ab59fb56002a9f38d95b101957bd", + "nmdc:2f34c5db7846cbf37add471c0dbca951", + "nmdc:fa7f659afca037861ae65e08092f2d83", + "nmdc:9ee627031c0b425974fa1aa4d695d4ae", + "nmdc:2fc423fd55e34d3400c9a6924df67633" ], "was_informed_by": "gold:Gp0127649", - "input_read_count": 24889788, - "output_read_bases": 3558782964, "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", "execution_resource": "NERSC-Cori", - "input_read_bases": 3758357988, - "name": "Read QC Activity for nmdc:mga0j4fe07", - "output_read_count": 23803802, + "name": "Annotation Activity for nmdc:mga0j4fe07", "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:38:32+00:00", - "output_data_objects": [ - { - "name": "Gp0127649_Filtered Reads", - "description": "Filtered Reads for Gp0127649", - "data_object_type": "Filtered Sequencing Reads", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", - "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "file_size_bytes": 1967546513 - }, - { - "name": "Gp0127649_Filtered Stats", - "description": "Filtered Stats for Gp0127649", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", - "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", - "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", - "file_size_bytes": 283 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:38:32+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf48" + "$oid": "649b005f2ca5ee4adb139fb8" }, "has_input": [ "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" ], + "part_of": [ + "nmdc:mga0j4fe07" + ], + "ctg_logsum": 157844, + "scaf_logsum": 158661, + "gap_pct": 0.00147, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "nmdc:8c1683fa4041bd10711aa3beb4735811", - "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "nmdc:0d07551972f3230ec2ef4a0e04929b97" + "nmdc:5ada15f24d3de4a96521532a4ced6018", + "nmdc:fc32ae27239661670605b59c395dd770", + "nmdc:d6e996af3275c4cdd3e51376517e2b6b", + "nmdc:f52600933fc5a09f7cead5c065d6b100", + "nmdc:5d9826a5f5164cfe20bfc1343144c96f" ], + "asm_score": 3.279, "was_informed_by": "gold:Gp0127649", + "ctg_powsum": 16877, + "scaf_max": 28201, "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "scaf_powsum": 16967, "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", + "contigs": 190009, + "name": "Assembly 
Activity for nmdc:mga0j4fe07", + "ctg_max": 28201, + "gc_std": 0.09385, + "contig_bp": 87528185, + "gc_avg": 0.62766, "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", + "scaf_bp": 87529475, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 189880, "ended_at_time": "2021-10-11T03:38:32+00:00", - "output_data_objects": [ - { - "name": "Gp0127649_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", - "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", - "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "file_size_bytes": 2079 - }, - { - "name": "Gp0127649_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", - "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", - "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "file_size_bytes": 642861 - }, - { - "name": "Gp0127649_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127649", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", - "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", - "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", - "file_size_bytes": 230792 - }, - { - "name": "Gp0127649_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127649", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", - "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", - "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "file_size_bytes": 1743695420 - }, - { - "name": "Gp0127649_Centrifuge TSV 
report", - "description": "Centrifuge TSV report for Gp0127649", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", - "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", - "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "file_size_bytes": 254036 - }, - { - "name": "Gp0127649_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127649", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", - "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", - "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "file_size_bytes": 2332943 - }, - { - "name": "Gp0127649_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127649", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", - "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", - "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "file_size_bytes": 1387669799 - }, - { - "name": "Gp0127649_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127649", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", - "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", - "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "file_size_bytes": 637131 - }, - { - "name": "Gp0127649_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127649", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", - "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", - "id": 
"nmdc:0d07551972f3230ec2ef4a0e04929b97", - "file_size_bytes": 3976407 - } + "ctg_l50": 440, + "ctg_l90": 289, + "ctg_n50": 57445, + "ctg_n90": 160942, + "scaf_l50": 440, + "scaf_l90": 289, + "scaf_n50": 57416, + "scaf_n90": 160823 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5e" + }, + "id": "nmdc:omprc-11-dw7shd52", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-7fedgs13" + ], + "has_output": [ + "jgi:574fde8c7ded5e3df1ee1424" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127649" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "61e719f8833bcf838a7018c7" + "$oid": "649b009d6bdd4fd20273c885" }, "has_input": [ - "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" + "nmdc:5895de3040f750a5ce1b5238158fd51c" ], "part_of": [ "nmdc:mga0j4fe07" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "nmdc:8c1683fa4041bd10711aa3beb4735811", - 
"nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "nmdc:0d07551972f3230ec2ef4a0e04929b97" + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "nmdc:25a7ff469ffae5906d6ade4d74cab88f" ], "was_informed_by": "gold:Gp0127649", + "input_read_count": 24889788, + "output_read_bases": 3558782964, "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:32+00:00", - "output_data_objects": [ - { - "name": "Gp0127649_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", - "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", - "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "file_size_bytes": 2079 - }, - { - "name": "Gp0127649_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", - "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", - "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "file_size_bytes": 642861 - }, - { - "name": "Gp0127649_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127649", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", - "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", - "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", - "file_size_bytes": 230792 - }, - { - "name": "Gp0127649_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV 
report for Gp0127649", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", - "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", - "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "file_size_bytes": 1743695420 - }, - { - "name": "Gp0127649_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127649", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", - "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", - "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "file_size_bytes": 254036 - }, - { - "name": "Gp0127649_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127649", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", - "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", - "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "file_size_bytes": 2332943 - }, - { - "name": "Gp0127649_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127649", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", - "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", - "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "file_size_bytes": 1387669799 - }, - { - "name": "Gp0127649_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127649", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", - "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", - "id": 
"nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "file_size_bytes": 637131 - }, - { - "name": "Gp0127649_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127649", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", - "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", - "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", - "file_size_bytes": 3976407 - } - ] - }, + "input_read_bases": 3758357988, + "name": "Read QC Activity for nmdc:mga0j4fe07", + "output_read_count": 23803802, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:38:32+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fb8" + "$oid": "649b009bff710ae353f8cf48" }, "has_input": [ "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" ], - "part_of": [ - "nmdc:mga0j4fe07" - ], - "ctg_logsum": 157844, - "scaf_logsum": 158661, - "gap_pct": 0.00147, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:5ada15f24d3de4a96521532a4ced6018", - "nmdc:fc32ae27239661670605b59c395dd770", - "nmdc:d6e996af3275c4cdd3e51376517e2b6b", - "nmdc:f52600933fc5a09f7cead5c065d6b100", - "nmdc:5d9826a5f5164cfe20bfc1343144c96f" + "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "nmdc:8c1683fa4041bd10711aa3beb4735811", + "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "nmdc:0d07551972f3230ec2ef4a0e04929b97" ], - "asm_score": 3.279, "was_informed_by": "gold:Gp0127649", - "ctg_powsum": 16877, - "scaf_max": 28201, "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", - "scaf_powsum": 16967, "execution_resource": "NERSC-Cori", - "contigs": 190009, - "name": "Assembly 
Activity for nmdc:mga0j4fe07", - "ctg_max": 28201, - "gc_std": 0.09385, - "contig_bp": 87528185, - "gc_avg": 0.62766, + "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", "started_at_time": "2021-10-11T02:23:29Z", - "scaf_bp": 87529475, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 189880, - "ended_at_time": "2021-10-11T03:38:32+00:00", - "ctg_l50": 440, - "ctg_l90": 289, - "ctg_n50": 57445, - "ctg_n90": 160942, - "scaf_l50": 440, - "scaf_l90": 289, - "scaf_n50": 57416, - "scaf_n90": 160823, - "output_data_objects": [ - { - "name": "Gp0127649_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127649", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_contigs.fna", - "md5_checksum": "5ada15f24d3de4a96521532a4ced6018", - "id": "nmdc:5ada15f24d3de4a96521532a4ced6018", - "file_size_bytes": 94852732 - }, - { - "name": "Gp0127649_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127649", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_scaffolds.fna", - "md5_checksum": "fc32ae27239661670605b59c395dd770", - "id": "nmdc:fc32ae27239661670605b59c395dd770", - "file_size_bytes": 94280572 - }, - { - "name": "Gp0127649_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_covstats.txt", - "md5_checksum": "d6e996af3275c4cdd3e51376517e2b6b", - "id": "nmdc:d6e996af3275c4cdd3e51376517e2b6b", - "file_size_bytes": 15029734 - }, - { - "name": "Gp0127649_Assembled AGP file", - "description": "Assembled AGP file for Gp0127649", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_assembly.agp", - "md5_checksum": "f52600933fc5a09f7cead5c065d6b100", - "id": 
"nmdc:f52600933fc5a09f7cead5c065d6b100", - "file_size_bytes": 14057243 - }, - { - "name": "Gp0127649_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127649", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_pairedMapped_sorted.bam", - "md5_checksum": "5d9826a5f5164cfe20bfc1343144c96f", - "id": "nmdc:5d9826a5f5164cfe20bfc1343144c96f", - "file_size_bytes": 2147322298 - } - ] - }, + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:38:32+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9ca" + "$oid": "61e719f8833bcf838a7018c7" }, "has_input": [ - "nmdc:5ada15f24d3de4a96521532a4ced6018" + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" ], "part_of": [ "nmdc:mga0j4fe07" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:4e5d87bb4bb3198f5b9955622a781376", - "nmdc:40f79a8b021a3de27c464087fad9f092", - "nmdc:aba74592cf7aa507179e9544c008a0ec", - "nmdc:29500fc3a86f2767cc3752ba02fa0a05", - "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", - "nmdc:66bd5f2b62818742c6df5c39d1952a99", - "nmdc:e60c77fb34f71861ceacf988074949af", - "nmdc:3738ab59fb56002a9f38d95b101957bd", - "nmdc:2f34c5db7846cbf37add471c0dbca951", - "nmdc:fa7f659afca037861ae65e08092f2d83", - "nmdc:9ee627031c0b425974fa1aa4d695d4ae", - "nmdc:2fc423fd55e34d3400c9a6924df67633" + "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "nmdc:8c1683fa4041bd10711aa3beb4735811", + "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + 
"nmdc:0d07551972f3230ec2ef4a0e04929b97" ], "was_informed_by": "gold:Gp0127649", "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0j4fe07", + "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:38:32+00:00", - "output_data_objects": [ - { - "name": "Gp0127649_Protein FAA", - "description": "Protein FAA for Gp0127649", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_proteins.faa", - "md5_checksum": "4e5d87bb4bb3198f5b9955622a781376", - "id": "nmdc:4e5d87bb4bb3198f5b9955622a781376", - "file_size_bytes": 55301156 - }, - { - "name": "Gp0127649_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127649", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_structural_annotation.gff", - "md5_checksum": "40f79a8b021a3de27c464087fad9f092", - "id": "nmdc:40f79a8b021a3de27c464087fad9f092", - "file_size_bytes": 2518 - }, - { - "name": "Gp0127649_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127649", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_functional_annotation.gff", - "md5_checksum": "aba74592cf7aa507179e9544c008a0ec", - "id": "nmdc:aba74592cf7aa507179e9544c008a0ec", - "file_size_bytes": 63464973 - }, - { - "name": "Gp0127649_KO TSV file", - "description": "KO TSV file for Gp0127649", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko.tsv", - "md5_checksum": "29500fc3a86f2767cc3752ba02fa0a05", - "id": 
"nmdc:29500fc3a86f2767cc3752ba02fa0a05", - "file_size_bytes": 7317450 - }, - { - "name": "Gp0127649_EC TSV file", - "description": "EC TSV file for Gp0127649", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ec.tsv", - "md5_checksum": "ba8fedc9b57d401ad0cc2b329038c5a9", - "id": "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", - "file_size_bytes": 4888576 - }, - { - "name": "Gp0127649_COG GFF file", - "description": "COG GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cog.gff", - "md5_checksum": "66bd5f2b62818742c6df5c39d1952a99", - "id": "nmdc:66bd5f2b62818742c6df5c39d1952a99", - "file_size_bytes": 36960882 - }, - { - "name": "Gp0127649_PFAM GFF file", - "description": "PFAM GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_pfam.gff", - "md5_checksum": "e60c77fb34f71861ceacf988074949af", - "id": "nmdc:e60c77fb34f71861ceacf988074949af", - "file_size_bytes": 27535342 - }, - { - "name": "Gp0127649_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_tigrfam.gff", - "md5_checksum": "3738ab59fb56002a9f38d95b101957bd", - "id": "nmdc:3738ab59fb56002a9f38d95b101957bd", - "file_size_bytes": 2999247 - }, - { - "name": "Gp0127649_SMART GFF file", - "description": "SMART GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_smart.gff", - "md5_checksum": "2f34c5db7846cbf37add471c0dbca951", - "id": "nmdc:2f34c5db7846cbf37add471c0dbca951", - "file_size_bytes": 8199823 - }, - { - "name": "Gp0127649_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_supfam.gff", - "md5_checksum": 
"fa7f659afca037861ae65e08092f2d83", - "id": "nmdc:fa7f659afca037861ae65e08092f2d83", - "file_size_bytes": 46114509 - }, - { - "name": "Gp0127649_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cath_funfam.gff", - "md5_checksum": "9ee627031c0b425974fa1aa4d695d4ae", - "id": "nmdc:9ee627031c0b425974fa1aa4d695d4ae", - "file_size_bytes": 34807554 - }, - { - "name": "Gp0127649_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko_ec.gff", - "md5_checksum": "2fc423fd55e34d3400c9a6924df67633", - "id": "nmdc:2fc423fd55e34d3400c9a6924df67633", - "file_size_bytes": 23276630 - } - ] + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:38:32+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127652_Filtered Reads", + "description": "Filtered Reads for Gp0127652", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filtered.fastq.gz", + "md5_checksum": "60f03b815160b29125c2bd0776a330bf", + "id": "nmdc:60f03b815160b29125c2bd0776a330bf", + "file_size_bytes": 2019434951 + }, + { + "name": "Gp0127652_Filtered Stats", + "description": "Filtered Stats for Gp0127652", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filterStats.txt", + "md5_checksum": "c40fa552711f6b19130b2a559f2d4cdc", + "id": "nmdc:c40fa552711f6b19130b2a559f2d4cdc", + "file_size_bytes": 290 + }, + { + "name": "Gp0127652_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127652", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", + "md5_checksum": "70f29a321c925cfc0e2003515f708400", + "id": "nmdc:70f29a321c925cfc0e2003515f708400", + "file_size_bytes": 1524 + }, + { + "name": "Gp0127652_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", + "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", + "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "file_size_bytes": 670250 }, + { + "name": "Gp0127652_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127652", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", + "md5_checksum": "9cd3b2939adabd809741ae6a84260266", + "id": "nmdc:9cd3b2939adabd809741ae6a84260266", + "file_size_bytes": 229949 + }, + { + "name": "Gp0127652_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127652", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", + "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", + "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "file_size_bytes": 1814515284 + }, + { + "name": "Gp0127652_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127652", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", + "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", + "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127652_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127652", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", + "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", + "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "file_size_bytes": 2330558 + }, + { + "name": "Gp0127652_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127652", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", + "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", + "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "file_size_bytes": 1445957300 + }, + { + "name": "Gp0127652_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127652", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", + "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", + "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "file_size_bytes": 639677 + }, + { + "name": "Gp0127652_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127652", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", + "md5_checksum": "1df4b479c887b43319d89cc80dc35239", + "id": "nmdc:1df4b479c887b43319d89cc80dc35239", + "file_size_bytes": 3991377 + }, + { + "name": "Gp0127652_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", + "md5_checksum": "70f29a321c925cfc0e2003515f708400", + "id": "nmdc:70f29a321c925cfc0e2003515f708400", + "file_size_bytes": 1524 + }, + { + "name": "Gp0127652_Gottcha2 
full TSV report", + "description": "Gottcha2 full TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", + "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", + "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "file_size_bytes": 670250 + }, + { + "name": "Gp0127652_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127652", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", + "md5_checksum": "9cd3b2939adabd809741ae6a84260266", + "id": "nmdc:9cd3b2939adabd809741ae6a84260266", + "file_size_bytes": 229949 + }, + { + "name": "Gp0127652_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127652", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", + "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", + "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "file_size_bytes": 1814515284 + }, + { + "name": "Gp0127652_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127652", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", + "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", + "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127652_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127652", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", + "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", + "id": 
"nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "file_size_bytes": 2330558 + }, + { + "name": "Gp0127652_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127652", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", + "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", + "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "file_size_bytes": 1445957300 + }, + { + "name": "Gp0127652_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127652", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", + "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", + "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "file_size_bytes": 639677 + }, + { + "name": "Gp0127652_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127652", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", + "md5_checksum": "1df4b479c887b43319d89cc80dc35239", + "id": "nmdc:1df4b479c887b43319d89cc80dc35239", + "file_size_bytes": 3991377 + }, + { + "name": "Gp0127652_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127652", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_contigs.fna", + "md5_checksum": "a550eb6e614b375c1089ab816163ea63", + "id": "nmdc:a550eb6e614b375c1089ab816163ea63", + "file_size_bytes": 117075841 + }, + { + "name": "Gp0127652_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127652", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_scaffolds.fna", + 
"md5_checksum": "9f194d271c352af3f68f2afeb1dbd499", + "id": "nmdc:9f194d271c352af3f68f2afeb1dbd499", + "file_size_bytes": 116423675 + }, + { + "name": "Gp0127652_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_covstats.txt", + "md5_checksum": "b0a79069110825cfe5525a8fc4f02cb6", + "id": "nmdc:b0a79069110825cfe5525a8fc4f02cb6", + "file_size_bytes": 17141637 + }, + { + "name": "Gp0127652_Assembled AGP file", + "description": "Assembled AGP file for Gp0127652", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_assembly.agp", + "md5_checksum": "f54e8bda482b1cb8bc8e121ee5f39e07", + "id": "nmdc:f54e8bda482b1cb8bc8e121ee5f39e07", + "file_size_bytes": 16044279 + }, + { + "name": "Gp0127652_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127652", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_pairedMapped_sorted.bam", + "md5_checksum": "c8c5056ee57126695073137d0c1d3d04", + "id": "nmdc:c8c5056ee57126695073137d0c1d3d04", + "file_size_bytes": 2224050507 + }, + { + "name": "Gp0127652_Protein FAA", + "description": "Protein FAA for Gp0127652", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_proteins.faa", + "md5_checksum": "096c54bce5ec1cc5d41ac64553e42cb3", + "id": "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", + "file_size_bytes": 66555768 + }, + { + "name": "Gp0127652_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127652", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_structural_annotation.gff", + "md5_checksum": 
"ac8cd253a39e6e5fe0a0930f3bf6888a", + "id": "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127652_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127652", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_functional_annotation.gff", + "md5_checksum": "863dc502676573c59ce69b1ff786042a", + "id": "nmdc:863dc502676573c59ce69b1ff786042a", + "file_size_bytes": 74520486 + }, + { + "name": "Gp0127652_KO TSV file", + "description": "KO TSV file for Gp0127652", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko.tsv", + "md5_checksum": "28ed2a9e345d0e542127fd1dc2173ae7", + "id": "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", + "file_size_bytes": 8379185 + }, + { + "name": "Gp0127652_EC TSV file", + "description": "EC TSV file for Gp0127652", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ec.tsv", + "md5_checksum": "a826d96e791f69ff7759d57f44a8a510", + "id": "nmdc:a826d96e791f69ff7759d57f44a8a510", + "file_size_bytes": 5555311 + }, + { + "name": "Gp0127652_COG GFF file", + "description": "COG GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cog.gff", + "md5_checksum": "58e310990be01a574eef05b3f5dd1495", + "id": "nmdc:58e310990be01a574eef05b3f5dd1495", + "file_size_bytes": 43385646 + }, + { + "name": "Gp0127652_PFAM GFF file", + "description": "PFAM GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_pfam.gff", + "md5_checksum": "28ce5c4c605a1c4538ce63987252c0ad", + "id": "nmdc:28ce5c4c605a1c4538ce63987252c0ad", + "file_size_bytes": 33061709 + }, + { + "name": "Gp0127652_TigrFam GFF file", + 
"description": "TigrFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_tigrfam.gff", + "md5_checksum": "6de9ddf0b07c9bcf1409aceb7ee2f941", + "id": "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", + "file_size_bytes": 3665042 + }, + { + "name": "Gp0127652_SMART GFF file", + "description": "SMART GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_smart.gff", + "md5_checksum": "6342c9c98e297d2e39a2144c7ca0191b", + "id": "nmdc:6342c9c98e297d2e39a2144c7ca0191b", + "file_size_bytes": 9667737 + }, + { + "name": "Gp0127652_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_supfam.gff", + "md5_checksum": "d20aa781d3ad6b0face7cc9c412bc3f7", + "id": "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", + "file_size_bytes": 54593577 + }, + { + "name": "Gp0127652_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cath_funfam.gff", + "md5_checksum": "db2e4b8f6cc1e8dc934e14b93589805a", + "id": "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", + "file_size_bytes": 41409254 + }, + { + "name": "Gp0127652_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko_ec.gff", + "md5_checksum": "f51f9d679d1b045f4ebc61dab7fc2f08", + "id": "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08", + "file_size_bytes": 26617726 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + 
"id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127652_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.tooShort.fa", + "md5_checksum": "4371932b5834f2deadb2fbfc42b056f7", + "id": "nmdc:4371932b5834f2deadb2fbfc42b056f7", + "file_size_bytes": 89154072 + }, + { + "name": "Gp0127652_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.unbinned.fa", + "md5_checksum": "5a8d8441e6e472837809ee31d517d32a", + "id": "nmdc:5a8d8441e6e472837809ee31d517d32a", + "file_size_bytes": 24514353 + }, + { + "name": "Gp0127652_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127652", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_checkm_qa.out", + "md5_checksum": "16016a7b2388048eec469f73395bc478", + "id": "nmdc:16016a7b2388048eec469f73395bc478", + "file_size_bytes": 1320 + }, + { + "name": "Gp0127652_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127652", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_hqmq_bin.zip", + "md5_checksum": "1e604f9f29f74c6169c4d27f839bb7b0", + "id": "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", + "file_size_bytes": 182 + }, + { + "name": "Gp0127652_metabat2 bins", + "description": "metabat2 bins for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_metabat_bin.zip", + "md5_checksum": "21467369d04671628ae67afbaf1d2076", + "id": "nmdc:21467369d04671628ae67afbaf1d2076", + 
"file_size_bytes": 1013750 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34729" + "$oid": "649b0052ec087f6bbab34727" }, "has_input": [ - "nmdc:5ada15f24d3de4a96521532a4ced6018", - "nmdc:5d9826a5f5164cfe20bfc1343144c96f", - "nmdc:aba74592cf7aa507179e9544c008a0ec" + "nmdc:a550eb6e614b375c1089ab816163ea63", + "nmdc:c8c5056ee57126695073137d0c1d3d04", + "nmdc:863dc502676573c59ce69b1ff786042a" ], - "too_short_contig_num": 180499, + "too_short_contig_num": 200309, "part_of": [ - "nmdc:mga0j4fe07" + "nmdc:mga0mfxf90" ], - "binned_contig_num": 211, + "binned_contig_num": 835, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:f84d25fee16a4dece54f5580d893ecaa", - "nmdc:ed61fb0056b08bc82f4545c49b744c2a", - "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", - "nmdc:40273505b8b3dddd3ee5cb5c83871067", - "nmdc:b767f2b59d0fd9e650914e140cacf104" + "nmdc:4371932b5834f2deadb2fbfc42b056f7", + "nmdc:5a8d8441e6e472837809ee31d517d32a", + "nmdc:16016a7b2388048eec469f73395bc478", + "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", + "nmdc:21467369d04671628ae67afbaf1d2076" ], - "was_informed_by": "gold:Gp0127649", - "input_contig_num": 190009, - "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "was_informed_by": "gold:Gp0127652", + "input_contig_num": 216252, + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0j4fe07", + "name": "MAGs Analysis Activity for nmdc:mga0mfxf90", "mags_list": [ { - "number_of_contig": 64, - "completeness": 16.46, - "bin_name": "bins.1", - "gene_count": 305, + "number_of_contig": 233, + "completeness": 12.16, + "bin_name": "bins.1", + "gene_count": 1133, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + 
"contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 349, + "completeness": 45.68, + "bin_name": "bins.2", + "gene_count": 1809, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 0.47, + "contamination": 10.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 106, + "completeness": 17.54, + "bin_name": "bins.3", + "gene_count": 552, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 4 + "num_t_rna": 12 }, { "number_of_contig": 147, - "completeness": 19.16, - "bin_name": "bins.2", - "gene_count": 744, + "completeness": 14.66, + "bin_name": "bins.4", + "gene_count": 668, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -29458,366 +31197,54 @@ "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 15 + "num_t_rna": 5 } ], - "unbinned_contig_num": 9299, - "started_at_time": "2021-10-11T02:23:29Z", + "unbinned_contig_num": 15108, + "started_at_time": "2021-10-11T02:27:08Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:38:32+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127649_tooShort (< 
3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.tooShort.fa", - "md5_checksum": "f84d25fee16a4dece54f5580d893ecaa", - "id": "nmdc:f84d25fee16a4dece54f5580d893ecaa", - "file_size_bytes": 79592416 - }, - { - "name": "Gp0127649_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.unbinned.fa", - "md5_checksum": "ed61fb0056b08bc82f4545c49b744c2a", - "id": "nmdc:ed61fb0056b08bc82f4545c49b744c2a", - "file_size_bytes": 14383032 - }, - { - "name": "Gp0127649_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127649", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_checkm_qa.out", - "md5_checksum": "8d4bce832a16affbcc3efeb8364e8eaa", - "id": "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", - "file_size_bytes": 942 - }, - { - "name": "Gp0127649_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127649", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_hqmq_bin.zip", - "md5_checksum": "40273505b8b3dddd3ee5cb5c83871067", - "id": "nmdc:40273505b8b3dddd3ee5cb5c83871067", - "file_size_bytes": 182 - }, - { - "name": "Gp0127649_metabat2 bins", - "description": "metabat2 bins for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_metabat_bin.zip", - "md5_checksum": "b767f2b59d0fd9e650914e140cacf104", - "id": "nmdc:b767f2b59d0fd9e650914e140cacf104", - "file_size_bytes": 269239 - } - ] + "ended_at_time": "2021-10-11T04:45:21+00:00" } - ] - }, - { - "_id": { - "$oid": 
"649b009773e8249959349b5f" - }, - "id": "nmdc:omprc-11-j43hz774", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-xngecc18" - ], - "has_output": [ - "jgi:574fe0af7ded5e3df1ee1493" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127652" - ], - "downstream_workflow_activity_records": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c87e" - }, - "has_input": [ - "nmdc:b0548475f69b48e2d150cb90ae27f2c6" - ], - "part_of": [ - "nmdc:mga0mfxf90" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:60f03b815160b29125c2bd0776a330bf", - "nmdc:c40fa552711f6b19130b2a559f2d4cdc" - ], - "was_informed_by": "gold:Gp0127652", - "input_read_count": 26604768, - "output_read_bases": 3697162034, - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4017319968, - "name": "Read QC Activity for nmdc:mga0mfxf90", - "output_read_count": 24717950, - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:45:21+00:00", - "output_data_objects": [ - { - "name": "Gp0127652_Filtered Reads", - "description": "Filtered Reads for Gp0127652", - "data_object_type": "Filtered Sequencing 
Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filtered.fastq.gz", - "md5_checksum": "60f03b815160b29125c2bd0776a330bf", - "id": "nmdc:60f03b815160b29125c2bd0776a330bf", - "file_size_bytes": 2019434951 - }, - { - "name": "Gp0127652_Filtered Stats", - "description": "Filtered Stats for Gp0127652", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filterStats.txt", - "md5_checksum": "c40fa552711f6b19130b2a559f2d4cdc", - "id": "nmdc:c40fa552711f6b19130b2a559f2d4cdc", - "file_size_bytes": 290 - } - ] - }, - { - "_id": { - "$oid": "649b009bff710ae353f8cf40" - }, - "has_input": [ - "nmdc:60f03b815160b29125c2bd0776a330bf" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:70f29a321c925cfc0e2003515f708400", - "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "nmdc:9cd3b2939adabd809741ae6a84260266", - "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "nmdc:1df4b479c887b43319d89cc80dc35239" - ], - "was_informed_by": "gold:Gp0127652", - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:21+00:00", - "output_data_objects": [ - { - "name": "Gp0127652_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", - "md5_checksum": "70f29a321c925cfc0e2003515f708400", - "id": "nmdc:70f29a321c925cfc0e2003515f708400", - "file_size_bytes": 1524 - }, - { - "name": "Gp0127652_Gottcha2 full TSV report", - "description": "Gottcha2 
full TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", - "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", - "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "file_size_bytes": 670250 - }, - { - "name": "Gp0127652_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127652", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", - "md5_checksum": "9cd3b2939adabd809741ae6a84260266", - "id": "nmdc:9cd3b2939adabd809741ae6a84260266", - "file_size_bytes": 229949 - }, - { - "name": "Gp0127652_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127652", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", - "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", - "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "file_size_bytes": 1814515284 - }, - { - "name": "Gp0127652_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127652", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", - "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", - "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127652_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127652", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", - "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", - "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - 
"file_size_bytes": 2330558 - }, - { - "name": "Gp0127652_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127652", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", - "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", - "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "file_size_bytes": 1445957300 - }, - { - "name": "Gp0127652_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127652", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", - "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", - "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "file_size_bytes": 639677 - }, - { - "name": "Gp0127652_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127652", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", - "md5_checksum": "1df4b479c887b43319d89cc80dc35239", - "id": "nmdc:1df4b479c887b43319d89cc80dc35239", - "file_size_bytes": 3991377 - } - ] - }, + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "61e719de833bcf838a7015b2" + "$oid": "649b005bbf2caae0415ef9c3" }, "has_input": [ - "nmdc:60f03b815160b29125c2bd0776a330bf" + "nmdc:a550eb6e614b375c1089ab816163ea63" ], "part_of": [ "nmdc:mga0mfxf90" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:70f29a321c925cfc0e2003515f708400", - "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "nmdc:9cd3b2939adabd809741ae6a84260266", - "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - 
"nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "nmdc:1df4b479c887b43319d89cc80dc35239" + "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", + "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", + "nmdc:863dc502676573c59ce69b1ff786042a", + "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", + "nmdc:a826d96e791f69ff7759d57f44a8a510", + "nmdc:58e310990be01a574eef05b3f5dd1495", + "nmdc:28ce5c4c605a1c4538ce63987252c0ad", + "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", + "nmdc:6342c9c98e297d2e39a2144c7ca0191b", + "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", + "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", + "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08" ], "was_informed_by": "gold:Gp0127652", "id": "nmdc:c86126b11f214f19721c56fadf91d87c", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", + "name": "Annotation Activity for nmdc:mga0mfxf90", "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:21+00:00", - "output_data_objects": [ - { - "name": "Gp0127652_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", - "md5_checksum": "70f29a321c925cfc0e2003515f708400", - "id": "nmdc:70f29a321c925cfc0e2003515f708400", - "file_size_bytes": 1524 - }, - { - "name": "Gp0127652_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", - "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", - "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "file_size_bytes": 670250 - }, - { - "name": "Gp0127652_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127652", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", - "md5_checksum": "9cd3b2939adabd809741ae6a84260266", - "id": "nmdc:9cd3b2939adabd809741ae6a84260266", - "file_size_bytes": 229949 - }, - { - "name": "Gp0127652_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127652", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", - "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", - "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "file_size_bytes": 1814515284 - }, - { - "name": "Gp0127652_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127652", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", - "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", - "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127652_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127652", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", - "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", - "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "file_size_bytes": 2330558 - }, - { - "name": "Gp0127652_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127652", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", - "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", - "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "file_size_bytes": 
1445957300 - }, - { - "name": "Gp0127652_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127652", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", - "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", - "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "file_size_bytes": 639677 - }, - { - "name": "Gp0127652_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127652", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", - "md5_checksum": "1df4b479c887b43319d89cc80dc35239", - "id": "nmdc:1df4b479c887b43319d89cc80dc35239", - "file_size_bytes": 3991377 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:45:21+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { "$oid": "649b005f2ca5ee4adb139fb1" @@ -29864,281 +31291,563 @@ "scaf_l50": 493, "scaf_l90": 290, "scaf_n50": 56962, - "scaf_n90": 179563, - "output_data_objects": [ - { - "name": "Gp0127652_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127652", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_contigs.fna", - "md5_checksum": "a550eb6e614b375c1089ab816163ea63", - "id": "nmdc:a550eb6e614b375c1089ab816163ea63", - "file_size_bytes": 117075841 - }, - { - "name": "Gp0127652_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127652", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_scaffolds.fna", - "md5_checksum": "9f194d271c352af3f68f2afeb1dbd499", - "id": "nmdc:9f194d271c352af3f68f2afeb1dbd499", - "file_size_bytes": 116423675 - }, - { - "name": "Gp0127652_Metagenome Contig Coverage 
Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_covstats.txt", - "md5_checksum": "b0a79069110825cfe5525a8fc4f02cb6", - "id": "nmdc:b0a79069110825cfe5525a8fc4f02cb6", - "file_size_bytes": 17141637 - }, - { - "name": "Gp0127652_Assembled AGP file", - "description": "Assembled AGP file for Gp0127652", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_assembly.agp", - "md5_checksum": "f54e8bda482b1cb8bc8e121ee5f39e07", - "id": "nmdc:f54e8bda482b1cb8bc8e121ee5f39e07", - "file_size_bytes": 16044279 - }, - { - "name": "Gp0127652_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127652", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_pairedMapped_sorted.bam", - "md5_checksum": "c8c5056ee57126695073137d0c1d3d04", - "id": "nmdc:c8c5056ee57126695073137d0c1d3d04", - "file_size_bytes": 2224050507 - } + "scaf_n90": 179563 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5f" + }, + "id": "nmdc:omprc-11-j43hz774", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-xngecc18" + ], + "has_output": [ + "jgi:574fe0af7ded5e3df1ee1493" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment 
microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127652" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87e" + }, + "has_input": [ + "nmdc:b0548475f69b48e2d150cb90ae27f2c6" + ], + "part_of": [ + "nmdc:mga0mfxf90" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:60f03b815160b29125c2bd0776a330bf", + "nmdc:c40fa552711f6b19130b2a559f2d4cdc" + ], + "was_informed_by": "gold:Gp0127652", + "input_read_count": 26604768, + "output_read_bases": 3697162034, + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4017319968, + "name": "Read QC Activity for nmdc:mga0mfxf90", + "output_read_count": 24717950, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:21+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf40" + }, + "has_input": [ + "nmdc:60f03b815160b29125c2bd0776a330bf" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:70f29a321c925cfc0e2003515f708400", + "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "nmdc:9cd3b2939adabd809741ae6a84260266", + "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "nmdc:1df4b479c887b43319d89cc80dc35239" + ], + "was_informed_by": "gold:Gp0127652", + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + 
"execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:45:21+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9c3" + "$oid": "61e719de833bcf838a7015b2" }, "has_input": [ - "nmdc:a550eb6e614b375c1089ab816163ea63" + "nmdc:60f03b815160b29125c2bd0776a330bf" ], "part_of": [ "nmdc:mga0mfxf90" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", - "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", - "nmdc:863dc502676573c59ce69b1ff786042a", - "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", - "nmdc:a826d96e791f69ff7759d57f44a8a510", - "nmdc:58e310990be01a574eef05b3f5dd1495", - "nmdc:28ce5c4c605a1c4538ce63987252c0ad", - "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", - "nmdc:6342c9c98e297d2e39a2144c7ca0191b", - "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", - "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", - "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08" + "nmdc:70f29a321c925cfc0e2003515f708400", + "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "nmdc:9cd3b2939adabd809741ae6a84260266", + "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "nmdc:1df4b479c887b43319d89cc80dc35239" ], "was_informed_by": "gold:Gp0127652", "id": "nmdc:c86126b11f214f19721c56fadf91d87c", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0mfxf90", + "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:MetagenomeAnnotationActivity", - 
"ended_at_time": "2021-10-11T04:45:21+00:00", - "output_data_objects": [ - { - "name": "Gp0127652_Protein FAA", - "description": "Protein FAA for Gp0127652", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_proteins.faa", - "md5_checksum": "096c54bce5ec1cc5d41ac64553e42cb3", - "id": "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", - "file_size_bytes": 66555768 - }, - { - "name": "Gp0127652_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127652", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_structural_annotation.gff", - "md5_checksum": "ac8cd253a39e6e5fe0a0930f3bf6888a", - "id": "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", - "file_size_bytes": 2521 - }, - { - "name": "Gp0127652_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127652", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_functional_annotation.gff", - "md5_checksum": "863dc502676573c59ce69b1ff786042a", - "id": "nmdc:863dc502676573c59ce69b1ff786042a", - "file_size_bytes": 74520486 - }, - { - "name": "Gp0127652_KO TSV file", - "description": "KO TSV file for Gp0127652", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko.tsv", - "md5_checksum": "28ed2a9e345d0e542127fd1dc2173ae7", - "id": "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", - "file_size_bytes": 8379185 - }, - { - "name": "Gp0127652_EC TSV file", - "description": "EC TSV file for Gp0127652", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ec.tsv", - "md5_checksum": "a826d96e791f69ff7759d57f44a8a510", - "id": 
"nmdc:a826d96e791f69ff7759d57f44a8a510", - "file_size_bytes": 5555311 - }, - { - "name": "Gp0127652_COG GFF file", - "description": "COG GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cog.gff", - "md5_checksum": "58e310990be01a574eef05b3f5dd1495", - "id": "nmdc:58e310990be01a574eef05b3f5dd1495", - "file_size_bytes": 43385646 - }, - { - "name": "Gp0127652_PFAM GFF file", - "description": "PFAM GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_pfam.gff", - "md5_checksum": "28ce5c4c605a1c4538ce63987252c0ad", - "id": "nmdc:28ce5c4c605a1c4538ce63987252c0ad", - "file_size_bytes": 33061709 - }, - { - "name": "Gp0127652_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_tigrfam.gff", - "md5_checksum": "6de9ddf0b07c9bcf1409aceb7ee2f941", - "id": "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", - "file_size_bytes": 3665042 - }, - { - "name": "Gp0127652_SMART GFF file", - "description": "SMART GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_smart.gff", - "md5_checksum": "6342c9c98e297d2e39a2144c7ca0191b", - "id": "nmdc:6342c9c98e297d2e39a2144c7ca0191b", - "file_size_bytes": 9667737 - }, - { - "name": "Gp0127652_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_supfam.gff", - "md5_checksum": "d20aa781d3ad6b0face7cc9c412bc3f7", - "id": "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", - "file_size_bytes": 54593577 - }, - { - "name": "Gp0127652_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cath_funfam.gff", - "md5_checksum": "db2e4b8f6cc1e8dc934e14b93589805a", - 
"id": "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", - "file_size_bytes": 41409254 - }, - { - "name": "Gp0127652_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko_ec.gff", - "md5_checksum": "f51f9d679d1b045f4ebc61dab7fc2f08", - "id": "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08", - "file_size_bytes": 26617726 - } - ] + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:45:21+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127654_Filtered Reads", + "description": "Filtered Reads for Gp0127654", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", + "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", + "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", + "file_size_bytes": 2479437709 + }, + { + "name": "Gp0127654_Filtered Stats", + "description": "Filtered Stats for Gp0127654", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", + "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", + "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", + "file_size_bytes": 284 + }, + { + "name": "Gp0127654_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", + "md5_checksum": "130ee7559789726a2cadccd3126dacad", + "id": "nmdc:130ee7559789726a2cadccd3126dacad", + "file_size_bytes": 3508 + }, + { + "name": "Gp0127654_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127654", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", + "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", + "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", + "file_size_bytes": 798264 + }, + { + "name": "Gp0127654_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127654", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", + "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", + "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", + "file_size_bytes": 234834 + }, + { + "name": "Gp0127654_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127654", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", + "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", + "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "file_size_bytes": 2231971137 + }, + { + "name": "Gp0127654_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127654", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", + "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", + "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "file_size_bytes": 257151 + }, + { + "name": "Gp0127654_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127654", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", + "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", + "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "file_size_bytes": 2341088 + }, + { + "name": 
"Gp0127654_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127654", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", + "md5_checksum": "dfc90170aa038c2425702be223cb2f23", + "id": "nmdc:dfc90170aa038c2425702be223cb2f23", + "file_size_bytes": 1782429285 + }, + { + "name": "Gp0127654_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127654", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", + "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", + "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "file_size_bytes": 661482 + }, + { + "name": "Gp0127654_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127654", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", + "md5_checksum": "1c8339d96884c4a408de7804e00490d1", + "id": "nmdc:1c8339d96884c4a408de7804e00490d1", + "file_size_bytes": 4020719 + }, + { + "name": "Gp0127654_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", + "md5_checksum": "130ee7559789726a2cadccd3126dacad", + "id": "nmdc:130ee7559789726a2cadccd3126dacad", + "file_size_bytes": 3508 + }, + { + "name": "Gp0127654_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", + "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", + "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", + "file_size_bytes": 798264 + }, + { + "name": 
"Gp0127654_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127654", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", + "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", + "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", + "file_size_bytes": 234834 + }, + { + "name": "Gp0127654_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127654", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", + "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", + "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "file_size_bytes": 2231971137 + }, + { + "name": "Gp0127654_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127654", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", + "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", + "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "file_size_bytes": 257151 + }, + { + "name": "Gp0127654_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127654", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", + "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", + "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "file_size_bytes": 2341088 + }, + { + "name": "Gp0127654_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127654", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", + "md5_checksum": "dfc90170aa038c2425702be223cb2f23", + "id": "nmdc:dfc90170aa038c2425702be223cb2f23", + "file_size_bytes": 1782429285 + }, + { + "name": "Gp0127654_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127654", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", + "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", + "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "file_size_bytes": 661482 + }, + { + "name": "Gp0127654_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127654", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", + "md5_checksum": "1c8339d96884c4a408de7804e00490d1", + "id": "nmdc:1c8339d96884c4a408de7804e00490d1", + "file_size_bytes": 4020719 + }, + { + "name": "Gp0127654_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127654", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_contigs.fna", + "md5_checksum": "909ae2a351ab1b99dfa877969ba33fc0", + "id": "nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "file_size_bytes": 93264957 + }, + { + "name": "Gp0127654_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127654", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_scaffolds.fna", + "md5_checksum": "1bd3a82d1ced0a3a4e4b207ecdeedc50", + "id": "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", + "file_size_bytes": 92670816 + }, + { + "name": "Gp0127654_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127654", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_covstats.txt", + "md5_checksum": "e2281ea2c0342c7243ac6a3179948547", + "id": "nmdc:e2281ea2c0342c7243ac6a3179948547", + "file_size_bytes": 15633835 + }, + { + "name": "Gp0127654_Assembled AGP file", + "description": "Assembled AGP file for Gp0127654", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_assembly.agp", + "md5_checksum": "ad045e491d27a8a2a4bb13c62ed74fd8", + "id": "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", + "file_size_bytes": 14624353 + }, + { + "name": "Gp0127654_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127654", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_pairedMapped_sorted.bam", + "md5_checksum": "d8e09db1617046117fbb15631cf4977f", + "id": "nmdc:d8e09db1617046117fbb15631cf4977f", + "file_size_bytes": 2687176632 + }, + { + "name": "Gp0127654_Protein FAA", + "description": "Protein FAA for Gp0127654", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_proteins.faa", + "md5_checksum": "7e7c871dbe9ed0b2692444b77d0afe8d", + "id": "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", + "file_size_bytes": 55142968 + }, + { + "name": "Gp0127654_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127654", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_structural_annotation.gff", + "md5_checksum": "7b466cbbadfde9b125f2a31e48d8c60d", + "id": "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", + "file_size_bytes": 2518 }, + { + "name": "Gp0127654_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127654", + "data_object_type": 
"Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_functional_annotation.gff", + "md5_checksum": "6a03c0a78fa59ac0a55777a9ea73e5d0", + "id": "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", + "file_size_bytes": 64337475 + }, + { + "name": "Gp0127654_KO TSV file", + "description": "KO TSV file for Gp0127654", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko.tsv", + "md5_checksum": "2275c42fa5206d646c7b477b184b9519", + "id": "nmdc:2275c42fa5206d646c7b477b184b9519", + "file_size_bytes": 7628926 + }, + { + "name": "Gp0127654_EC TSV file", + "description": "EC TSV file for Gp0127654", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ec.tsv", + "md5_checksum": "9c7fc55c2cbc986d520695dfb69b3e26", + "id": "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", + "file_size_bytes": 5084393 + }, + { + "name": "Gp0127654_COG GFF file", + "description": "COG GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cog.gff", + "md5_checksum": "fabdc762526357e8a6f288a07f947f06", + "id": "nmdc:fabdc762526357e8a6f288a07f947f06", + "file_size_bytes": 37680499 + }, + { + "name": "Gp0127654_PFAM GFF file", + "description": "PFAM GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_pfam.gff", + "md5_checksum": "1e8dcb98dfc7598e3965af187c296f12", + "id": "nmdc:1e8dcb98dfc7598e3965af187c296f12", + "file_size_bytes": 27765282 + }, + { + "name": "Gp0127654_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_tigrfam.gff", + "md5_checksum": "86f1a8ccf1532e11fc09d94dc39af57c", + "id": "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", + 
"file_size_bytes": 2970208 + }, + { + "name": "Gp0127654_SMART GFF file", + "description": "SMART GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_smart.gff", + "md5_checksum": "8add80a0fe95822917e4e7eaf275ed4f", + "id": "nmdc:8add80a0fe95822917e4e7eaf275ed4f", + "file_size_bytes": 8172309 + }, + { + "name": "Gp0127654_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_supfam.gff", + "md5_checksum": "6268ff527b56548792e7dca811500436", + "id": "nmdc:6268ff527b56548792e7dca811500436", + "file_size_bytes": 46611499 + }, + { + "name": "Gp0127654_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cath_funfam.gff", + "md5_checksum": "ff7ac6fb709d1f0f7b476c9a5b29524e", + "id": "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", + "file_size_bytes": 35108681 + }, + { + "name": "Gp0127654_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko_ec.gff", + "md5_checksum": "6c50fdd87bdba9116c1ff81e21b8a95c", + "id": "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c", + "file_size_bytes": 24261565 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127654_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127654", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.tooShort.fa", + "md5_checksum": "920bcae91eae59ed8b9b19bcb7392ac5", + "id": "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", + "file_size_bytes": 80638518 + }, + { + "name": "Gp0127654_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.unbinned.fa", + "md5_checksum": "d13bc24bdf72e7ba00d60f0e2e0805e8", + "id": "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", + "file_size_bytes": 12400628 + }, + { + "name": "Gp0127654_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127654", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_checkm_qa.out", + "md5_checksum": "3fd777151ef41b39b272cb42c1d5e8ba", + "id": "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", + "file_size_bytes": 785 + }, + { + "name": "Gp0127654_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127654", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_hqmq_bin.zip", + "md5_checksum": "470edf3d79702d3b806b545db595ca02", + "id": "nmdc:470edf3d79702d3b806b545db595ca02", + "file_size_bytes": 182 + }, + { + "name": "Gp0127654_metabat2 bins", + "description": "metabat2 bins for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_metabat_bin.zip", + "md5_checksum": "8fc6f1a0269aa5179d72c52cf1a9726e", + "id": "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e", + "file_size_bytes": 69938 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34727" + "$oid": "649b0052ec087f6bbab34728" }, "has_input": [ 
- "nmdc:a550eb6e614b375c1089ab816163ea63", - "nmdc:c8c5056ee57126695073137d0c1d3d04", - "nmdc:863dc502676573c59ce69b1ff786042a" + "nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "nmdc:d8e09db1617046117fbb15631cf4977f", + "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0" ], - "too_short_contig_num": 200309, + "too_short_contig_num": 189586, "part_of": [ - "nmdc:mga0mfxf90" + "nmdc:mga0h0s362" ], - "binned_contig_num": 835, + "binned_contig_num": 56, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:4371932b5834f2deadb2fbfc42b056f7", - "nmdc:5a8d8441e6e472837809ee31d517d32a", - "nmdc:16016a7b2388048eec469f73395bc478", - "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", - "nmdc:21467369d04671628ae67afbaf1d2076" + "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", + "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", + "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", + "nmdc:470edf3d79702d3b806b545db595ca02", + "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e" ], - "was_informed_by": "gold:Gp0127652", - "input_contig_num": 216252, - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "was_informed_by": "gold:Gp0127654", + "input_contig_num": 197669, + "id": "nmdc:168441535388b19bbdee0928b42e5b20", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0mfxf90", - "mags_list": [ - { - "number_of_contig": 233, - "completeness": 12.16, - "bin_name": "bins.1", - "gene_count": 1133, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 10 - }, - { - "number_of_contig": 349, - "completeness": 45.68, - "bin_name": "bins.2", - "gene_count": 1809, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 10.0, - "gtdbtk_class": "", 
- "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 16 - }, - { - "number_of_contig": 106, - "completeness": 17.54, - "bin_name": "bins.3", - "gene_count": 552, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 12 - }, - { - "number_of_contig": 147, - "completeness": 14.66, - "bin_name": "bins.4", - "gene_count": 668, + "name": "MAGs Analysis Activity for nmdc:mga0h0s362", + "mags_list": [ + { + "number_of_contig": 56, + "completeness": 18.09, + "bin_name": "bins.1", + "gene_count": 272, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", @@ -30154,97 +31863,142 @@ "num_t_rna": 5 } ], - "unbinned_contig_num": 15108, - "started_at_time": "2021-10-11T02:27:08Z", + "unbinned_contig_num": 8027, + "started_at_time": "2021-10-11T02:23:29Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:45:21+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127652_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.tooShort.fa", - "md5_checksum": "4371932b5834f2deadb2fbfc42b056f7", - "id": "nmdc:4371932b5834f2deadb2fbfc42b056f7", - "file_size_bytes": 89154072 - }, - { - "name": 
"Gp0127652_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.unbinned.fa", - "md5_checksum": "5a8d8441e6e472837809ee31d517d32a", - "id": "nmdc:5a8d8441e6e472837809ee31d517d32a", - "file_size_bytes": 24514353 - }, - { - "name": "Gp0127652_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127652", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_checkm_qa.out", - "md5_checksum": "16016a7b2388048eec469f73395bc478", - "id": "nmdc:16016a7b2388048eec469f73395bc478", - "file_size_bytes": 1320 - }, - { - "name": "Gp0127652_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127652", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_hqmq_bin.zip", - "md5_checksum": "1e604f9f29f74c6169c4d27f839bb7b0", - "id": "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", - "file_size_bytes": 182 - }, - { - "name": "Gp0127652_metabat2 bins", - "description": "metabat2 bins for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_metabat_bin.zip", - "md5_checksum": "21467369d04671628ae67afbaf1d2076", - "id": "nmdc:21467369d04671628ae67afbaf1d2076", - "file_size_bytes": 1013750 - } + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c7" + }, + "has_input": [ + "nmdc:909ae2a351ab1b99dfa877969ba33fc0" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", + "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", + "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", + "nmdc:2275c42fa5206d646c7b477b184b9519", + "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", + "nmdc:fabdc762526357e8a6f288a07f947f06", + "nmdc:1e8dcb98dfc7598e3965af187c296f12", + "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", + "nmdc:8add80a0fe95822917e4e7eaf275ed4f", + "nmdc:6268ff527b56548792e7dca811500436", + "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", + "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c" + ], + "was_informed_by": "gold:Gp0127654", + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0h0s362", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb4" + }, + "has_input": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "ctg_logsum": 130142, + "scaf_logsum": 130537, + "gap_pct": 0.0008, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", + "nmdc:e2281ea2c0342c7243ac6a3179948547", + "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", + "nmdc:d8e09db1617046117fbb15631cf4977f" + ], + "asm_score": 4.409, + "was_informed_by": "gold:Gp0127654", + "ctg_powsum": 13918, + "scaf_max": 69027, + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "scaf_powsum": 13961, + "execution_resource": "NERSC-Cori", + "contigs": 197669, + "name": "Assembly Activity for nmdc:mga0h0s362", + "ctg_max": 69027, + "gc_std": 0.09749, + "contig_bp": 85731750, + "gc_avg": 0.62891, + "started_at_time": "2021-10-11T02:23:29Z", + "scaf_bp": 85732440, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 197600, + "ended_at_time": "2021-10-11T03:58:56+00:00", + "ctg_l50": 404, + "ctg_l90": 
286, + "ctg_n50": 62467, + "ctg_n90": 168661, + "scaf_l50": 404, + "scaf_l90": 286, + "scaf_n50": 62435, + "scaf_n90": 168596, + "scaf_l_gt50k": 69027, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.080514446 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b60" + }, + "id": "nmdc:omprc-11-kgxpef29", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-tpk9x619" + ], + "has_output": [ + "jgi:574fe0b17ded5e3df1ee1494" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127654" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b60" - }, - "id": "nmdc:omprc-11-kgxpef29", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-tpk9x619" - ], - "has_output": [ - "jgi:574fe0b17ded5e3df1ee1494" - ], - "part_of": [ - 
"nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127654" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { "$oid": "649b009d6bdd4fd20273c87f" @@ -30270,28 +32024,10 @@ "output_read_count": 30289044, "started_at_time": "2021-10-11T02:23:29Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127654_Filtered Reads", - "description": "Filtered Reads for Gp0127654", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", - "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", - "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", - "file_size_bytes": 2479437709 - }, - { - "name": "Gp0127654_Filtered Stats", - "description": "Filtered Stats for Gp0127654", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", - "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", - "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", - "file_size_bytes": 284 - } - ] - }, + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { "$oid": "649b009bff710ae353f8cf45" @@ -30317,573 +32053,615 @@ "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", "started_at_time": "2021-10-11T02:23:29Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00", - 
"output_data_objects": [ - { - "name": "Gp0127654_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", - "md5_checksum": "130ee7559789726a2cadccd3126dacad", - "id": "nmdc:130ee7559789726a2cadccd3126dacad", - "file_size_bytes": 3508 - }, - { - "name": "Gp0127654_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", - "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", - "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "file_size_bytes": 798264 - }, - { - "name": "Gp0127654_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127654", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", - "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", - "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "file_size_bytes": 234834 - }, - { - "name": "Gp0127654_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127654", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", - "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", - "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "file_size_bytes": 2231971137 - }, - { - "name": "Gp0127654_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127654", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", - "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", - "id": 
"nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "file_size_bytes": 257151 - }, - { - "name": "Gp0127654_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127654", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", - "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", - "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "file_size_bytes": 2341088 - }, - { - "name": "Gp0127654_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127654", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", - "md5_checksum": "dfc90170aa038c2425702be223cb2f23", - "id": "nmdc:dfc90170aa038c2425702be223cb2f23", - "file_size_bytes": 1782429285 - }, - { - "name": "Gp0127654_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127654", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", - "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", - "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "file_size_bytes": 661482 - }, - { - "name": "Gp0127654_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127654", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", - "md5_checksum": "1c8339d96884c4a408de7804e00490d1", - "id": "nmdc:1c8339d96884c4a408de7804e00490d1", - "file_size_bytes": 4020719 - } - ] + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": 
[], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e719f6833bcf838a7017f0" + }, + "has_input": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:130ee7559789726a2cadccd3126dacad", + "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", + "nmdc:7ccb4ee5a0728322154b29a79d13c842", + "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "nmdc:dfc90170aa038c2425702be223cb2f23", + "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "nmdc:1c8339d96884c4a408de7804e00490d1" + ], + "was_informed_by": "gold:Gp0127654", + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127656_Filtered Reads", + "description": "Filtered Reads for Gp0127656", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filtered.fastq.gz", + "md5_checksum": "cec95659bb04ae095f51821ddaa9fa59", + "id": "nmdc:cec95659bb04ae095f51821ddaa9fa59", + "file_size_bytes": 2195848744 + }, + { + "name": "Gp0127656_Filtered Stats", + "description": "Filtered Stats for Gp0127656", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filterStats.txt", + "md5_checksum": "7b4f365bbe942a523890abf13d1b6436", + "id": "nmdc:7b4f365bbe942a523890abf13d1b6436", + "file_size_bytes": 284 + }, + { + "name": "Gp0127656_Gottcha2 TSV 
report", + "description": "Gottcha2 TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", + "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", + "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", + "file_size_bytes": 2418 + }, + { + "name": "Gp0127656_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", + "md5_checksum": "92ab65cdaca3367552e03d895123e04f", + "id": "nmdc:92ab65cdaca3367552e03d895123e04f", + "file_size_bytes": 759212 + }, + { + "name": "Gp0127656_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127656", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", + "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", + "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", + "file_size_bytes": 231563 + }, + { + "name": "Gp0127656_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127656", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", + "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", + "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", + "file_size_bytes": 1950007455 + }, + { + "name": "Gp0127656_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127656", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", + "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", + "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", + "file_size_bytes": 255724 + }, + { + 
"name": "Gp0127656_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127656", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", + "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", + "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", + "file_size_bytes": 2337553 + }, + { + "name": "Gp0127656_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127656", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", + "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", + "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", + "file_size_bytes": 1555636513 + }, + { + "name": "Gp0127656_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127656", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", + "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", + "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", + "file_size_bytes": 647090 + }, + { + "name": "Gp0127656_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127656", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", + "md5_checksum": "ae369194e4b24e137fc23da0412277a6", + "id": "nmdc:ae369194e4b24e137fc23da0412277a6", + "file_size_bytes": 3939982 + }, + { + "name": "Gp0127656_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", + "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", + "id": 
"nmdc:ccbe419157d8286626330fd0eb0dd0e0", + "file_size_bytes": 2418 + }, + { + "name": "Gp0127656_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", + "md5_checksum": "92ab65cdaca3367552e03d895123e04f", + "id": "nmdc:92ab65cdaca3367552e03d895123e04f", + "file_size_bytes": 759212 + }, + { + "name": "Gp0127656_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127656", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", + "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", + "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", + "file_size_bytes": 231563 + }, + { + "name": "Gp0127656_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127656", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", + "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", + "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", + "file_size_bytes": 1950007455 + }, + { + "name": "Gp0127656_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127656", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", + "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", + "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", + "file_size_bytes": 255724 + }, + { + "name": "Gp0127656_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127656", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", + "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", + "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", + "file_size_bytes": 2337553 + }, + { + "name": "Gp0127656_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127656", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", + "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", + "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", + "file_size_bytes": 1555636513 + }, + { + "name": "Gp0127656_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127656", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", + "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", + "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", + "file_size_bytes": 647090 + }, + { + "name": "Gp0127656_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127656", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", + "md5_checksum": "ae369194e4b24e137fc23da0412277a6", + "id": "nmdc:ae369194e4b24e137fc23da0412277a6", + "file_size_bytes": 3939982 + }, + { + "name": "Gp0127656_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127656", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_contigs.fna", + "md5_checksum": "8106808f8e245ef9a46a4e31561eba7f", + "id": "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "file_size_bytes": 78938478 + }, + { + "name": "Gp0127656_Assembled scaffold fasta", + "description": "Assembled scaffold 
fasta for Gp0127656", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_scaffolds.fna", + "md5_checksum": "55385159fa8361d7ff747cdc1155512b", + "id": "nmdc:55385159fa8361d7ff747cdc1155512b", + "file_size_bytes": 78428743 + }, + { + "name": "Gp0127656_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_covstats.txt", + "md5_checksum": "4741908a5b07eaa2312ff3e6d2d991aa", + "id": "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", + "file_size_bytes": 13384382 + }, + { + "name": "Gp0127656_Assembled AGP file", + "description": "Assembled AGP file for Gp0127656", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_assembly.agp", + "md5_checksum": "172e5cf3b5c5bf8e4896058dad3e814a", + "id": "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", + "file_size_bytes": 12508060 + }, + { + "name": "Gp0127656_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127656", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_pairedMapped_sorted.bam", + "md5_checksum": "941f749a92155321c5ce7e5aa32d3b55", + "id": "nmdc:941f749a92155321c5ce7e5aa32d3b55", + "file_size_bytes": 2375706529 + }, + { + "name": "Gp0127656_Protein FAA", + "description": "Protein FAA for Gp0127656", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_proteins.faa", + "md5_checksum": "18f68cc8acda8d33d5fd6f21a9166aa8", + "id": "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", + "file_size_bytes": 46951183 + }, + { + "name": "Gp0127656_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127656", + 
"data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_structural_annotation.gff", + "md5_checksum": "87d5f3a505d23c1aa2deea960702d55b", + "id": "nmdc:87d5f3a505d23c1aa2deea960702d55b", + "file_size_bytes": 2511 + }, + { + "name": "Gp0127656_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127656", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_functional_annotation.gff", + "md5_checksum": "8e8be343bbb1ba11f3e15867b419d05d", + "id": "nmdc:8e8be343bbb1ba11f3e15867b419d05d", + "file_size_bytes": 54902900 + }, + { + "name": "Gp0127656_KO TSV file", + "description": "KO TSV file for Gp0127656", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko.tsv", + "md5_checksum": "91c2485c0ebf683aed3e7935ec60b7d1", + "id": "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", + "file_size_bytes": 6468844 + }, + { + "name": "Gp0127656_EC TSV file", + "description": "EC TSV file for Gp0127656", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ec.tsv", + "md5_checksum": "fb6740e86534daeea41ab6d5cf9d91d2", + "id": "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", + "file_size_bytes": 4308547 + }, + { + "name": "Gp0127656_COG GFF file", + "description": "COG GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cog.gff", + "md5_checksum": "19da9b3f211164643f276bc74604c9b0", + "id": "nmdc:19da9b3f211164643f276bc74604c9b0", + "file_size_bytes": 32139189 + }, + { + "name": "Gp0127656_PFAM GFF file", + "description": "PFAM GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_pfam.gff", + 
"md5_checksum": "19905547dfa37274a9f91c9caaf6bacc", + "id": "nmdc:19905547dfa37274a9f91c9caaf6bacc", + "file_size_bytes": 23590201 + }, + { + "name": "Gp0127656_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_tigrfam.gff", + "md5_checksum": "30c2b0722d225938975243ab1041ed12", + "id": "nmdc:30c2b0722d225938975243ab1041ed12", + "file_size_bytes": 2485400 + }, + { + "name": "Gp0127656_SMART GFF file", + "description": "SMART GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_smart.gff", + "md5_checksum": "623e913fa98f88f6037754daf5d9ffc5", + "id": "nmdc:623e913fa98f88f6037754daf5d9ffc5", + "file_size_bytes": 6932331 + }, + { + "name": "Gp0127656_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_supfam.gff", + "md5_checksum": "ec56df16785bc67e073128f09366ec43", + "id": "nmdc:ec56df16785bc67e073128f09366ec43", + "file_size_bytes": 39880284 + }, + { + "name": "Gp0127656_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cath_funfam.gff", + "md5_checksum": "2831d1ead0af4681b2ae1a9f21733637", + "id": "nmdc:2831d1ead0af4681b2ae1a9f21733637", + "file_size_bytes": 29872897 + }, + { + "name": "Gp0127656_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko_ec.gff", + "md5_checksum": "53f225f74011f7d30fcfd5c60b3693ae", + "id": "nmdc:53f225f74011f7d30fcfd5c60b3693ae", + "file_size_bytes": 20564625 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + 
"data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127656_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.tooShort.fa", + "md5_checksum": "313c88df1890a33d388bdb23c7ad37c3", + "id": "nmdc:313c88df1890a33d388bdb23c7ad37c3", + "file_size_bytes": 69332992 + }, + { + "name": "Gp0127656_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.unbinned.fa", + "md5_checksum": "ae567f55fe899da83831fda23dcd7a20", + "id": "nmdc:ae567f55fe899da83831fda23dcd7a20", + "file_size_bytes": 9275333 + }, + { + "name": "Gp0127656_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127656", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_checkm_qa.out", + "md5_checksum": "5a8dbda6aec0825b4159d5b53481db90", + "id": "nmdc:5a8dbda6aec0825b4159d5b53481db90", + "file_size_bytes": 775 + }, + { + "name": "Gp0127656_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127656", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_hqmq_bin.zip", + "md5_checksum": "060a7f90c5c5123cac41ed946a5482af", + "id": "nmdc:060a7f90c5c5123cac41ed946a5482af", + "file_size_bytes": 182 }, + { + "name": "Gp0127656_metabat2 bins", + "description": "metabat2 bins for Gp0127656", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_metabat_bin.zip", + "md5_checksum": "e9f5d03e8264308ed77da0b63eb738fe", + "id": "nmdc:e9f5d03e8264308ed77da0b63eb738fe", + "file_size_bytes": 101752 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "61e719f6833bcf838a7017f0" + "$oid": "649b0052ec087f6bbab34726" }, "has_input": [ - "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "nmdc:941f749a92155321c5ce7e5aa32d3b55", + "nmdc:8e8be343bbb1ba11f3e15867b419d05d" ], + "too_short_contig_num": 163283, "part_of": [ - "nmdc:mga0h0s362" + "nmdc:mga00hh562" ], + "binned_contig_num": 83, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:130ee7559789726a2cadccd3126dacad", - "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "nmdc:dfc90170aa038c2425702be223cb2f23", - "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "nmdc:1c8339d96884c4a408de7804e00490d1" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:313c88df1890a33d388bdb23c7ad37c3", + "nmdc:ae567f55fe899da83831fda23dcd7a20", + "nmdc:5a8dbda6aec0825b4159d5b53481db90", + "nmdc:060a7f90c5c5123cac41ed946a5482af", + "nmdc:e9f5d03e8264308ed77da0b63eb738fe" ], - "was_informed_by": "gold:Gp0127654", - "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "was_informed_by": "gold:Gp0127656", + "input_contig_num": 169495, + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127654_Gottcha2 TSV report", - 
"description": "Gottcha2 TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", - "md5_checksum": "130ee7559789726a2cadccd3126dacad", - "id": "nmdc:130ee7559789726a2cadccd3126dacad", - "file_size_bytes": 3508 - }, - { - "name": "Gp0127654_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", - "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", - "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "file_size_bytes": 798264 - }, - { - "name": "Gp0127654_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127654", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", - "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", - "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "file_size_bytes": 234834 - }, - { - "name": "Gp0127654_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127654", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", - "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", - "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "file_size_bytes": 2231971137 - }, - { - "name": "Gp0127654_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127654", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", - "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", - "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "file_size_bytes": 257151 - }, - { - "name": 
"Gp0127654_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127654", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", - "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", - "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "file_size_bytes": 2341088 - }, - { - "name": "Gp0127654_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127654", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", - "md5_checksum": "dfc90170aa038c2425702be223cb2f23", - "id": "nmdc:dfc90170aa038c2425702be223cb2f23", - "file_size_bytes": 1782429285 - }, - { - "name": "Gp0127654_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127654", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", - "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", - "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "file_size_bytes": 661482 - }, + "name": "MAGs Analysis Activity for nmdc:mga00hh562", + "mags_list": [ { - "name": "Gp0127654_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127654", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", - "md5_checksum": "1c8339d96884c4a408de7804e00490d1", - "id": "nmdc:1c8339d96884c4a408de7804e00490d1", - "file_size_bytes": 4020719 + "number_of_contig": 83, + "completeness": 14.35, + "bin_name": "bins.1", + "gene_count": 388, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + 
"gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 } - ] - }, + ], + "unbinned_contig_num": 6129, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "649b005f2ca5ee4adb139fb4" + "$oid": "649b005bbf2caae0415ef9c6" }, "has_input": [ - "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + "nmdc:8106808f8e245ef9a46a4e31561eba7f" ], "part_of": [ - "nmdc:mga0h0s362" + "nmdc:mga00hh562" ], - "ctg_logsum": 130142, - "scaf_logsum": 130537, - "gap_pct": 0.0008, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:909ae2a351ab1b99dfa877969ba33fc0", - "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", - "nmdc:e2281ea2c0342c7243ac6a3179948547", - "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", - "nmdc:d8e09db1617046117fbb15631cf4977f" + "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", + "nmdc:87d5f3a505d23c1aa2deea960702d55b", + "nmdc:8e8be343bbb1ba11f3e15867b419d05d", + "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", + "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", + "nmdc:19da9b3f211164643f276bc74604c9b0", + "nmdc:19905547dfa37274a9f91c9caaf6bacc", + "nmdc:30c2b0722d225938975243ab1041ed12", + "nmdc:623e913fa98f88f6037754daf5d9ffc5", + "nmdc:ec56df16785bc67e073128f09366ec43", + "nmdc:2831d1ead0af4681b2ae1a9f21733637", + "nmdc:53f225f74011f7d30fcfd5c60b3693ae" ], - "asm_score": 4.409, - "was_informed_by": "gold:Gp0127654", - "ctg_powsum": 13918, - "scaf_max": 69027, - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "scaf_powsum": 13961, + "was_informed_by": "gold:Gp0127656", + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", "execution_resource": "NERSC-Cori", - "contigs": 197669, - "name": "Assembly Activity for nmdc:mga0h0s362", - "ctg_max": 69027, - "gc_std": 
0.09749, - "contig_bp": 85731750, - "gc_avg": 0.62891, - "started_at_time": "2021-10-11T02:23:29Z", - "scaf_bp": 85732440, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 197600, - "ended_at_time": "2021-10-11T03:58:56+00:00", - "ctg_l50": 404, - "ctg_l90": 286, - "ctg_n50": 62467, - "ctg_n90": 168661, - "scaf_l50": 404, - "scaf_l90": 286, - "scaf_n50": 62435, - "scaf_n90": 168596, - "scaf_l_gt50k": 69027, - "scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.080514446, - "output_data_objects": [ - { - "name": "Gp0127654_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127654", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_contigs.fna", - "md5_checksum": "909ae2a351ab1b99dfa877969ba33fc0", - "id": "nmdc:909ae2a351ab1b99dfa877969ba33fc0", - "file_size_bytes": 93264957 - }, - { - "name": "Gp0127654_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127654", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_scaffolds.fna", - "md5_checksum": "1bd3a82d1ced0a3a4e4b207ecdeedc50", - "id": "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", - "file_size_bytes": 92670816 - }, - { - "name": "Gp0127654_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_covstats.txt", - "md5_checksum": "e2281ea2c0342c7243ac6a3179948547", - "id": "nmdc:e2281ea2c0342c7243ac6a3179948547", - "file_size_bytes": 15633835 - }, - { - "name": "Gp0127654_Assembled AGP file", - "description": "Assembled AGP file for Gp0127654", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_assembly.agp", - "md5_checksum": "ad045e491d27a8a2a4bb13c62ed74fd8", - "id": "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", - 
"file_size_bytes": 14624353 - }, - { - "name": "Gp0127654_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127654", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_pairedMapped_sorted.bam", - "md5_checksum": "d8e09db1617046117fbb15631cf4977f", - "id": "nmdc:d8e09db1617046117fbb15631cf4977f", - "file_size_bytes": 2687176632 - } - ] - }, + "name": "Annotation Activity for nmdc:mga00hh562", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9c7" + "$oid": "649b005f2ca5ee4adb139fb2" }, "has_input": [ - "nmdc:909ae2a351ab1b99dfa877969ba33fc0" + "nmdc:cec95659bb04ae095f51821ddaa9fa59" ], "part_of": [ - "nmdc:mga0h0s362" + "nmdc:mga00hh562" ], + "ctg_logsum": 98556, + "scaf_logsum": 99077, + "gap_pct": 0.00105, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", - "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", - "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", - "nmdc:2275c42fa5206d646c7b477b184b9519", - "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", - "nmdc:fabdc762526357e8a6f288a07f947f06", - "nmdc:1e8dcb98dfc7598e3965af187c296f12", - "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", - "nmdc:8add80a0fe95822917e4e7eaf275ed4f", - "nmdc:6268ff527b56548792e7dca811500436", - "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", - "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c" + "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "nmdc:55385159fa8361d7ff747cdc1155512b", + "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", + "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", + "nmdc:941f749a92155321c5ce7e5aa32d3b55" ], - "was_informed_by": "gold:Gp0127654", - "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "asm_score": 2.914, + "was_informed_by": "gold:Gp0127656", + "ctg_powsum": 
10453, + "scaf_max": 9079, + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "scaf_powsum": 10508, "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0h0s362", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", + "contigs": 169495, + "name": "Assembly Activity for nmdc:mga00hh562", + "ctg_max": 9079, + "gc_std": 0.09653, + "contig_bp": 72511508, + "gc_avg": 0.62989, + "started_at_time": "2021-10-11T02:23:35Z", + "scaf_bp": 72512268, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169419, "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127654_Protein FAA", - "description": "Protein FAA for Gp0127654", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_proteins.faa", - "md5_checksum": "7e7c871dbe9ed0b2692444b77d0afe8d", - "id": "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", - "file_size_bytes": 55142968 - }, - { - "name": "Gp0127654_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127654", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_structural_annotation.gff", - "md5_checksum": "7b466cbbadfde9b125f2a31e48d8c60d", - "id": "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", - "file_size_bytes": 2518 - }, - { - "name": "Gp0127654_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127654", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_functional_annotation.gff", - "md5_checksum": "6a03c0a78fa59ac0a55777a9ea73e5d0", - "id": "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", - "file_size_bytes": 64337475 - }, - { - "name": "Gp0127654_KO TSV file", - "description": "KO TSV file for Gp0127654", - "data_object_type": "Annotation 
KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko.tsv", - "md5_checksum": "2275c42fa5206d646c7b477b184b9519", - "id": "nmdc:2275c42fa5206d646c7b477b184b9519", - "file_size_bytes": 7628926 - }, - { - "name": "Gp0127654_EC TSV file", - "description": "EC TSV file for Gp0127654", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ec.tsv", - "md5_checksum": "9c7fc55c2cbc986d520695dfb69b3e26", - "id": "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", - "file_size_bytes": 5084393 - }, - { - "name": "Gp0127654_COG GFF file", - "description": "COG GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cog.gff", - "md5_checksum": "fabdc762526357e8a6f288a07f947f06", - "id": "nmdc:fabdc762526357e8a6f288a07f947f06", - "file_size_bytes": 37680499 - }, - { - "name": "Gp0127654_PFAM GFF file", - "description": "PFAM GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_pfam.gff", - "md5_checksum": "1e8dcb98dfc7598e3965af187c296f12", - "id": "nmdc:1e8dcb98dfc7598e3965af187c296f12", - "file_size_bytes": 27765282 - }, - { - "name": "Gp0127654_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_tigrfam.gff", - "md5_checksum": "86f1a8ccf1532e11fc09d94dc39af57c", - "id": "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", - "file_size_bytes": 2970208 - }, - { - "name": "Gp0127654_SMART GFF file", - "description": "SMART GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_smart.gff", - "md5_checksum": "8add80a0fe95822917e4e7eaf275ed4f", - "id": "nmdc:8add80a0fe95822917e4e7eaf275ed4f", - "file_size_bytes": 8172309 - }, - { - "name": "Gp0127654_SuperFam GFF file", - 
"description": "SuperFam GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_supfam.gff", - "md5_checksum": "6268ff527b56548792e7dca811500436", - "id": "nmdc:6268ff527b56548792e7dca811500436", - "file_size_bytes": 46611499 - }, - { - "name": "Gp0127654_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cath_funfam.gff", - "md5_checksum": "ff7ac6fb709d1f0f7b476c9a5b29524e", - "id": "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", - "file_size_bytes": 35108681 - }, - { - "name": "Gp0127654_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko_ec.gff", - "md5_checksum": "6c50fdd87bdba9116c1ff81e21b8a95c", - "id": "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c", - "file_size_bytes": 24261565 - } - ] - }, + "ctg_l50": 399, + "ctg_l90": 286, + "ctg_n50": 54638, + "ctg_n90": 144448, + "scaf_l50": 399, + "scaf_l90": 286, + "scaf_n50": 54616, + "scaf_n90": 144376 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34728" + "$oid": "649b009773e8249959349b61" }, - "has_input": [ - "nmdc:909ae2a351ab1b99dfa877969ba33fc0", - "nmdc:d8e09db1617046117fbb15631cf4977f", - "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0" - ], - "too_short_contig_num": 189586, - "part_of": [ - "nmdc:mga0h0s362" + "id": "nmdc:omprc-11-qrsway30", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing 
zone)", + "has_input": [ + "nmdc:bsm-11-mmr87q87" ], - "binned_contig_num": 56, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", - "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", - "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", - "nmdc:470edf3d79702d3b806b545db595ca02", - "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e" + "jgi:574fe0b47ded5e3df1ee1496" ], - "was_informed_by": "gold:Gp0127654", - "input_contig_num": 197669, - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0h0s362", - "mags_list": [ - { - "number_of_contig": 56, - "completeness": 18.09, - "bin_name": "bins.1", - "gene_count": 272, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" ], - "unbinned_contig_num": 8027, - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127654_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127654", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.tooShort.fa", - "md5_checksum": "920bcae91eae59ed8b9b19bcb7392ac5", - "id": "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", - "file_size_bytes": 80638518 - }, - { - "name": "Gp0127654_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.unbinned.fa", - "md5_checksum": "d13bc24bdf72e7ba00d60f0e2e0805e8", - "id": "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", - "file_size_bytes": 12400628 - }, - { - "name": "Gp0127654_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127654", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_checkm_qa.out", - "md5_checksum": "3fd777151ef41b39b272cb42c1d5e8ba", - "id": "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", - "file_size_bytes": 785 - }, - { - "name": "Gp0127654_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127654", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_hqmq_bin.zip", - "md5_checksum": "470edf3d79702d3b806b545db595ca02", - "id": "nmdc:470edf3d79702d3b806b545db595ca02", - "file_size_bytes": 182 - }, - { - "name": "Gp0127654_metabat2 bins", - "description": "metabat2 bins for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_metabat_bin.zip", - "md5_checksum": "8fc6f1a0269aa5179d72c52cf1a9726e", - "id": "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e", - "file_size_bytes": 69938 - } + "add_date": "2016-01-11", + "mod_date": "2021-06-18", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", + "omics_type": { + 
"has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127656" ] } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b61" - }, - "id": "nmdc:omprc-11-qrsway30", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-mmr87q87" - ], - "has_output": [ - "jgi:574fe0b47ded5e3df1ee1496" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-18", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127656" - ], - "downstream_workflow_activity_records": [ + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { "$oid": "649b009d6bdd4fd20273c87c" @@ -30909,28 +32687,10 @@ "output_read_count": 26481746, "started_at_time": "2021-10-11T02:23:35Z", "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127656_Filtered Reads", - "description": "Filtered Reads for Gp0127656", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filtered.fastq.gz", - "md5_checksum": "cec95659bb04ae095f51821ddaa9fa59", - "id": 
"nmdc:cec95659bb04ae095f51821ddaa9fa59", - "file_size_bytes": 2195848744 - }, - { - "name": "Gp0127656_Filtered Stats", - "description": "Filtered Stats for Gp0127656", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filterStats.txt", - "md5_checksum": "7b4f365bbe942a523890abf13d1b6436", - "id": "nmdc:7b4f365bbe942a523890abf13d1b6436", - "file_size_bytes": 284 - } - ] - }, + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { "$oid": "649b009bff710ae353f8cf3e" @@ -30956,89 +32716,16 @@ "name": "ReadBased Analysis Activity for nmdc:mga00hh562", "started_at_time": "2021-10-11T02:23:35Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127656_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", - "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", - "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", - "file_size_bytes": 2418 - }, - { - "name": "Gp0127656_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", - "md5_checksum": "92ab65cdaca3367552e03d895123e04f", - "id": "nmdc:92ab65cdaca3367552e03d895123e04f", - "file_size_bytes": 759212 - }, - { - "name": "Gp0127656_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127656", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", - "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", - "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", - "file_size_bytes": 231563 - }, - { - "name": 
"Gp0127656_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127656", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", - "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", - "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", - "file_size_bytes": 1950007455 - }, - { - "name": "Gp0127656_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127656", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", - "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", - "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", - "file_size_bytes": 255724 - }, - { - "name": "Gp0127656_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127656", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", - "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", - "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", - "file_size_bytes": 2337553 - }, - { - "name": "Gp0127656_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127656", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", - "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", - "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", - "file_size_bytes": 1555636513 - }, - { - "name": "Gp0127656_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127656", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", - "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", - "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", - "file_size_bytes": 647090 - }, - { - "name": "Gp0127656_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127656", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", - "md5_checksum": "ae369194e4b24e137fc23da0412277a6", - "id": "nmdc:ae369194e4b24e137fc23da0412277a6", - "file_size_bytes": 3939982 - } - ] - }, + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { "$oid": "61e719f0833bcf838a701752" @@ -31067,725 +32754,489 @@ "name": "ReadBased Analysis Activity for nmdc:mga00hh562", "started_at_time": "2021-10-11T02:23:35Z", "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127656_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", - "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", - "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", - "file_size_bytes": 2418 - }, - { - "name": "Gp0127656_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", - "md5_checksum": "92ab65cdaca3367552e03d895123e04f", - "id": "nmdc:92ab65cdaca3367552e03d895123e04f", - "file_size_bytes": 759212 - }, - { - "name": "Gp0127656_Gottcha2 Krona HTML report", 
- "description": "Gottcha2 Krona HTML report for Gp0127656", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", - "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", - "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", - "file_size_bytes": 231563 - }, - { - "name": "Gp0127656_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127656", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", - "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", - "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", - "file_size_bytes": 1950007455 - }, - { - "name": "Gp0127656_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127656", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", - "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", - "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", - "file_size_bytes": 255724 - }, - { - "name": "Gp0127656_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127656", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", - "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", - "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", - "file_size_bytes": 2337553 - }, - { - "name": "Gp0127656_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127656", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", - "md5_checksum": 
"dd953aebfd5cf624a5ffa8c6d6b64b08", - "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", - "file_size_bytes": 1555636513 - }, - { - "name": "Gp0127656_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127656", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", - "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", - "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", - "file_size_bytes": 647090 - }, - { - "name": "Gp0127656_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127656", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", - "md5_checksum": "ae369194e4b24e137fc23da0412277a6", - "id": "nmdc:ae369194e4b24e137fc23da0412277a6", - "file_size_bytes": 3939982 - } - ] + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127651_Filtered Reads", + "description": "Filtered Reads for Gp0127651", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filtered.fastq.gz", + "md5_checksum": "2791a196017767af3b5b21a3029799c0", + "id": "nmdc:2791a196017767af3b5b21a3029799c0", + "file_size_bytes": 1856919615 }, { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb2" - }, - "has_input": [ - "nmdc:cec95659bb04ae095f51821ddaa9fa59" - ], - "part_of": [ - "nmdc:mga00hh562" - ], - "ctg_logsum": 98556, - "scaf_logsum": 99077, - "gap_pct": 0.00105, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8106808f8e245ef9a46a4e31561eba7f", - "nmdc:55385159fa8361d7ff747cdc1155512b", - 
"nmdc:4741908a5b07eaa2312ff3e6d2d991aa", - "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", - "nmdc:941f749a92155321c5ce7e5aa32d3b55" - ], - "asm_score": 2.914, - "was_informed_by": "gold:Gp0127656", - "ctg_powsum": 10453, - "scaf_max": 9079, - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "scaf_powsum": 10508, - "execution_resource": "NERSC-Cori", - "contigs": 169495, - "name": "Assembly Activity for nmdc:mga00hh562", - "ctg_max": 9079, - "gc_std": 0.09653, - "contig_bp": 72511508, - "gc_avg": 0.62989, - "started_at_time": "2021-10-11T02:23:35Z", - "scaf_bp": 72512268, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 169419, - "ended_at_time": "2021-10-11T03:58:56+00:00", - "ctg_l50": 399, - "ctg_l90": 286, - "ctg_n50": 54638, - "ctg_n90": 144448, - "scaf_l50": 399, - "scaf_l90": 286, - "scaf_n50": 54616, - "scaf_n90": 144376, - "output_data_objects": [ - { - "name": "Gp0127656_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127656", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_contigs.fna", - "md5_checksum": "8106808f8e245ef9a46a4e31561eba7f", - "id": "nmdc:8106808f8e245ef9a46a4e31561eba7f", - "file_size_bytes": 78938478 - }, - { - "name": "Gp0127656_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127656", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_scaffolds.fna", - "md5_checksum": "55385159fa8361d7ff747cdc1155512b", - "id": "nmdc:55385159fa8361d7ff747cdc1155512b", - "file_size_bytes": 78428743 - }, - { - "name": "Gp0127656_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_covstats.txt", - "md5_checksum": "4741908a5b07eaa2312ff3e6d2d991aa", - "id": "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", - 
"file_size_bytes": 13384382 - }, - { - "name": "Gp0127656_Assembled AGP file", - "description": "Assembled AGP file for Gp0127656", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_assembly.agp", - "md5_checksum": "172e5cf3b5c5bf8e4896058dad3e814a", - "id": "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", - "file_size_bytes": 12508060 - }, - { - "name": "Gp0127656_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127656", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_pairedMapped_sorted.bam", - "md5_checksum": "941f749a92155321c5ce7e5aa32d3b55", - "id": "nmdc:941f749a92155321c5ce7e5aa32d3b55", - "file_size_bytes": 2375706529 - } - ] + "name": "Gp0127651_Filtered Stats", + "description": "Filtered Stats for Gp0127651", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", + "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", + "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", + "file_size_bytes": 283 }, { - "_id": { - "$oid": "649b005bbf2caae0415ef9c6" - }, - "has_input": [ - "nmdc:8106808f8e245ef9a46a4e31561eba7f" - ], - "part_of": [ - "nmdc:mga00hh562" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", - "nmdc:87d5f3a505d23c1aa2deea960702d55b", - "nmdc:8e8be343bbb1ba11f3e15867b419d05d", - "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", - "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", - "nmdc:19da9b3f211164643f276bc74604c9b0", - "nmdc:19905547dfa37274a9f91c9caaf6bacc", - "nmdc:30c2b0722d225938975243ab1041ed12", - "nmdc:623e913fa98f88f6037754daf5d9ffc5", - "nmdc:ec56df16785bc67e073128f09366ec43", - "nmdc:2831d1ead0af4681b2ae1a9f21733637", - "nmdc:53f225f74011f7d30fcfd5c60b3693ae" - ], - "was_informed_by": 
"gold:Gp0127656", - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga00hh562", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "Gp0127656_Protein FAA", - "description": "Protein FAA for Gp0127656", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_proteins.faa", - "md5_checksum": "18f68cc8acda8d33d5fd6f21a9166aa8", - "id": "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", - "file_size_bytes": 46951183 - }, - { - "name": "Gp0127656_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127656", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_structural_annotation.gff", - "md5_checksum": "87d5f3a505d23c1aa2deea960702d55b", - "id": "nmdc:87d5f3a505d23c1aa2deea960702d55b", - "file_size_bytes": 2511 - }, - { - "name": "Gp0127656_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127656", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_functional_annotation.gff", - "md5_checksum": "8e8be343bbb1ba11f3e15867b419d05d", - "id": "nmdc:8e8be343bbb1ba11f3e15867b419d05d", - "file_size_bytes": 54902900 - }, - { - "name": "Gp0127656_KO TSV file", - "description": "KO TSV file for Gp0127656", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko.tsv", - "md5_checksum": "91c2485c0ebf683aed3e7935ec60b7d1", - "id": "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", - "file_size_bytes": 6468844 - }, - { - "name": "Gp0127656_EC TSV file", - "description": "EC TSV 
file for Gp0127656", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ec.tsv", - "md5_checksum": "fb6740e86534daeea41ab6d5cf9d91d2", - "id": "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", - "file_size_bytes": 4308547 - }, - { - "name": "Gp0127656_COG GFF file", - "description": "COG GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cog.gff", - "md5_checksum": "19da9b3f211164643f276bc74604c9b0", - "id": "nmdc:19da9b3f211164643f276bc74604c9b0", - "file_size_bytes": 32139189 - }, - { - "name": "Gp0127656_PFAM GFF file", - "description": "PFAM GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_pfam.gff", - "md5_checksum": "19905547dfa37274a9f91c9caaf6bacc", - "id": "nmdc:19905547dfa37274a9f91c9caaf6bacc", - "file_size_bytes": 23590201 - }, - { - "name": "Gp0127656_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_tigrfam.gff", - "md5_checksum": "30c2b0722d225938975243ab1041ed12", - "id": "nmdc:30c2b0722d225938975243ab1041ed12", - "file_size_bytes": 2485400 - }, - { - "name": "Gp0127656_SMART GFF file", - "description": "SMART GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_smart.gff", - "md5_checksum": "623e913fa98f88f6037754daf5d9ffc5", - "id": "nmdc:623e913fa98f88f6037754daf5d9ffc5", - "file_size_bytes": 6932331 - }, - { - "name": "Gp0127656_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_supfam.gff", - "md5_checksum": "ec56df16785bc67e073128f09366ec43", - "id": "nmdc:ec56df16785bc67e073128f09366ec43", - "file_size_bytes": 39880284 - }, - { - "name": "Gp0127656_Cath 
FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cath_funfam.gff", - "md5_checksum": "2831d1ead0af4681b2ae1a9f21733637", - "id": "nmdc:2831d1ead0af4681b2ae1a9f21733637", - "file_size_bytes": 29872897 - }, - { - "name": "Gp0127656_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko_ec.gff", - "md5_checksum": "53f225f74011f7d30fcfd5c60b3693ae", - "id": "nmdc:53f225f74011f7d30fcfd5c60b3693ae", - "file_size_bytes": 20564625 - } - ] + "name": "Gp0127651_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", + "md5_checksum": "53ee263960c39126e039656a121deb96", + "id": "nmdc:53ee263960c39126e039656a121deb96", + "file_size_bytes": 1199 + }, + { + "name": "Gp0127651_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", + "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", + "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "file_size_bytes": 703299 + }, + { + "name": "Gp0127651_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127651", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", + "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", + "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "file_size_bytes": 229311 + }, + { + "name": "Gp0127651_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127651", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", + "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", + "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "file_size_bytes": 1642196063 + }, + { + "name": "Gp0127651_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127651", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", + "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", + "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", + "file_size_bytes": 254418 + }, + { + "name": "Gp0127651_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127651", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", + "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", + "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", + "file_size_bytes": 2333132 + }, + { + "name": "Gp0127651_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127651", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", + "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", + "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "file_size_bytes": 1309125719 + }, + { + "name": "Gp0127651_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127651", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", + "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", + "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "file_size_bytes": 639737 + }, + { + "name": "Gp0127651_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127651", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", + "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", + "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", + "file_size_bytes": 3988966 + }, + { + "name": "Gp0127651_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", + "md5_checksum": "53ee263960c39126e039656a121deb96", + "id": "nmdc:53ee263960c39126e039656a121deb96", + "file_size_bytes": 1199 + }, + { + "name": "Gp0127651_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", + "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", + "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "file_size_bytes": 703299 + }, + { + "name": "Gp0127651_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127651", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", + "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", + "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "file_size_bytes": 229311 + }, + { + "name": "Gp0127651_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127651", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", + "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", + "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "file_size_bytes": 1642196063 + }, + { + "name": 
"Gp0127651_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127651", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", + "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", + "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", + "file_size_bytes": 254418 + }, + { + "name": "Gp0127651_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127651", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", + "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", + "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", + "file_size_bytes": 2333132 + }, + { + "name": "Gp0127651_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127651", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", + "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", + "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "file_size_bytes": 1309125719 + }, + { + "name": "Gp0127651_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127651", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", + "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", + "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "file_size_bytes": 639737 + }, + { + "name": "Gp0127651_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127651", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", + "md5_checksum": 
"ddba84cd45462d3a55df4ac62bb4eeb8", + "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", + "file_size_bytes": 3988966 + }, + { + "name": "Gp0127651_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127651", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_contigs.fna", + "md5_checksum": "8483663a943ff4c0fc0249353676bfc1", + "id": "nmdc:8483663a943ff4c0fc0249353676bfc1", + "file_size_bytes": 95957530 + }, + { + "name": "Gp0127651_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127651", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_scaffolds.fna", + "md5_checksum": "ccca920c56ad3d050e2d8801bcbe4855", + "id": "nmdc:ccca920c56ad3d050e2d8801bcbe4855", + "file_size_bytes": 95414704 + }, + { + "name": "Gp0127651_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_covstats.txt", + "md5_checksum": "f21e374c1c31c02bd0e41228cc7895c3", + "id": "nmdc:f21e374c1c31c02bd0e41228cc7895c3", + "file_size_bytes": 14289388 + }, + { + "name": "Gp0127651_Assembled AGP file", + "description": "Assembled AGP file for Gp0127651", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_assembly.agp", + "md5_checksum": "f43ae7935184d10ba65961171efcac34", + "id": "nmdc:f43ae7935184d10ba65961171efcac34", + "file_size_bytes": 13343603 + }, + { + "name": "Gp0127651_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127651", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_pairedMapped_sorted.bam", + "md5_checksum": "838162ead3f121f5bc02bc1234a32a55", + "id": 
"nmdc:838162ead3f121f5bc02bc1234a32a55", + "file_size_bytes": 2037589818 + }, + { + "name": "Gp0127651_Protein FAA", + "description": "Protein FAA for Gp0127651", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_proteins.faa", + "md5_checksum": "d8dc4f31293c549b12bbcab915d708cc", + "id": "nmdc:d8dc4f31293c549b12bbcab915d708cc", + "file_size_bytes": 54370216 + }, + { + "name": "Gp0127651_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127651", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_structural_annotation.gff", + "md5_checksum": "415256907dcafaa68778a2ba358d9ac5", + "id": "nmdc:415256907dcafaa68778a2ba358d9ac5", + "file_size_bytes": 2517 + }, + { + "name": "Gp0127651_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127651", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_functional_annotation.gff", + "md5_checksum": "f0c60a537e6867bf62fde15577669453", + "id": "nmdc:f0c60a537e6867bf62fde15577669453", + "file_size_bytes": 61364019 + }, + { + "name": "Gp0127651_KO TSV file", + "description": "KO TSV file for Gp0127651", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko.tsv", + "md5_checksum": "e0f16b60c50581799b7ecb254e61e537", + "id": "nmdc:e0f16b60c50581799b7ecb254e61e537", + "file_size_bytes": 6908291 + }, + { + "name": "Gp0127651_EC TSV file", + "description": "EC TSV file for Gp0127651", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ec.tsv", + "md5_checksum": "6eb21304f0762bd8c11b98826d310321", + "id": 
"nmdc:6eb21304f0762bd8c11b98826d310321", + "file_size_bytes": 4650091 + }, + { + "name": "Gp0127651_COG GFF file", + "description": "COG GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cog.gff", + "md5_checksum": "4ea7982c99cbb6d8ccc9fd949bee09ec", + "id": "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", + "file_size_bytes": 36137856 + }, + { + "name": "Gp0127651_PFAM GFF file", + "description": "PFAM GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_pfam.gff", + "md5_checksum": "f389dc8a93de9f21322db385b2788f5f", + "id": "nmdc:f389dc8a93de9f21322db385b2788f5f", + "file_size_bytes": 27173740 + }, + { + "name": "Gp0127651_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_tigrfam.gff", + "md5_checksum": "8e6659ce96dfa72ceefda39c74fb1dce", + "id": "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", + "file_size_bytes": 2943355 + }, + { + "name": "Gp0127651_SMART GFF file", + "description": "SMART GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_smart.gff", + "md5_checksum": "89bc9cf9183fed6700cde44fad41b830", + "id": "nmdc:89bc9cf9183fed6700cde44fad41b830", + "file_size_bytes": 7927726 + }, + { + "name": "Gp0127651_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_supfam.gff", + "md5_checksum": "84aae368e77c1d07c6b6e8deecbc3f3b", + "id": "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", + "file_size_bytes": 45499652 + }, + { + "name": "Gp0127651_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cath_funfam.gff", + "md5_checksum": "ee5612e5ee82ec2d57029d1bc4e1756f", + 
"id": "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", + "file_size_bytes": 34280847 + }, + { + "name": "Gp0127651_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko_ec.gff", + "md5_checksum": "68c06be8d27d1697b4a6955537b318c8", + "id": "nmdc:68c06be8d27d1697b4a6955537b318c8", + "file_size_bytes": 21943549 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127651_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.tooShort.fa", + "md5_checksum": "6f012bfca6cb653f92eaf927003de0fa", + "id": "nmdc:6f012bfca6cb653f92eaf927003de0fa", + "file_size_bytes": 77381118 + }, + { + "name": "Gp0127651_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.unbinned.fa", + "md5_checksum": "298e0a0c98ebe4fb673da7de9fcb03a2", + "id": "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", + "file_size_bytes": 17278743 + }, + { + "name": "Gp0127651_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127651", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_checkm_qa.out", + "md5_checksum": "66fd77d80cc9257da98c5bce4cb30626", + "id": 
"nmdc:66fd77d80cc9257da98c5bce4cb30626", + "file_size_bytes": 760 + }, + { + "name": "Gp0127651_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127651", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_hqmq_bin.zip", + "md5_checksum": "06caec963e007225d1d9411078829100", + "id": "nmdc:06caec963e007225d1d9411078829100", + "file_size_bytes": 182 }, + { + "name": "Gp0127651_metabat2 bins", + "description": "metabat2 bins for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_metabat_bin.zip", + "md5_checksum": "eb5216cc4e09d88c4c59a76c4808a693", + "id": "nmdc:eb5216cc4e09d88c4c59a76c4808a693", + "file_size_bytes": 397044 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34726" + "$oid": "649b0052ec087f6bbab34721" }, "has_input": [ - "nmdc:8106808f8e245ef9a46a4e31561eba7f", - "nmdc:941f749a92155321c5ce7e5aa32d3b55", - "nmdc:8e8be343bbb1ba11f3e15867b419d05d" + "nmdc:8483663a943ff4c0fc0249353676bfc1", + "nmdc:838162ead3f121f5bc02bc1234a32a55", + "nmdc:f0c60a537e6867bf62fde15577669453" ], - "too_short_contig_num": 163283, + "too_short_contig_num": 168908, "part_of": [ - "nmdc:mga00hh562" + "nmdc:mga08hnt47" ], - "binned_contig_num": 83, + "binned_contig_num": 216, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:313c88df1890a33d388bdb23c7ad37c3", - "nmdc:ae567f55fe899da83831fda23dcd7a20", - "nmdc:5a8dbda6aec0825b4159d5b53481db90", - "nmdc:060a7f90c5c5123cac41ed946a5482af", - "nmdc:e9f5d03e8264308ed77da0b63eb738fe" + "nmdc:6f012bfca6cb653f92eaf927003de0fa", + "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", + "nmdc:66fd77d80cc9257da98c5bce4cb30626", + "nmdc:06caec963e007225d1d9411078829100", + 
"nmdc:eb5216cc4e09d88c4c59a76c4808a693" ], - "was_informed_by": "gold:Gp0127656", - "input_contig_num": 169495, - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "was_informed_by": "gold:Gp0127651", + "input_contig_num": 180437, + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga00hh562", + "name": "MAGs Analysis Activity for nmdc:mga08hnt47", "mags_list": [ { - "number_of_contig": 83, - "completeness": 14.35, + "number_of_contig": 216, + "completeness": 36.79, "bin_name": "bins.1", - "gene_count": 388, + "gene_count": 1612, "bin_quality": "LQ", "gtdbtk_species": "", "gtdbtk_order": "", "num_16s": 0, "gtdbtk_family": "", "gtdbtk_domain": "", - "contamination": 0.0, + "contamination": 0.97, "gtdbtk_class": "", "gtdbtk_phylum": "", "num_5s": 0, "num_23s": 0, "gtdbtk_genus": "", - "num_t_rna": 5 + "num_t_rna": 28 } ], - "unbinned_contig_num": 6129, - "started_at_time": "2021-10-11T02:23:35Z", + "unbinned_contig_num": 11313, + "started_at_time": "2021-10-11T02:27:15Z", "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127656_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.tooShort.fa", - "md5_checksum": "313c88df1890a33d388bdb23c7ad37c3", - "id": "nmdc:313c88df1890a33d388bdb23c7ad37c3", - 
"file_size_bytes": 69332992 - }, - { - "name": "Gp0127656_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.unbinned.fa", - "md5_checksum": "ae567f55fe899da83831fda23dcd7a20", - "id": "nmdc:ae567f55fe899da83831fda23dcd7a20", - "file_size_bytes": 9275333 - }, - { - "name": "Gp0127656_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127656", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_checkm_qa.out", - "md5_checksum": "5a8dbda6aec0825b4159d5b53481db90", - "id": "nmdc:5a8dbda6aec0825b4159d5b53481db90", - "file_size_bytes": 775 - }, - { - "name": "Gp0127656_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127656", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_hqmq_bin.zip", - "md5_checksum": "060a7f90c5c5123cac41ed946a5482af", - "id": "nmdc:060a7f90c5c5123cac41ed946a5482af", - "file_size_bytes": 182 - }, - { - "name": "Gp0127656_metabat2 bins", - "description": "metabat2 bins for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_metabat_bin.zip", - "md5_checksum": "e9f5d03e8264308ed77da0b63eb738fe", - "id": "nmdc:e9f5d03e8264308ed77da0b63eb738fe", - "file_size_bytes": 101752 - } - ] + "ended_at_time": "2021-10-11T03:57:48+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b62" - }, - "id": "nmdc:omprc-11-nry91b19", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface 
water mixing zone)", - "has_input": [ - "nmdc:bsm-11-jqzk1523" - ], - "has_output": [ - "jgi:574fde907ded5e3df1ee1426" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127651" - ], - "downstream_workflow_activity_records": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c877" - }, - "has_input": [ - "nmdc:8254ce50b88be8c384fd37fe21e0d0c4" - ], - "part_of": [ - "nmdc:mga08hnt47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:2791a196017767af3b5b21a3029799c0", - "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f" - ], - "was_informed_by": "gold:Gp0127651", - "input_read_count": 23728904, - "output_read_bases": 3352071049, - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3583064504, - "name": "Read QC Activity for nmdc:mga08hnt47", - "output_read_count": 22416634, - "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:57:48+00:00", - "output_data_objects": [ - { - "name": "Gp0127651_Filtered Reads", - "description": "Filtered Reads for Gp0127651", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filtered.fastq.gz", - "md5_checksum": "2791a196017767af3b5b21a3029799c0", - "id": "nmdc:2791a196017767af3b5b21a3029799c0", - "file_size_bytes": 1856919615 - }, - { - "name": "Gp0127651_Filtered Stats", - "description": "Filtered Stats for Gp0127651", - "data_object_type": "QC 
Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", - "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", - "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", - "file_size_bytes": 283 - } - ] - }, - { - "_id": { - "$oid": "649b009bff710ae353f8cf42" - }, - "has_input": [ - "nmdc:2791a196017767af3b5b21a3029799c0" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:53ee263960c39126e039656a121deb96", - "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "nmdc:b4cbc81c986c67c1037c8b7280924683", - "nmdc:e0c61a191258597984a05d86eaf4d71f", - "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" - ], - "was_informed_by": "gold:Gp0127651", - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga08hnt47", - "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:57:48+00:00", - "output_data_objects": [ - { - "name": "Gp0127651_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", - "md5_checksum": "53ee263960c39126e039656a121deb96", - "id": "nmdc:53ee263960c39126e039656a121deb96", - "file_size_bytes": 1199 - }, - { - "name": "Gp0127651_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", - "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", - "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "file_size_bytes": 703299 - }, - { - "name": "Gp0127651_Gottcha2 Krona HTML report", - "description": 
"Gottcha2 Krona HTML report for Gp0127651", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", - "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", - "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "file_size_bytes": 229311 - }, - { - "name": "Gp0127651_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127651", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", - "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", - "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "file_size_bytes": 1642196063 - }, - { - "name": "Gp0127651_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127651", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", - "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", - "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", - "file_size_bytes": 254418 - }, - { - "name": "Gp0127651_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127651", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", - "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", - "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", - "file_size_bytes": 2333132 - }, - { - "name": "Gp0127651_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127651", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", - "md5_checksum": 
"e1cbcfa86444a4ff4e992bcb6653d18f", - "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "file_size_bytes": 1309125719 - }, - { - "name": "Gp0127651_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127651", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", - "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", - "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "file_size_bytes": 639737 - }, - { - "name": "Gp0127651_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127651", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", - "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", - "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", - "file_size_bytes": 3988966 - } - ] - }, + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "61e719ba833bcf838a7012b5" + "$oid": "649b005bbf2caae0415ef9bf" }, "has_input": [ - "nmdc:2791a196017767af3b5b21a3029799c0" + "nmdc:8483663a943ff4c0fc0249353676bfc1" ], "part_of": [ "nmdc:mga08hnt47" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:53ee263960c39126e039656a121deb96", - "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "nmdc:b4cbc81c986c67c1037c8b7280924683", - "nmdc:e0c61a191258597984a05d86eaf4d71f", - "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" + "nmdc:d8dc4f31293c549b12bbcab915d708cc", + "nmdc:415256907dcafaa68778a2ba358d9ac5", + "nmdc:f0c60a537e6867bf62fde15577669453", + "nmdc:e0f16b60c50581799b7ecb254e61e537", + 
"nmdc:6eb21304f0762bd8c11b98826d310321", + "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", + "nmdc:f389dc8a93de9f21322db385b2788f5f", + "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", + "nmdc:89bc9cf9183fed6700cde44fad41b830", + "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", + "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", + "nmdc:68c06be8d27d1697b4a6955537b318c8" ], "was_informed_by": "gold:Gp0127651", "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga08hnt47", + "name": "Annotation Activity for nmdc:mga08hnt47", "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:57:48+00:00", - "output_data_objects": [ - { - "name": "Gp0127651_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", - "md5_checksum": "53ee263960c39126e039656a121deb96", - "id": "nmdc:53ee263960c39126e039656a121deb96", - "file_size_bytes": 1199 - }, - { - "name": "Gp0127651_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", - "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", - "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "file_size_bytes": 703299 - }, - { - "name": "Gp0127651_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127651", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", - "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", - "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "file_size_bytes": 229311 - }, - { - "name": "Gp0127651_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for 
Gp0127651", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", - "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", - "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "file_size_bytes": 1642196063 - }, - { - "name": "Gp0127651_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127651", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", - "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", - "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", - "file_size_bytes": 254418 - }, - { - "name": "Gp0127651_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127651", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", - "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", - "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", - "file_size_bytes": 2333132 - }, - { - "name": "Gp0127651_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127651", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", - "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", - "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "file_size_bytes": 1309125719 - }, - { - "name": "Gp0127651_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127651", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", - "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", - "id": 
"nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "file_size_bytes": 639737 - }, - { - "name": "Gp0127651_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127651", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", - "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", - "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", - "file_size_bytes": 3988966 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:57:48+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { "$oid": "649b005f2ca5ee4adb139fab" @@ -31832,596 +33283,623 @@ "scaf_l50": 493, "scaf_l90": 292, "scaf_n50": 51225, - "scaf_n90": 148971, - "output_data_objects": [ - { - "name": "Gp0127651_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127651", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_contigs.fna", - "md5_checksum": "8483663a943ff4c0fc0249353676bfc1", - "id": "nmdc:8483663a943ff4c0fc0249353676bfc1", - "file_size_bytes": 95957530 - }, - { - "name": "Gp0127651_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127651", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_scaffolds.fna", - "md5_checksum": "ccca920c56ad3d050e2d8801bcbe4855", - "id": "nmdc:ccca920c56ad3d050e2d8801bcbe4855", - "file_size_bytes": 95414704 - }, - { - "name": "Gp0127651_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_covstats.txt", - "md5_checksum": "f21e374c1c31c02bd0e41228cc7895c3", - "id": "nmdc:f21e374c1c31c02bd0e41228cc7895c3", - "file_size_bytes": 14289388 - }, - { - "name": "Gp0127651_Assembled AGP 
file", - "description": "Assembled AGP file for Gp0127651", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_assembly.agp", - "md5_checksum": "f43ae7935184d10ba65961171efcac34", - "id": "nmdc:f43ae7935184d10ba65961171efcac34", - "file_size_bytes": 13343603 - }, - { - "name": "Gp0127651_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127651", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_pairedMapped_sorted.bam", - "md5_checksum": "838162ead3f121f5bc02bc1234a32a55", - "id": "nmdc:838162ead3f121f5bc02bc1234a32a55", - "file_size_bytes": 2037589818 - } + "scaf_n90": 148971 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b62" + }, + "id": "nmdc:omprc-11-nry91b19", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jqzk1523" + ], + "has_output": [ + "jgi:574fde907ded5e3df1ee1426" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127651" ] - }, + } + ], + 
"reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9bf" + "$oid": "649b009d6bdd4fd20273c877" }, "has_input": [ - "nmdc:8483663a943ff4c0fc0249353676bfc1" + "nmdc:8254ce50b88be8c384fd37fe21e0d0c4" ], "part_of": [ "nmdc:mga08hnt47" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d8dc4f31293c549b12bbcab915d708cc", - "nmdc:415256907dcafaa68778a2ba358d9ac5", - "nmdc:f0c60a537e6867bf62fde15577669453", - "nmdc:e0f16b60c50581799b7ecb254e61e537", - "nmdc:6eb21304f0762bd8c11b98826d310321", - "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", - "nmdc:f389dc8a93de9f21322db385b2788f5f", - "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", - "nmdc:89bc9cf9183fed6700cde44fad41b830", - "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", - "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", - "nmdc:68c06be8d27d1697b4a6955537b318c8" + "nmdc:2791a196017767af3b5b21a3029799c0", + "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f" ], "was_informed_by": "gold:Gp0127651", + "input_read_count": 23728904, + "output_read_bases": 3352071049, "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga08hnt47", + "input_read_bases": 3583064504, + "name": "Read QC Activity for nmdc:mga08hnt47", + "output_read_count": 22416634, "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:57:48+00:00", - "output_data_objects": [ - { - "name": "Gp0127651_Protein FAA", - "description": "Protein FAA for Gp0127651", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_proteins.faa", - "md5_checksum": "d8dc4f31293c549b12bbcab915d708cc", - "id": "nmdc:d8dc4f31293c549b12bbcab915d708cc", - "file_size_bytes": 54370216 - }, - { - "name": "Gp0127651_Structural annotation GFF file", - "description": "Structural annotation GFF file for 
Gp0127651", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_structural_annotation.gff", - "md5_checksum": "415256907dcafaa68778a2ba358d9ac5", - "id": "nmdc:415256907dcafaa68778a2ba358d9ac5", - "file_size_bytes": 2517 - }, - { - "name": "Gp0127651_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127651", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_functional_annotation.gff", - "md5_checksum": "f0c60a537e6867bf62fde15577669453", - "id": "nmdc:f0c60a537e6867bf62fde15577669453", - "file_size_bytes": 61364019 - }, - { - "name": "Gp0127651_KO TSV file", - "description": "KO TSV file for Gp0127651", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko.tsv", - "md5_checksum": "e0f16b60c50581799b7ecb254e61e537", - "id": "nmdc:e0f16b60c50581799b7ecb254e61e537", - "file_size_bytes": 6908291 - }, - { - "name": "Gp0127651_EC TSV file", - "description": "EC TSV file for Gp0127651", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ec.tsv", - "md5_checksum": "6eb21304f0762bd8c11b98826d310321", - "id": "nmdc:6eb21304f0762bd8c11b98826d310321", - "file_size_bytes": 4650091 - }, - { - "name": "Gp0127651_COG GFF file", - "description": "COG GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cog.gff", - "md5_checksum": "4ea7982c99cbb6d8ccc9fd949bee09ec", - "id": "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", - "file_size_bytes": 36137856 - }, - { - "name": "Gp0127651_PFAM GFF file", - "description": "PFAM GFF file for Gp0127651", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_pfam.gff", - "md5_checksum": "f389dc8a93de9f21322db385b2788f5f", - "id": "nmdc:f389dc8a93de9f21322db385b2788f5f", - "file_size_bytes": 27173740 - }, - { - "name": "Gp0127651_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_tigrfam.gff", - "md5_checksum": "8e6659ce96dfa72ceefda39c74fb1dce", - "id": "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", - "file_size_bytes": 2943355 - }, - { - "name": "Gp0127651_SMART GFF file", - "description": "SMART GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_smart.gff", - "md5_checksum": "89bc9cf9183fed6700cde44fad41b830", - "id": "nmdc:89bc9cf9183fed6700cde44fad41b830", - "file_size_bytes": 7927726 - }, - { - "name": "Gp0127651_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_supfam.gff", - "md5_checksum": "84aae368e77c1d07c6b6e8deecbc3f3b", - "id": "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", - "file_size_bytes": 45499652 - }, - { - "name": "Gp0127651_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cath_funfam.gff", - "md5_checksum": "ee5612e5ee82ec2d57029d1bc4e1756f", - "id": "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", - "file_size_bytes": 34280847 - }, - { - "name": "Gp0127651_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko_ec.gff", - "md5_checksum": "68c06be8d27d1697b4a6955537b318c8", - "id": "nmdc:68c06be8d27d1697b4a6955537b318c8", - "file_size_bytes": 21943549 - } - ] - }, + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": 
"2021-10-11T03:57:48+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34721" + "$oid": "649b009bff710ae353f8cf42" }, "has_input": [ - "nmdc:8483663a943ff4c0fc0249353676bfc1", - "nmdc:838162ead3f121f5bc02bc1234a32a55", - "nmdc:f0c60a537e6867bf62fde15577669453" - ], - "too_short_contig_num": 168908, - "part_of": [ - "nmdc:mga08hnt47" + "nmdc:2791a196017767af3b5b21a3029799c0" ], - "binned_contig_num": 216, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:6f012bfca6cb653f92eaf927003de0fa", - "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", - "nmdc:66fd77d80cc9257da98c5bce4cb30626", - "nmdc:06caec963e007225d1d9411078829100", - "nmdc:eb5216cc4e09d88c4c59a76c4808a693" + "nmdc:53ee263960c39126e039656a121deb96", + "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "nmdc:b4cbc81c986c67c1037c8b7280924683", + "nmdc:e0c61a191258597984a05d86eaf4d71f", + "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" ], "was_informed_by": "gold:Gp0127651", - "input_contig_num": 180437, "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga08hnt47", - "mags_list": [ - { - "number_of_contig": 216, - "completeness": 36.79, - "bin_name": "bins.1", - "gene_count": 1612, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 28 - } - ], - "unbinned_contig_num": 11313, + "name": "ReadBased Analysis Activity for nmdc:mga08hnt47", "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": 
"2021-10-11T03:57:48+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127651_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.tooShort.fa", - "md5_checksum": "6f012bfca6cb653f92eaf927003de0fa", - "id": "nmdc:6f012bfca6cb653f92eaf927003de0fa", - "file_size_bytes": 77381118 - }, - { - "name": "Gp0127651_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.unbinned.fa", - "md5_checksum": "298e0a0c98ebe4fb673da7de9fcb03a2", - "id": "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", - "file_size_bytes": 17278743 - }, - { - "name": "Gp0127651_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127651", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_checkm_qa.out", - "md5_checksum": "66fd77d80cc9257da98c5bce4cb30626", - "id": "nmdc:66fd77d80cc9257da98c5bce4cb30626", - "file_size_bytes": 760 - }, - { - "name": "Gp0127651_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127651", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_hqmq_bin.zip", - 
"md5_checksum": "06caec963e007225d1d9411078829100", - "id": "nmdc:06caec963e007225d1d9411078829100", - "file_size_bytes": 182 - }, - { - "name": "Gp0127651_metabat2 bins", - "description": "metabat2 bins for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_metabat_bin.zip", - "md5_checksum": "eb5216cc4e09d88c4c59a76c4808a693", - "id": "nmdc:eb5216cc4e09d88c4c59a76c4808a693", - "file_size_bytes": 397044 - } - ] + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:57:48+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b63" - }, - "id": "nmdc:omprc-11-0n8y1d07", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-a7fxtx60" - ], - "has_output": [ - "jgi:574fde947ded5e3df1ee1429" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127655" - ], - "downstream_workflow_activity_records": [ + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c880" + "$oid": "61e719ba833bcf838a7012b5" }, "has_input": [ - "nmdc:898017d076d5d2daaf902e9141f0600a" + 
"nmdc:2791a196017767af3b5b21a3029799c0" ], "part_of": [ - "nmdc:mga0317978" + "nmdc:mga08hnt47" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:04b9014981f7035c39bd7f870613ed93", - "nmdc:b66266969ab3df4c1cb2b16c1fa7d098" + "nmdc:53ee263960c39126e039656a121deb96", + "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "nmdc:b4cbc81c986c67c1037c8b7280924683", + "nmdc:e0c61a191258597984a05d86eaf4d71f", + "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" ], - "was_informed_by": "gold:Gp0127655", - "input_read_count": 23985924, - "output_read_bases": 3400452550, - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "was_informed_by": "gold:Gp0127651", + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", "execution_resource": "NERSC-Cori", - "input_read_bases": 3621874524, - "name": "Read QC Activity for nmdc:mga0317978", - "output_read_count": 22751496, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:21:25+00:00", - "output_data_objects": [ - { - "name": "Gp0127655_Filtered Reads", - "description": "Filtered Reads for Gp0127655", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filtered.fastq.gz", - "md5_checksum": "04b9014981f7035c39bd7f870613ed93", - "id": "nmdc:04b9014981f7035c39bd7f870613ed93", - "file_size_bytes": 1880069213 - }, - { - "name": "Gp0127655_Filtered Stats", - "description": "Filtered Stats for Gp0127655", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filterStats.txt", - "md5_checksum": "b66266969ab3df4c1cb2b16c1fa7d098", - "id": "nmdc:b66266969ab3df4c1cb2b16c1fa7d098", - "file_size_bytes": 286 - } - ] + "name": "ReadBased Analysis 
Activity for nmdc:mga08hnt47", + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:57:48+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127655_Filtered Reads", + "description": "Filtered Reads for Gp0127655", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filtered.fastq.gz", + "md5_checksum": "04b9014981f7035c39bd7f870613ed93", + "id": "nmdc:04b9014981f7035c39bd7f870613ed93", + "file_size_bytes": 1880069213 + }, + { + "name": "Gp0127655_Filtered Stats", + "description": "Filtered Stats for Gp0127655", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filterStats.txt", + "md5_checksum": "b66266969ab3df4c1cb2b16c1fa7d098", + "id": "nmdc:b66266969ab3df4c1cb2b16c1fa7d098", + "file_size_bytes": 286 + }, + { + "name": "Gp0127655_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", + "md5_checksum": "46371c7bc8259e459f975f915aaac26f", + "id": "nmdc:46371c7bc8259e459f975f915aaac26f", + "file_size_bytes": 2178 + }, + { + "name": "Gp0127655_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", + "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", + "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", + "file_size_bytes": 697690 + }, + { + "name": "Gp0127655_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127655", + "data_object_type": "GOTTCHA2 Krona Plot", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", + "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", + "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "file_size_bytes": 231103 + }, + { + "name": "Gp0127655_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127655", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", + "md5_checksum": "e3f410adc2347396abfdec2a848000d9", + "id": "nmdc:e3f410adc2347396abfdec2a848000d9", + "file_size_bytes": 1676897166 + }, + { + "name": "Gp0127655_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127655", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", + "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", + "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "file_size_bytes": 253692 + }, + { + "name": "Gp0127655_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127655", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", + "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", + "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "file_size_bytes": 2329422 + }, + { + "name": "Gp0127655_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127655", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", + "md5_checksum": "1d4f5a605d4549801fda16da567efe56", + "id": "nmdc:1d4f5a605d4549801fda16da567efe56", + "file_size_bytes": 
1336793184 + }, + { + "name": "Gp0127655_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127655", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", + "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", + "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", + "file_size_bytes": 632192 + }, + { + "name": "Gp0127655_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127655", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", + "md5_checksum": "157f7672690ba8207808cc4386ff10a4", + "id": "nmdc:157f7672690ba8207808cc4386ff10a4", + "file_size_bytes": 3946317 + }, + { + "name": "Gp0127655_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", + "md5_checksum": "46371c7bc8259e459f975f915aaac26f", + "id": "nmdc:46371c7bc8259e459f975f915aaac26f", + "file_size_bytes": 2178 + }, + { + "name": "Gp0127655_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", + "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", + "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", + "file_size_bytes": 697690 + }, + { + "name": "Gp0127655_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127655", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", + "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", + "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "file_size_bytes": 231103 + }, + { + "name": 
"Gp0127655_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127655", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", + "md5_checksum": "e3f410adc2347396abfdec2a848000d9", + "id": "nmdc:e3f410adc2347396abfdec2a848000d9", + "file_size_bytes": 1676897166 + }, + { + "name": "Gp0127655_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127655", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", + "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", + "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "file_size_bytes": 253692 + }, + { + "name": "Gp0127655_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127655", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", + "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", + "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "file_size_bytes": 2329422 + }, + { + "name": "Gp0127655_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127655", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", + "md5_checksum": "1d4f5a605d4549801fda16da567efe56", + "id": "nmdc:1d4f5a605d4549801fda16da567efe56", + "file_size_bytes": 1336793184 + }, + { + "name": "Gp0127655_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127655", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", + "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", + "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", + "file_size_bytes": 632192 + }, + { + "name": "Gp0127655_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127655", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", + "md5_checksum": "157f7672690ba8207808cc4386ff10a4", + "id": "nmdc:157f7672690ba8207808cc4386ff10a4", + "file_size_bytes": 3946317 + }, + { + "name": "Gp0127655_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127655", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_contigs.fna", + "md5_checksum": "98bc1e8aa3703e255a930f6c6f923453", + "id": "nmdc:98bc1e8aa3703e255a930f6c6f923453", + "file_size_bytes": 93445462 + }, + { + "name": "Gp0127655_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127655", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_scaffolds.fna", + "md5_checksum": "769bd168524b84f2d10dfdb2a42a909d", + "id": "nmdc:769bd168524b84f2d10dfdb2a42a909d", + "file_size_bytes": 92895420 + }, + { + "name": "Gp0127655_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_covstats.txt", + "md5_checksum": "5bd5f8108ae1d767ea5a79ebde3d83de", + "id": "nmdc:5bd5f8108ae1d767ea5a79ebde3d83de", + "file_size_bytes": 14474338 + }, + { + "name": "Gp0127655_Assembled AGP file", + "description": "Assembled AGP file for Gp0127655", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_assembly.agp", + "md5_checksum": "933de420870147e58137b328e0d54d87", + "id": "nmdc:933de420870147e58137b328e0d54d87", + "file_size_bytes": 13523380 + }, + { + "name": "Gp0127655_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127655", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_pairedMapped_sorted.bam", + "md5_checksum": "2b699163734ee73cbccc94e4767d36c0", + "id": "nmdc:2b699163734ee73cbccc94e4767d36c0", + "file_size_bytes": 2057808015 + }, + { + "name": "Gp0127655_Protein FAA", + "description": "Protein FAA for Gp0127655", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_proteins.faa", + "md5_checksum": "9b57eb78fd2e8f0af8b55cf5fb3a2bab", + "id": "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", + "file_size_bytes": 53898203 + }, + { + "name": "Gp0127655_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127655", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_structural_annotation.gff", + "md5_checksum": "6b11bf4eaf9723559b6015296b802252", + "id": "nmdc:6b11bf4eaf9723559b6015296b802252", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127655_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127655", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_functional_annotation.gff", + "md5_checksum": "0940fbdf18becd76e7dd3abcfaba12b5", + "id": "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", + "file_size_bytes": 61535970 + }, + { + "name": "Gp0127655_KO TSV file", + "description": "KO TSV file for Gp0127655", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko.tsv", + "md5_checksum": "a1cd7e1382fd1818c42860a0555f1f57", + "id": "nmdc:a1cd7e1382fd1818c42860a0555f1f57", + "file_size_bytes": 6994761 + }, + { + "name": "Gp0127655_EC TSV file", + "description": "EC TSV file for Gp0127655", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ec.tsv", + "md5_checksum": "3a27c2da0a3d05e4c44547afb2875195", + "id": "nmdc:3a27c2da0a3d05e4c44547afb2875195", + "file_size_bytes": 4598688 + }, + { + "name": "Gp0127655_COG GFF file", + "description": "COG GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cog.gff", + "md5_checksum": "1c8529ca35ee0b275b8ca3d2b5c565ec", + "id": "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", + "file_size_bytes": 36290392 + }, + { + "name": "Gp0127655_PFAM GFF file", + "description": "PFAM GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_pfam.gff", + "md5_checksum": "8bf1c44c4a9fc7f55dcf58be1273b46f", + "id": "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", + "file_size_bytes": 27016921 + }, + { + "name": "Gp0127655_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_tigrfam.gff", + "md5_checksum": "acb8325b4800ff62e3fda52b21b92ecc", + "id": "nmdc:acb8325b4800ff62e3fda52b21b92ecc", + "file_size_bytes": 2768301 + }, + { + "name": "Gp0127655_SMART GFF file", + "description": "SMART GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_smart.gff", + "md5_checksum": "a044873e470ce9f2be06ae99cd1cc242", + "id": "nmdc:a044873e470ce9f2be06ae99cd1cc242", + "file_size_bytes": 7806208 + }, + { + "name": "Gp0127655_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_supfam.gff", + "md5_checksum": "40f0627934454a354886609d7068a12c", + "id": "nmdc:40f0627934454a354886609d7068a12c", + "file_size_bytes": 45276498 + }, + { + "name": "Gp0127655_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cath_funfam.gff", + "md5_checksum": "60255b31e223a7b5bad8f186b6f65d7c", + "id": "nmdc:60255b31e223a7b5bad8f186b6f65d7c", + "file_size_bytes": 33794110 + }, + { + "name": "Gp0127655_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko_ec.gff", + "md5_checksum": "b8d559d4ea779c4076e3c9e1e92bddcf", + "id": "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf", + "file_size_bytes": 22249696 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127655_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.tooShort.fa", + "md5_checksum": "58f2cc63798346be853bccacdd7ca30d", + "id": "nmdc:58f2cc63798346be853bccacdd7ca30d", + "file_size_bytes": 77075570 + }, + { + "name": "Gp0127655_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127655", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.unbinned.fa", + "md5_checksum": "8b2dbaba9c1219096831ad99d8b7c056", + "id": "nmdc:8b2dbaba9c1219096831ad99d8b7c056", + "file_size_bytes": 14551969 + }, + { + "name": "Gp0127655_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127655", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_checkm_qa.out", + "md5_checksum": "c562d8d5ccc986d672b4e48e006fafab", + "id": "nmdc:c562d8d5ccc986d672b4e48e006fafab", + "file_size_bytes": 775 + }, + { + "name": "Gp0127655_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127655", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_hqmq_bin.zip", + "md5_checksum": "2eaf0a7d519ac7c034d63797d735080c", + "id": "nmdc:2eaf0a7d519ac7c034d63797d735080c", + "file_size_bytes": 182 }, + { + "name": "Gp0127655_metabat2 bins", + "description": "metabat2 bins for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_metabat_bin.zip", + "md5_checksum": "668a0a6dbd840dd2178a00c2af4c2237", + "id": "nmdc:668a0a6dbd840dd2178a00c2af4c2237", + "file_size_bytes": 527634 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf47" + "$oid": "649b0052ec087f6bbab34725" }, "has_input": [ - "nmdc:04b9014981f7035c39bd7f870613ed93" + "nmdc:98bc1e8aa3703e255a930f6c6f923453", + "nmdc:2b699163734ee73cbccc94e4767d36c0", + "nmdc:0940fbdf18becd76e7dd3abcfaba12b5" ], + "too_short_contig_num": 173159, + "part_of": [ + "nmdc:mga0317978" + ], + "binned_contig_num": 412, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - 
"nmdc:46371c7bc8259e459f975f915aaac26f", - "nmdc:5dd9bc51105920f3f629e8106235af3b", - "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "nmdc:e3f410adc2347396abfdec2a848000d9", - "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "nmdc:1d4f5a605d4549801fda16da567efe56", - "nmdc:8bb5c66575c7c953719ae9947600ad49", - "nmdc:157f7672690ba8207808cc4386ff10a4" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:58f2cc63798346be853bccacdd7ca30d", + "nmdc:8b2dbaba9c1219096831ad99d8b7c056", + "nmdc:c562d8d5ccc986d672b4e48e006fafab", + "nmdc:2eaf0a7d519ac7c034d63797d735080c", + "nmdc:668a0a6dbd840dd2178a00c2af4c2237" ], "was_informed_by": "gold:Gp0127655", + "input_contig_num": 182939, "id": "nmdc:65af38817454a315aeb8c67ab27e1469", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0317978", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:21:25+00:00", - "output_data_objects": [ - { - "name": "Gp0127655_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", - "md5_checksum": "46371c7bc8259e459f975f915aaac26f", - "id": "nmdc:46371c7bc8259e459f975f915aaac26f", - "file_size_bytes": 2178 - }, - { - "name": "Gp0127655_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", - "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", - "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", - "file_size_bytes": 697690 - }, - { - "name": "Gp0127655_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127655", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", - "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", - "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "file_size_bytes": 231103 - }, - { - "name": "Gp0127655_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127655", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", - "md5_checksum": "e3f410adc2347396abfdec2a848000d9", - "id": "nmdc:e3f410adc2347396abfdec2a848000d9", - "file_size_bytes": 1676897166 - }, - { - "name": "Gp0127655_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127655", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", - "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", - "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "file_size_bytes": 253692 - }, - { - "name": "Gp0127655_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127655", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", - "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", - "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "file_size_bytes": 2329422 - }, - { - "name": "Gp0127655_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127655", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", - "md5_checksum": "1d4f5a605d4549801fda16da567efe56", - "id": "nmdc:1d4f5a605d4549801fda16da567efe56", - "file_size_bytes": 
1336793184 - }, - { - "name": "Gp0127655_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127655", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", - "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", - "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", - "file_size_bytes": 632192 - }, + "name": "MAGs Analysis Activity for nmdc:mga0317978", + "mags_list": [ { - "name": "Gp0127655_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127655", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", - "md5_checksum": "157f7672690ba8207808cc4386ff10a4", - "id": "nmdc:157f7672690ba8207808cc4386ff10a4", - "file_size_bytes": 3946317 + "number_of_contig": 412, + "completeness": 27.84, + "bin_name": "bins.1", + "gene_count": 2086, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 } - ] - }, + ], + "unbinned_contig_num": 9368, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "61e719df833bcf838a701627" + "$oid": "649b005bbf2caae0415ef9c4" }, "has_input": [ - "nmdc:04b9014981f7035c39bd7f870613ed93" + "nmdc:98bc1e8aa3703e255a930f6c6f923453" ], "part_of": [ "nmdc:mga0317978" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:46371c7bc8259e459f975f915aaac26f", - 
"nmdc:5dd9bc51105920f3f629e8106235af3b", - "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "nmdc:e3f410adc2347396abfdec2a848000d9", - "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "nmdc:1d4f5a605d4549801fda16da567efe56", - "nmdc:8bb5c66575c7c953719ae9947600ad49", - "nmdc:157f7672690ba8207808cc4386ff10a4" + "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", + "nmdc:6b11bf4eaf9723559b6015296b802252", + "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", + "nmdc:a1cd7e1382fd1818c42860a0555f1f57", + "nmdc:3a27c2da0a3d05e4c44547afb2875195", + "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", + "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", + "nmdc:acb8325b4800ff62e3fda52b21b92ecc", + "nmdc:a044873e470ce9f2be06ae99cd1cc242", + "nmdc:40f0627934454a354886609d7068a12c", + "nmdc:60255b31e223a7b5bad8f186b6f65d7c", + "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf" ], "was_informed_by": "gold:Gp0127655", "id": "nmdc:65af38817454a315aeb8c67ab27e1469", "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0317978", + "name": "Annotation Activity for nmdc:mga0317978", "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:21:25+00:00", - "output_data_objects": [ - { - "name": "Gp0127655_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", - "md5_checksum": "46371c7bc8259e459f975f915aaac26f", - "id": "nmdc:46371c7bc8259e459f975f915aaac26f", - "file_size_bytes": 2178 - }, - { - "name": "Gp0127655_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", - "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", - "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", - "file_size_bytes": 697690 - }, - { - "name": 
"Gp0127655_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127655", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", - "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", - "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "file_size_bytes": 231103 - }, - { - "name": "Gp0127655_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127655", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", - "md5_checksum": "e3f410adc2347396abfdec2a848000d9", - "id": "nmdc:e3f410adc2347396abfdec2a848000d9", - "file_size_bytes": 1676897166 - }, - { - "name": "Gp0127655_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127655", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", - "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", - "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "file_size_bytes": 253692 - }, - { - "name": "Gp0127655_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127655", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", - "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", - "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "file_size_bytes": 2329422 - }, - { - "name": "Gp0127655_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127655", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", - "md5_checksum": "1d4f5a605d4549801fda16da567efe56", - "id": "nmdc:1d4f5a605d4549801fda16da567efe56", - "file_size_bytes": 1336793184 - }, - { - "name": "Gp0127655_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127655", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", - "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", - "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", - "file_size_bytes": 632192 - }, - { - "name": "Gp0127655_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127655", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", - "md5_checksum": "157f7672690ba8207808cc4386ff10a4", - "id": "nmdc:157f7672690ba8207808cc4386ff10a4", - "file_size_bytes": 3946317 - } - ] - }, + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00" + } + ], + "metagenome_assembly_set": [ { "_id": { "$oid": "649b005f2ca5ee4adb139fb3" @@ -32468,596 +33946,565 @@ "scaf_l50": 457, "scaf_l90": 289, "scaf_n50": 53484, - "scaf_n90": 154812, - "output_data_objects": [ - { - "name": "Gp0127655_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127655", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_contigs.fna", - "md5_checksum": "98bc1e8aa3703e255a930f6c6f923453", - "id": "nmdc:98bc1e8aa3703e255a930f6c6f923453", - "file_size_bytes": 93445462 - }, - { - "name": "Gp0127655_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127655", - "data_object_type": "Assembly Scaffolds", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_scaffolds.fna", - "md5_checksum": "769bd168524b84f2d10dfdb2a42a909d", - "id": "nmdc:769bd168524b84f2d10dfdb2a42a909d", - "file_size_bytes": 92895420 - }, - { - "name": "Gp0127655_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_covstats.txt", - "md5_checksum": "5bd5f8108ae1d767ea5a79ebde3d83de", - "id": "nmdc:5bd5f8108ae1d767ea5a79ebde3d83de", - "file_size_bytes": 14474338 - }, - { - "name": "Gp0127655_Assembled AGP file", - "description": "Assembled AGP file for Gp0127655", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_assembly.agp", - "md5_checksum": "933de420870147e58137b328e0d54d87", - "id": "nmdc:933de420870147e58137b328e0d54d87", - "file_size_bytes": 13523380 - }, - { - "name": "Gp0127655_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127655", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_pairedMapped_sorted.bam", - "md5_checksum": "2b699163734ee73cbccc94e4767d36c0", - "id": "nmdc:2b699163734ee73cbccc94e4767d36c0", - "file_size_bytes": 2057808015 - } - ] - }, + "scaf_n90": 154812 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9c4" + "$oid": "649b009773e8249959349b63" }, + "id": "nmdc:omprc-11-0n8y1d07", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding 
vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", "has_input": [ - "nmdc:98bc1e8aa3703e255a930f6c6f923453" + "nmdc:bsm-11-a7fxtx60" ], - "part_of": [ - "nmdc:mga0317978" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", - "nmdc:6b11bf4eaf9723559b6015296b802252", - "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", - "nmdc:a1cd7e1382fd1818c42860a0555f1f57", - "nmdc:3a27c2da0a3d05e4c44547afb2875195", - "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", - "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", - "nmdc:acb8325b4800ff62e3fda52b21b92ecc", - "nmdc:a044873e470ce9f2be06ae99cd1cc242", - "nmdc:40f0627934454a354886609d7068a12c", - "nmdc:60255b31e223a7b5bad8f186b6f65d7c", - "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf" + "jgi:574fde947ded5e3df1ee1429" ], - "was_informed_by": "gold:Gp0127655", - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0317978", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:21:25+00:00", - "output_data_objects": [ - { - "name": "Gp0127655_Protein FAA", - "description": "Protein FAA for Gp0127655", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_proteins.faa", - "md5_checksum": "9b57eb78fd2e8f0af8b55cf5fb3a2bab", - "id": "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", - "file_size_bytes": 53898203 - }, - { - "name": "Gp0127655_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127655", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_structural_annotation.gff", - "md5_checksum": "6b11bf4eaf9723559b6015296b802252", - "id": "nmdc:6b11bf4eaf9723559b6015296b802252", - "file_size_bytes": 2515 
- }, - { - "name": "Gp0127655_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127655", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_functional_annotation.gff", - "md5_checksum": "0940fbdf18becd76e7dd3abcfaba12b5", - "id": "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", - "file_size_bytes": 61535970 - }, - { - "name": "Gp0127655_KO TSV file", - "description": "KO TSV file for Gp0127655", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko.tsv", - "md5_checksum": "a1cd7e1382fd1818c42860a0555f1f57", - "id": "nmdc:a1cd7e1382fd1818c42860a0555f1f57", - "file_size_bytes": 6994761 - }, - { - "name": "Gp0127655_EC TSV file", - "description": "EC TSV file for Gp0127655", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ec.tsv", - "md5_checksum": "3a27c2da0a3d05e4c44547afb2875195", - "id": "nmdc:3a27c2da0a3d05e4c44547afb2875195", - "file_size_bytes": 4598688 - }, - { - "name": "Gp0127655_COG GFF file", - "description": "COG GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cog.gff", - "md5_checksum": "1c8529ca35ee0b275b8ca3d2b5c565ec", - "id": "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", - "file_size_bytes": 36290392 - }, - { - "name": "Gp0127655_PFAM GFF file", - "description": "PFAM GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_pfam.gff", - "md5_checksum": "8bf1c44c4a9fc7f55dcf58be1273b46f", - "id": "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", - "file_size_bytes": 27016921 - }, - { - "name": "Gp0127655_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127655", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_tigrfam.gff", - "md5_checksum": "acb8325b4800ff62e3fda52b21b92ecc", - "id": "nmdc:acb8325b4800ff62e3fda52b21b92ecc", - "file_size_bytes": 2768301 - }, - { - "name": "Gp0127655_SMART GFF file", - "description": "SMART GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_smart.gff", - "md5_checksum": "a044873e470ce9f2be06ae99cd1cc242", - "id": "nmdc:a044873e470ce9f2be06ae99cd1cc242", - "file_size_bytes": 7806208 - }, - { - "name": "Gp0127655_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_supfam.gff", - "md5_checksum": "40f0627934454a354886609d7068a12c", - "id": "nmdc:40f0627934454a354886609d7068a12c", - "file_size_bytes": 45276498 - }, - { - "name": "Gp0127655_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cath_funfam.gff", - "md5_checksum": "60255b31e223a7b5bad8f186b6f65d7c", - "id": "nmdc:60255b31e223a7b5bad8f186b6f65d7c", - "file_size_bytes": 33794110 - }, - { - "name": "Gp0127655_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko_ec.gff", - "md5_checksum": "b8d559d4ea779c4076e3c9e1e92bddcf", - "id": "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf", - "file_size_bytes": 22249696 - } + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + 
"type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127655" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34725" + "$oid": "649b009d6bdd4fd20273c880" }, "has_input": [ - "nmdc:98bc1e8aa3703e255a930f6c6f923453", - "nmdc:2b699163734ee73cbccc94e4767d36c0", - "nmdc:0940fbdf18becd76e7dd3abcfaba12b5" + "nmdc:898017d076d5d2daaf902e9141f0600a" ], - "too_short_contig_num": 173159, "part_of": [ "nmdc:mga0317978" ], - "binned_contig_num": 412, "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:58f2cc63798346be853bccacdd7ca30d", - "nmdc:8b2dbaba9c1219096831ad99d8b7c056", - "nmdc:c562d8d5ccc986d672b4e48e006fafab", - "nmdc:2eaf0a7d519ac7c034d63797d735080c", - "nmdc:668a0a6dbd840dd2178a00c2af4c2237" + "nmdc:04b9014981f7035c39bd7f870613ed93", + "nmdc:b66266969ab3df4c1cb2b16c1fa7d098" ], "was_informed_by": "gold:Gp0127655", - "input_contig_num": 182939, + "input_read_count": 23985924, + "output_read_bases": 3400452550, "id": "nmdc:65af38817454a315aeb8c67ab27e1469", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0317978", - "mags_list": [ - { - "number_of_contig": 412, - "completeness": 27.84, - "bin_name": "bins.1", - "gene_count": 2086, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 22 - } + "input_read_bases": 3621874524, + "name": "Read QC Activity for nmdc:mga0317978", + "output_read_count": 22751496, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": 
"649b009bff710ae353f8cf47" + }, + "has_input": [ + "nmdc:04b9014981f7035c39bd7f870613ed93" ], - "unbinned_contig_num": 9368, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:46371c7bc8259e459f975f915aaac26f", + "nmdc:5dd9bc51105920f3f629e8106235af3b", + "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "nmdc:e3f410adc2347396abfdec2a848000d9", + "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "nmdc:1d4f5a605d4549801fda16da567efe56", + "nmdc:8bb5c66575c7c953719ae9947600ad49", + "nmdc:157f7672690ba8207808cc4386ff10a4" + ], + "was_informed_by": "gold:Gp0127655", + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0317978", "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:21:25+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127655_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.tooShort.fa", - "md5_checksum": "58f2cc63798346be853bccacdd7ca30d", - "id": "nmdc:58f2cc63798346be853bccacdd7ca30d", - "file_size_bytes": 77075570 - }, - { - "name": "Gp0127655_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127655", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.unbinned.fa", - "md5_checksum": "8b2dbaba9c1219096831ad99d8b7c056", - "id": "nmdc:8b2dbaba9c1219096831ad99d8b7c056", - "file_size_bytes": 14551969 - }, - { - "name": "Gp0127655_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127655", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_checkm_qa.out", - "md5_checksum": "c562d8d5ccc986d672b4e48e006fafab", - "id": "nmdc:c562d8d5ccc986d672b4e48e006fafab", - "file_size_bytes": 775 - }, - { - "name": "Gp0127655_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127655", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_hqmq_bin.zip", - "md5_checksum": "2eaf0a7d519ac7c034d63797d735080c", - "id": "nmdc:2eaf0a7d519ac7c034d63797d735080c", - "file_size_bytes": 182 - }, - { - "name": "Gp0127655_metabat2 bins", - "description": "metabat2 bins for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_metabat_bin.zip", - "md5_checksum": "668a0a6dbd840dd2178a00c2af4c2237", - "id": "nmdc:668a0a6dbd840dd2178a00c2af4c2237", - "file_size_bytes": 527634 - } - ] + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:21:25+00:00" } - ] - }, - { - "_id": { - "$oid": "649b009773e8249959349b64" - }, - "id": "nmdc:omprc-11-p1735e67", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-k4wa0808" - ], - "has_output": [ - 
"jgi:574fde937ded5e3df1ee1428" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127653" - ], - "downstream_workflow_activity_records": [ + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ { "_id": { - "$oid": "649b009d6bdd4fd20273c878" + "$oid": "61e719df833bcf838a701627" }, "has_input": [ - "nmdc:84ffabc3fbd7e759cd2352ec513b89a0" + "nmdc:04b9014981f7035c39bd7f870613ed93" ], "part_of": [ - "nmdc:mga079y988" + "nmdc:mga0317978" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:8eec0e9c14abb418b906504d1675ecc5", - "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0" + "nmdc:46371c7bc8259e459f975f915aaac26f", + "nmdc:5dd9bc51105920f3f629e8106235af3b", + "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "nmdc:e3f410adc2347396abfdec2a848000d9", + "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "nmdc:1d4f5a605d4549801fda16da567efe56", + "nmdc:8bb5c66575c7c953719ae9947600ad49", + "nmdc:157f7672690ba8207808cc4386ff10a4" ], - "was_informed_by": "gold:Gp0127653", - "input_read_count": 20780788, - "output_read_bases": 2918466866, - "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "was_informed_by": "gold:Gp0127655", + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", "execution_resource": "NERSC-Cori", - "input_read_bases": 3137898988, - "name": "Read QC Activity for nmdc:mga079y988", - 
"output_read_count": 19516330, - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-11-13T18:52:13+00:00", - "output_data_objects": [ - { - "name": "Gp0127653_Filtered Reads", - "description": "Filtered Reads for Gp0127653", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filtered.fastq.gz", - "md5_checksum": "8eec0e9c14abb418b906504d1675ecc5", - "id": "nmdc:8eec0e9c14abb418b906504d1675ecc5", - "file_size_bytes": 1661017378 - }, - { - "name": "Gp0127653_Filtered Stats", - "description": "Filtered Stats for Gp0127653", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filterStats.txt", - "md5_checksum": "5d07358bbc48f25e157ffc91ea7ae3e0", - "id": "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0", - "file_size_bytes": 286 - } - ] + "name": "ReadBased Analysis Activity for nmdc:mga0317978", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:21:25+00:00" + } + ] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "name": "Gp0127653_Filtered Reads", + "description": "Filtered Reads for Gp0127653", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filtered.fastq.gz", + "md5_checksum": "8eec0e9c14abb418b906504d1675ecc5", + "id": "nmdc:8eec0e9c14abb418b906504d1675ecc5", + "file_size_bytes": 1661017378 + }, + { + "name": "Gp0127653_Filtered Stats", + "description": "Filtered Stats for Gp0127653", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filterStats.txt", + "md5_checksum": 
"5d07358bbc48f25e157ffc91ea7ae3e0", + "id": "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0", + "file_size_bytes": 286 + }, + { + "name": "Gp0127653_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", + "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", + "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "file_size_bytes": 3812 + }, + { + "name": "Gp0127653_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", + "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", + "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", + "file_size_bytes": 857087 + }, + { + "name": "Gp0127653_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127653", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", + "md5_checksum": "284ce1b28b8964cb525025d678277dba", + "id": "nmdc:284ce1b28b8964cb525025d678277dba", + "file_size_bytes": 235621 + }, + { + "name": "Gp0127653_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127653", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", + "md5_checksum": "a379527f61806391e42b3512146013a8", + "id": "nmdc:a379527f61806391e42b3512146013a8", + "file_size_bytes": 1437707313 + }, + { + "name": "Gp0127653_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127653", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", + "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", + "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "file_size_bytes": 255105 + }, + { + "name": "Gp0127653_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127653", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", + "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", + "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", + "file_size_bytes": 2327985 + }, + { + "name": "Gp0127653_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127653", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", + "md5_checksum": "be29ebcd7358653afec7381f9ca43431", + "id": "nmdc:be29ebcd7358653afec7381f9ca43431", + "file_size_bytes": 1164013677 + }, + { + "name": "Gp0127653_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127653", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", + "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", + "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "file_size_bytes": 638368 + }, + { + "name": "Gp0127653_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127653", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", + "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", + "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", + "file_size_bytes": 3982485 + }, + { + "name": "Gp0127653_Gottcha2 TSV report", + 
"description": "Gottcha2 TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", + "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", + "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "file_size_bytes": 3812 + }, + { + "name": "Gp0127653_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", + "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", + "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", + "file_size_bytes": 857087 + }, + { + "name": "Gp0127653_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127653", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", + "md5_checksum": "284ce1b28b8964cb525025d678277dba", + "id": "nmdc:284ce1b28b8964cb525025d678277dba", + "file_size_bytes": 235621 + }, + { + "name": "Gp0127653_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127653", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", + "md5_checksum": "a379527f61806391e42b3512146013a8", + "id": "nmdc:a379527f61806391e42b3512146013a8", + "file_size_bytes": 1437707313 + }, + { + "name": "Gp0127653_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127653", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", + "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", + "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "file_size_bytes": 255105 + }, + { + "name": 
"Gp0127653_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127653", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", + "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", + "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", + "file_size_bytes": 2327985 + }, + { + "name": "Gp0127653_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127653", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", + "md5_checksum": "be29ebcd7358653afec7381f9ca43431", + "id": "nmdc:be29ebcd7358653afec7381f9ca43431", + "file_size_bytes": 1164013677 + }, + { + "name": "Gp0127653_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127653", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", + "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", + "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "file_size_bytes": 638368 + }, + { + "name": "Gp0127653_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127653", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", + "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", + "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", + "file_size_bytes": 3982485 + }, + { + "name": "Gp0127653_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127653", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_contigs.fna", + "md5_checksum": "0f2b82878f54787c127bf03338d5c605", + "id": 
"nmdc:0f2b82878f54787c127bf03338d5c605", + "file_size_bytes": 18722308 + }, + { + "name": "Gp0127653_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127653", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_scaffolds.fna", + "md5_checksum": "02f8e7222e9e6f45c388a189ca66e1f9", + "id": "nmdc:02f8e7222e9e6f45c388a189ca66e1f9", + "file_size_bytes": 18575622 + }, + { + "name": "Gp0127653_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_covstats.txt", + "md5_checksum": "eea8a4b58ca07019d0050b030be3a3d1", + "id": "nmdc:eea8a4b58ca07019d0050b030be3a3d1", + "file_size_bytes": 3824141 + }, + { + "name": "Gp0127653_Assembled AGP file", + "description": "Assembled AGP file for Gp0127653", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_assembly.agp", + "md5_checksum": "44b1ad59bd14c3367ac0fa2ca37aa057", + "id": "nmdc:44b1ad59bd14c3367ac0fa2ca37aa057", + "file_size_bytes": 3551123 + }, + { + "name": "Gp0127653_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127653", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_pairedMapped_sorted.bam", + "md5_checksum": "ccd5ba8558a92751c59989aa81054e1a", + "id": "nmdc:ccd5ba8558a92751c59989aa81054e1a", + "file_size_bytes": 1757373378 + }, + { + "name": "Gp0127653_Protein FAA", + "description": "Protein FAA for Gp0127653", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_proteins.faa", + "md5_checksum": "81f16ca99f73a3314a66e6b24d23376f", + "id": "nmdc:81f16ca99f73a3314a66e6b24d23376f", + 
"file_size_bytes": 11129064 + }, + { + "name": "Gp0127653_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127653", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_structural_annotation.gff", + "md5_checksum": "66bb16ef28196379647d319da50426dd", + "id": "nmdc:66bb16ef28196379647d319da50426dd", + "file_size_bytes": 8094827 + }, + { + "name": "Gp0127653_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127653", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_functional_annotation.gff", + "md5_checksum": "1e7dac5f12cc086509ff905f7133b15a", + "id": "nmdc:1e7dac5f12cc086509ff905f7133b15a", + "file_size_bytes": 13821021 + }, + { + "name": "Gp0127653_KO TSV file", + "description": "KO TSV file for Gp0127653", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko.tsv", + "md5_checksum": "2a7343eb6364d769a1c43aa5c94daee8", + "id": "nmdc:2a7343eb6364d769a1c43aa5c94daee8", + "file_size_bytes": 1578987 + }, + { + "name": "Gp0127653_EC TSV file", + "description": "EC TSV file for Gp0127653", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ec.tsv", + "md5_checksum": "b2cee4d35f68d1f5731bff3af5904fa4", + "id": "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", + "file_size_bytes": 1029657 + }, + { + "name": "Gp0127653_COG GFF file", + "description": "COG GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cog.gff", + "md5_checksum": "1d45960b1ba5e27af42c736ec583ecd4", + "id": "nmdc:1d45960b1ba5e27af42c736ec583ecd4", + "file_size_bytes": 7241411 + }, + { + "name": "Gp0127653_PFAM GFF 
file", + "description": "PFAM GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_pfam.gff", + "md5_checksum": "3dec47a0a04865ecdcd9ed7cbc78eca4", + "id": "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", + "file_size_bytes": 5221877 + }, + { + "name": "Gp0127653_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_tigrfam.gff", + "md5_checksum": "043322f3cd31d50faf4d4e0ffd1c8427", + "id": "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", + "file_size_bytes": 472233 + }, + { + "name": "Gp0127653_SMART GFF file", + "description": "SMART GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_smart.gff", + "md5_checksum": "6bed0fc7a7be284936c69fc1faac4be6", + "id": "nmdc:6bed0fc7a7be284936c69fc1faac4be6", + "file_size_bytes": 1586537 + }, + { + "name": "Gp0127653_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_supfam.gff", + "md5_checksum": "052d3fb0080390255df5772f79e5ef2c", + "id": "nmdc:052d3fb0080390255df5772f79e5ef2c", + "file_size_bytes": 9232981 + }, + { + "name": "Gp0127653_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cath_funfam.gff", + "md5_checksum": "e66a3b85c713e8766e5181da2e393984", + "id": "nmdc:e66a3b85c713e8766e5181da2e393984", + "file_size_bytes": 6697496 + }, + { + "name": "Gp0127653_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko_ec.gff", + "md5_checksum": "949e3b137b3a0591ed9de493ee5c530b", + "id": "nmdc:949e3b137b3a0591ed9de493ee5c530b", + "file_size_bytes": 5035400 + }, + { + "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 }, + { + "name": "Gp0127653_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127653", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/MAGs/nmdc_mga079y988_hqmq_bin.zip", + "md5_checksum": "1029b97dba32dab780f4267f8224619f", + "id": "nmdc:1029b97dba32dab780f4267f8224619f", + "file_size_bytes": 182 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ { "_id": { - "$oid": "649b009bff710ae353f8cf3d" + "$oid": "649b0052ec087f6bbab34720" }, "has_input": [ - "nmdc:8eec0e9c14abb418b906504d1675ecc5" + "nmdc:0f2b82878f54787c127bf03338d5c605", + "nmdc:ccd5ba8558a92751c59989aa81054e1a", + "nmdc:1e7dac5f12cc086509ff905f7133b15a" + ], + "too_short_contig_num": 48540, + "part_of": [ + "nmdc:mga079y988" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "nmdc:dbf03e26f7e1529762830161fe1f1906", - "nmdc:284ce1b28b8964cb525025d678277dba", - "nmdc:a379527f61806391e42b3512146013a8", - "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "nmdc:3219058371bf2f8081b2dd2b434ec145", - "nmdc:be29ebcd7358653afec7381f9ca43431", - "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:1029b97dba32dab780f4267f8224619f" ], "was_informed_by": "gold:Gp0127653", + "input_contig_num": 48931, "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", "execution_resource": 
"NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga079y988", + "name": "MAGs Analysis Activity for nmdc:mga079y988", + "mags_list": [], + "unbinned_contig_num": 391, "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:52:13+00:00", - "output_data_objects": [ - { - "name": "Gp0127653_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", - "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", - "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "file_size_bytes": 3812 - }, - { - "name": "Gp0127653_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", - "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", - "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", - "file_size_bytes": 857087 - }, - { - "name": "Gp0127653_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127653", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", - "md5_checksum": "284ce1b28b8964cb525025d678277dba", - "id": "nmdc:284ce1b28b8964cb525025d678277dba", - "file_size_bytes": 235621 - }, - { - "name": "Gp0127653_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127653", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", - "md5_checksum": "a379527f61806391e42b3512146013a8", - "id": "nmdc:a379527f61806391e42b3512146013a8", - "file_size_bytes": 1437707313 - }, - { - "name": "Gp0127653_Centrifuge TSV report", - 
"description": "Centrifuge TSV report for Gp0127653", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", - "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", - "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "file_size_bytes": 255105 - }, - { - "name": "Gp0127653_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127653", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", - "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", - "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", - "file_size_bytes": 2327985 - }, - { - "name": "Gp0127653_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127653", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", - "md5_checksum": "be29ebcd7358653afec7381f9ca43431", - "id": "nmdc:be29ebcd7358653afec7381f9ca43431", - "file_size_bytes": 1164013677 - }, - { - "name": "Gp0127653_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127653", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", - "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", - "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "file_size_bytes": 638368 - }, - { - "name": "Gp0127653_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127653", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", - "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", - "id": 
"nmdc:4c1aae1a46e51359f9146e48fff0e7f0", - "file_size_bytes": 3982485 - } - ] - }, + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ { "_id": { - "$oid": "61e719b9833bcf838a70124b" + "$oid": "649b005bbf2caae0415ef9be" }, "has_input": [ - "nmdc:8eec0e9c14abb418b906504d1675ecc5" + "nmdc:0f2b82878f54787c127bf03338d5c605" ], "part_of": [ "nmdc:mga079y988" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "nmdc:dbf03e26f7e1529762830161fe1f1906", - "nmdc:284ce1b28b8964cb525025d678277dba", - "nmdc:a379527f61806391e42b3512146013a8", - "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "nmdc:3219058371bf2f8081b2dd2b434ec145", - "nmdc:be29ebcd7358653afec7381f9ca43431", - "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" - ], - "was_informed_by": "gold:Gp0127653", - "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga079y988", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:52:13+00:00", - "output_data_objects": [ - { - "name": "Gp0127653_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", - "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", - "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "file_size_bytes": 3812 - }, - { - "name": "Gp0127653_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", - "md5_checksum": 
"dbf03e26f7e1529762830161fe1f1906", - "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", - "file_size_bytes": 857087 - }, - { - "name": "Gp0127653_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127653", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", - "md5_checksum": "284ce1b28b8964cb525025d678277dba", - "id": "nmdc:284ce1b28b8964cb525025d678277dba", - "file_size_bytes": 235621 - }, - { - "name": "Gp0127653_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127653", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", - "md5_checksum": "a379527f61806391e42b3512146013a8", - "id": "nmdc:a379527f61806391e42b3512146013a8", - "file_size_bytes": 1437707313 - }, - { - "name": "Gp0127653_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127653", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", - "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", - "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "file_size_bytes": 255105 - }, - { - "name": "Gp0127653_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127653", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", - "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", - "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", - "file_size_bytes": 2327985 - }, - { - "name": "Gp0127653_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127653", - "data_object_type": "Kraken2 
Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", - "md5_checksum": "be29ebcd7358653afec7381f9ca43431", - "id": "nmdc:be29ebcd7358653afec7381f9ca43431", - "file_size_bytes": 1164013677 - }, - { - "name": "Gp0127653_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127653", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", - "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", - "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "file_size_bytes": 638368 - }, - { - "name": "Gp0127653_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127653", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", - "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", - "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", - "file_size_bytes": 3982485 - } - ] - }, + "nmdc:81f16ca99f73a3314a66e6b24d23376f", + "nmdc:66bb16ef28196379647d319da50426dd", + "nmdc:1e7dac5f12cc086509ff905f7133b15a", + "nmdc:2a7343eb6364d769a1c43aa5c94daee8", + "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", + "nmdc:1d45960b1ba5e27af42c736ec583ecd4", + "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", + "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", + "nmdc:6bed0fc7a7be284936c69fc1faac4be6", + "nmdc:052d3fb0080390255df5772f79e5ef2c", + "nmdc:e66a3b85c713e8766e5181da2e393984", + "nmdc:949e3b137b3a0591ed9de493ee5c530b" + ], + "was_informed_by": "gold:Gp0127653", + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga079y988", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00" + } + ], + "metagenome_assembly_set": [ 
{ "_id": { "$oid": "649b005f2ca5ee4adb139faa" @@ -33107,1616 +34554,142 @@ "scaf_n90": 43028, "scaf_l_gt50k": 58655, "scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.34576103, - "output_data_objects": [ - { - "name": "Gp0127653_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127653", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_contigs.fna", - "md5_checksum": "0f2b82878f54787c127bf03338d5c605", - "id": "nmdc:0f2b82878f54787c127bf03338d5c605", - "file_size_bytes": 18722308 - }, - { - "name": "Gp0127653_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127653", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_scaffolds.fna", - "md5_checksum": "02f8e7222e9e6f45c388a189ca66e1f9", - "id": "nmdc:02f8e7222e9e6f45c388a189ca66e1f9", - "file_size_bytes": 18575622 - }, - { - "name": "Gp0127653_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_covstats.txt", - "md5_checksum": "eea8a4b58ca07019d0050b030be3a3d1", - "id": "nmdc:eea8a4b58ca07019d0050b030be3a3d1", - "file_size_bytes": 3824141 - }, - { - "name": "Gp0127653_Assembled AGP file", - "description": "Assembled AGP file for Gp0127653", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_assembly.agp", - "md5_checksum": "44b1ad59bd14c3367ac0fa2ca37aa057", - "id": "nmdc:44b1ad59bd14c3367ac0fa2ca37aa057", - "file_size_bytes": 3551123 - }, - { - "name": "Gp0127653_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127653", - "data_object_type": "Assembly Coverage BAM", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_pairedMapped_sorted.bam", - "md5_checksum": "ccd5ba8558a92751c59989aa81054e1a", - "id": "nmdc:ccd5ba8558a92751c59989aa81054e1a", - "file_size_bytes": 1757373378 - } + "scaf_pct_gt50k": 0.34576103 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b64" + }, + "id": "nmdc:omprc-11-p1735e67", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-k4wa0808" + ], + "has_output": [ + "jgi:574fde937ded5e3df1ee1428" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127653" ] - }, + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ { "_id": { - "$oid": "649b005bbf2caae0415ef9be" + "$oid": "649b009d6bdd4fd20273c878" }, "has_input": [ - "nmdc:0f2b82878f54787c127bf03338d5c605" + "nmdc:84ffabc3fbd7e759cd2352ec513b89a0" ], "part_of": [ "nmdc:mga079y988" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:81f16ca99f73a3314a66e6b24d23376f", - "nmdc:66bb16ef28196379647d319da50426dd", - 
"nmdc:1e7dac5f12cc086509ff905f7133b15a", - "nmdc:2a7343eb6364d769a1c43aa5c94daee8", - "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", - "nmdc:1d45960b1ba5e27af42c736ec583ecd4", - "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", - "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", - "nmdc:6bed0fc7a7be284936c69fc1faac4be6", - "nmdc:052d3fb0080390255df5772f79e5ef2c", - "nmdc:e66a3b85c713e8766e5181da2e393984", - "nmdc:949e3b137b3a0591ed9de493ee5c530b" + "nmdc:8eec0e9c14abb418b906504d1675ecc5", + "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0" ], "was_informed_by": "gold:Gp0127653", + "input_read_count": 20780788, + "output_read_bases": 2918466866, "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga079y988", + "input_read_bases": 3137898988, + "name": "Read QC Activity for nmdc:mga079y988", + "output_read_count": 19516330, "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-11-13T18:52:13+00:00", - "output_data_objects": [ - { - "name": "Gp0127653_Protein FAA", - "description": "Protein FAA for Gp0127653", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_proteins.faa", - "md5_checksum": "81f16ca99f73a3314a66e6b24d23376f", - "id": "nmdc:81f16ca99f73a3314a66e6b24d23376f", - "file_size_bytes": 11129064 - }, - { - "name": "Gp0127653_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127653", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_structural_annotation.gff", - "md5_checksum": "66bb16ef28196379647d319da50426dd", - "id": "nmdc:66bb16ef28196379647d319da50426dd", - "file_size_bytes": 8094827 - }, - { - "name": "Gp0127653_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127653", - "data_object_type": 
"Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_functional_annotation.gff", - "md5_checksum": "1e7dac5f12cc086509ff905f7133b15a", - "id": "nmdc:1e7dac5f12cc086509ff905f7133b15a", - "file_size_bytes": 13821021 - }, - { - "name": "Gp0127653_KO TSV file", - "description": "KO TSV file for Gp0127653", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko.tsv", - "md5_checksum": "2a7343eb6364d769a1c43aa5c94daee8", - "id": "nmdc:2a7343eb6364d769a1c43aa5c94daee8", - "file_size_bytes": 1578987 - }, - { - "name": "Gp0127653_EC TSV file", - "description": "EC TSV file for Gp0127653", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ec.tsv", - "md5_checksum": "b2cee4d35f68d1f5731bff3af5904fa4", - "id": "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", - "file_size_bytes": 1029657 - }, - { - "name": "Gp0127653_COG GFF file", - "description": "COG GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cog.gff", - "md5_checksum": "1d45960b1ba5e27af42c736ec583ecd4", - "id": "nmdc:1d45960b1ba5e27af42c736ec583ecd4", - "file_size_bytes": 7241411 - }, - { - "name": "Gp0127653_PFAM GFF file", - "description": "PFAM GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_pfam.gff", - "md5_checksum": "3dec47a0a04865ecdcd9ed7cbc78eca4", - "id": "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", - "file_size_bytes": 5221877 - }, - { - "name": "Gp0127653_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_tigrfam.gff", - "md5_checksum": "043322f3cd31d50faf4d4e0ffd1c8427", - "id": "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", - 
"file_size_bytes": 472233 - }, - { - "name": "Gp0127653_SMART GFF file", - "description": "SMART GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_smart.gff", - "md5_checksum": "6bed0fc7a7be284936c69fc1faac4be6", - "id": "nmdc:6bed0fc7a7be284936c69fc1faac4be6", - "file_size_bytes": 1586537 - }, - { - "name": "Gp0127653_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_supfam.gff", - "md5_checksum": "052d3fb0080390255df5772f79e5ef2c", - "id": "nmdc:052d3fb0080390255df5772f79e5ef2c", - "file_size_bytes": 9232981 - }, - { - "name": "Gp0127653_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cath_funfam.gff", - "md5_checksum": "e66a3b85c713e8766e5181da2e393984", - "id": "nmdc:e66a3b85c713e8766e5181da2e393984", - "file_size_bytes": 6697496 - }, - { - "name": "Gp0127653_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko_ec.gff", - "md5_checksum": "949e3b137b3a0591ed9de493ee5c530b", - "id": "nmdc:949e3b137b3a0591ed9de493ee5c530b", - "file_size_bytes": 5035400 - } - ] - }, + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ { "_id": { - "$oid": "649b0052ec087f6bbab34720" + "$oid": "649b009bff710ae353f8cf3d" }, "has_input": [ - "nmdc:0f2b82878f54787c127bf03338d5c605", - "nmdc:ccd5ba8558a92751c59989aa81054e1a", - "nmdc:1e7dac5f12cc086509ff905f7133b15a" + "nmdc:8eec0e9c14abb418b906504d1675ecc5" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "nmdc:dbf03e26f7e1529762830161fe1f1906", + 
"nmdc:284ce1b28b8964cb525025d678277dba", + "nmdc:a379527f61806391e42b3512146013a8", + "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "nmdc:3219058371bf2f8081b2dd2b434ec145", + "nmdc:be29ebcd7358653afec7381f9ca43431", + "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" + ], + "was_informed_by": "gold:Gp0127653", + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga079y988", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:52:13+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [], + "read_based_analysis_activity_set": [ + { + "_id": { + "$oid": "61e719b9833bcf838a70124b" + }, + "has_input": [ + "nmdc:8eec0e9c14abb418b906504d1675ecc5" ], - "too_short_contig_num": 48540, "part_of": [ "nmdc:mga079y988" ], "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:1029b97dba32dab780f4267f8224619f" + "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "nmdc:dbf03e26f7e1529762830161fe1f1906", + "nmdc:284ce1b28b8964cb525025d678277dba", + "nmdc:a379527f61806391e42b3512146013a8", + "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "nmdc:3219058371bf2f8081b2dd2b434ec145", + "nmdc:be29ebcd7358653afec7381f9ca43431", + "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" ], "was_informed_by": "gold:Gp0127653", - "input_contig_num": 48931, "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga079y988", - "mags_list": [], - "unbinned_contig_num": 391, + "name": "ReadBased Analysis Activity for nmdc:mga079y988", "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MAGsAnalysisActivity", - 
"ended_at_time": "2021-11-13T18:52:13+00:00", - "output_data_objects": [ - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127653_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127653", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/MAGs/nmdc_mga079y988_hqmq_bin.zip", - "md5_checksum": "1029b97dba32dab780f4267f8224619f", - "id": "nmdc:1029b97dba32dab780f4267f8224619f", - "file_size_bytes": 182 - } - ] + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:52:13+00:00" } ] - }, - { - "_id": { - "$oid": "649b009773e8249959349bd6" - }, - "id": "nmdc:omprc-11-31svgk40", - "name": "SBR_FC_N1_10-20_H2Oext_13Oct15_Leopard_1_01_4405", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-6zt5jc55" - ], - "has_output": [ - "emsl:output_456429" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456429" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bd7" - }, - "id": "nmdc:omprc-11-9z3dj481", - "name": "SBR_FC_N1_00-10_H2Oext_15Oct15_Leopard_1_01_4515", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-r39v4766" - ], - "has_output": [ - "emsl:output_456425" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - 
"instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456425" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bd8" - }, - "id": "nmdc:omprc-11-6dkkx108", - "name": "SBR_FC_N1_00-10_H2Oext_13Oct15_Leopard_1_01_4404", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-r39v4766" - ], - "has_output": [ - "emsl:output_456424" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456424" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bd9" - }, - "id": "nmdc:omprc-11-xv3nwb33", - "name": "SBR_FC_N1_20-30_H2Oext_15Oct15_Leopard_1_01_4517", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-fg8jh149" - ], - "has_output": [ - "emsl:output_456435" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456435" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bda" - }, - "id": "nmdc:omprc-11-47x32496", - "name": "SBR_FC_N1_20-30_H2Oext_13Oct15_Leopard_1_01_4406", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-fg8jh149" - ], - "has_output": [ - "emsl:output_456434" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - 
"processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456434" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bdd" - }, - "id": "nmdc:omprc-11-5kt8rj58", - "name": "SBR_FC_N1_30-40_H2Oext_13Oct15_Leopard_1_01_4407", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-y9qebw84" - ], - "has_output": [ - "emsl:output_456439" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456439" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bdf" - }, - "id": "nmdc:omprc-11-bcnnxa07", - "name": "SBR_FC_N1_40-50_H2Oext_15Oct15_Leopard_1_01_4519", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-01n4q492" - ], - "has_output": [ - "emsl:output_456445" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456445" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be0" - }, - "id": "nmdc:omprc-11-gj7th903", - "name": "SBR_FC_N1_10-20_H2Oext_15Oct15_Leopard_1_01_4516", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-6zt5jc55" - ], - "has_output": [ - "emsl:output_456430" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456430" - 
], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be1" - }, - "id": "nmdc:omprc-11-fj734v67", - "name": "SBR_FC_N1_50-60_H2Oext_13Oct15_Leopard_1_01_4409", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-0pragn10" - ], - "has_output": [ - "emsl:output_456449" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456449" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be2" - }, - "id": "nmdc:omprc-11-6ar8e259", - "name": "SBR_FC_N1_50-60_H2Oext_15Oct15_Leopard_1_01_4520", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-0pragn10" - ], - "has_output": [ - "emsl:output_456450" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456450" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be3" - }, - "id": "nmdc:omprc-11-vcc89f39", - "name": "SBR_FC_N2_30-40_H2Oext_13Oct15_Leopard_1_01_4411", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-1pewj990" - ], - "has_output": [ - "emsl:output_456459" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456459" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be4" - }, - "id": 
"nmdc:omprc-11-k76emy12", - "name": "SBR_FC_N1_30-40_H2Oext_15Oct15_Leopard_1_01_4518", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-y9qebw84" - ], - "has_output": [ - "emsl:output_456440" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456440" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be5" - }, - "id": "nmdc:omprc-11-15c0g775", - "name": "SBR_FC_N2_30-40_H2Oext_15Oct15_Leopard_1_01_4522", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-1pewj990" - ], - "has_output": [ - "emsl:output_456460" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456460" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be6" - }, - "id": "nmdc:omprc-11-pj9h5941", - "name": "SBR_FC_N2_50-60_H2Oext_13Oct15_Leopard_1_01_4413", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-567met04" - ], - "has_output": [ - "emsl:output_456469" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456469" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be7" - }, - "id": "nmdc:omprc-11-hpwqy918", - "name": "SBR_FC_N2_00-30_H2Oext_15Oct15_Leopard_1_01_4521", - "description": "High 
resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-cjxybn89" - ], - "has_output": [ - "emsl:output_456455" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456455" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be8" - }, - "id": "nmdc:omprc-11-wgp5nv05", - "name": "SBR_FC_N2_50-60_H2Oext_15Oct15_Leopard_1_01_4524", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-567met04" - ], - "has_output": [ - "emsl:output_456470" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456470" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349be9" - }, - "id": "nmdc:omprc-11-9bn99f79", - "name": "SBR_FC_N2_40-50_H2Oext_13Oct15_Leopard_1_01_4412", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-93raqf43" - ], - "has_output": [ - "emsl:output_456464" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456464" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bea" - }, - "id": "nmdc:omprc-11-5m63ha78", - "name": "SBR_FC_N3_00-10_H2Oext_13Oct15_Leopard_1_01_4414", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-g0c8gb14" - ], - "has_output": [ - "emsl:output_456474" - 
], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456474" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349beb" - }, - "id": "nmdc:omprc-11-db4my476", - "name": "SBR_FC_N2_00-30_H2Oext_13Oct15_Leopard_1_01_4410", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-cjxybn89" - ], - "has_output": [ - "emsl:output_456454" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456454" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bec" - }, - "id": "nmdc:omprc-11-gb2h5750", - "name": "SBR_FC_N3_00-10_H2Oext_15Oct15_Leopard_1_01_4525", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-g0c8gb14" - ], - "has_output": [ - "emsl:output_456475" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456475" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bed" - }, - "id": "nmdc:omprc-11-z2578j17", - "name": "SBR_FC_N2_40-50_H2Oext_15Oct15_Leopard_1_01_4523", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-93raqf43" - ], - "has_output": [ - "emsl:output_456465" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - 
"has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456465" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bee" - }, - "id": "nmdc:omprc-11-a53tkk54", - "name": "SBR_FC_N3_20-30_H2Oext_13Oct15_Leopard_1_01_4416", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-3z48kw62" - ], - "has_output": [ - "emsl:output_456484" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456484" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bef" - }, - "id": "nmdc:omprc-11-qkyx2m56", - "name": "SBR_FC_N3_10-20_H2Oext_15Oct15_Leopard_1_01_4526", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-6aq1va61" - ], - "has_output": [ - "emsl:output_456480" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456480" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf0" - }, - "id": "nmdc:omprc-11-dz40jm83", - "name": "SBR_FC_N3_10-20_H2Oext_13Oct15_Leopard_1_01_4415", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-6aq1va61" - ], - "has_output": [ - "emsl:output_456479" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": 
"nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456479" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf1" - }, - "id": "nmdc:omprc-11-dmk5wj06", - "name": "SBR_FC_N3_20-30_H2Oext_15Oct15_Leopard_1_01_4527", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-3z48kw62" - ], - "has_output": [ - "emsl:output_456485" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456485" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf2" - }, - "id": "nmdc:omprc-11-nvhp3684", - "name": "SBR_FC_N3_40-50_H2Oext_13Oct15_Leopard_1_01_4418", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-wvxw7m55" - ], - "has_output": [ - "emsl:output_456494" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456494" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf3" - }, - "id": "nmdc:omprc-11-x9b1vg36", - "name": "SBR_FC_N1_40-50_H2Oext_13Oct15_Leopard_1_01_4408", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-01n4q492" - ], - "has_output": [ - "emsl:output_456444" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456444" - ], - "downstream_workflow_activity_records": 
[] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf4" - }, - "id": "nmdc:omprc-11-mfsr0a96", - "name": "SBR_FC_N3_40-50_H2Oext_15Oct15_Leopard_1_01_4529", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-wvxw7m55" - ], - "has_output": [ - "emsl:output_456495" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456495" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf5" - }, - "id": "nmdc:omprc-11-vr9c4f75", - "name": "SBR_FC_N3_30-40_H2Oext_13Oct15_Leopard_1_01_4417", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-5cpjs440" - ], - "has_output": [ - "emsl:output_456489" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456489" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf6" - }, - "id": "nmdc:omprc-11-9r543r49", - "name": "SBR_FC_N3_30-40_H2Oext_15Oct15_Leopard_1_01_4528", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-5cpjs440" - ], - "has_output": [ - "emsl:output_456490" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456490" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf7" - }, - "id": "nmdc:omprc-11-2vfy3s61", - "name": 
"SBR_FC_N3_50-60_H2Oext_15Oct15_Leopard_1_01_4530", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-8saj5c05" - ], - "has_output": [ - "emsl:output_456500" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456500" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf8" - }, - "id": "nmdc:omprc-11-c1ngnw86", - "name": "SBR_FC_S1_00-10_H2Oext_15Oct15_Leopard_1_01_4531", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-m0hn5p98" - ], - "has_output": [ - "emsl:output_456505" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456505" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bf9" - }, - "id": "nmdc:omprc-11-excmek32", - "name": "SBR_FC_S1_10-20_H2Oext_15Oct15_Leopard_1_01_4532", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-br7th280" - ], - "has_output": [ - "emsl:output_456510" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456510" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bfa" - }, - "id": "nmdc:omprc-11-mefm4e20", - "name": "SBR_FC_S1_00-10_H2Oext_13Oct15_Leopard_1_01_4420", - "description": "High resolution MS spectra only", - "has_input": 
[ - "nmdc:bsm-11-m0hn5p98" - ], - "has_output": [ - "emsl:output_456504" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456504" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bfb" - }, - "id": "nmdc:omprc-11-fzgdcp80", - "name": "SBR_FC_S1_20-30_H2Oext_13Oct15_Leopard_1_01_4422", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-qyz13338" - ], - "has_output": [ - "emsl:output_456514" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456514" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bfc" - }, - "id": "nmdc:omprc-11-3akts752", - "name": "SBR_FC_N3_50-60_H2Oext_13Oct15_Leopard_1_01_4419", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-8saj5c05" - ], - "has_output": [ - "emsl:output_456499" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456499" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bfd" - }, - "id": "nmdc:omprc-11-6sq6j166", - "name": "SBR_FC_S1_10-20_H2Oext_13Oct15_Leopard_1_01_4421", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-br7th280" - ], - "has_output": [ - "emsl:output_456509" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" 
- ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456509" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bfe" - }, - "id": "nmdc:omprc-11-3awbyv03", - "name": "SBR_FC_S1_30-40_H2Oext_13Oct15_Leopard_1_01_4423", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-pzv0cf30" - ], - "has_output": [ - "emsl:output_456519" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456519" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349bff" - }, - "id": "nmdc:omprc-11-4kgm5w41", - "name": "SBR_FC_S1_20-30_H2Oext_15Oct15_Leopard_1_01_4533", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-qyz13338" - ], - "has_output": [ - "emsl:output_456515" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456515" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c00" - }, - "id": "nmdc:omprc-11-6xm1va42", - "name": "SBR_FC_S1_50-60_H2Oext_13Oct15_Leopard_1_01_4425", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-pnzwey18" - ], - "has_output": [ - "emsl:output_456529" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - 
"processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456529" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c01" - }, - "id": "nmdc:omprc-11-t2yd9m69", - "name": "SBR_FC_S2_00-10_H2Oext_15Oct15_Leopard_1_01_4537", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-60ayn103" - ], - "has_output": [ - "emsl:output_456535" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456535" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c02" - }, - "id": "nmdc:omprc-11-9d9fxj83", - "name": "SBR_FC_S2_10-20_H2Oext_15Oct15_Leopard_1_01_4538", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-e3hp0a71" - ], - "has_output": [ - "emsl:output_456540" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456540" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c03" - }, - "id": "nmdc:omprc-11-1rkm8b52", - "name": "SBR_FC_S2_10-20_H2Oext_13Oct15_Leopard_1_01_4427", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-e3hp0a71" - ], - "has_output": [ - "emsl:output_456539" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456539" - 
], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c04" - }, - "id": "nmdc:omprc-11-bzhsj954", - "name": "SBR_FC_S2_30-40_H2Oext_15Oct15_Leopard_1_01_4540", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-73qw2q23" - ], - "has_output": [ - "emsl:output_456550" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456550" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c05" - }, - "id": "nmdc:omprc-11-ae716322", - "name": "SBR_FC_S2_20-30_H2Oext_13Oct15_Leopard_1_01_4428", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-pknfsy79" - ], - "has_output": [ - "emsl:output_456544" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456544" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c06" - }, - "id": "nmdc:omprc-11-ryy9gp08", - "name": "SBR_FC_S3_00-10_H2Oext_15Oct15_Leopard_1_01_4543", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-rs67sh23" - ], - "has_output": [ - "emsl:output_456565" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456565" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c07" - }, - "id": 
"nmdc:omprc-11-qx18h332", - "name": "SBR_FC_S3_10-20_H2Oext_13Oct15_Leopard_1_01_4433", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-83ghpk10" - ], - "has_output": [ - "emsl:output_456569" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456569" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c08" - }, - "id": "nmdc:omprc-11-k3e66918", - "name": "SBR_FC_S3_00-10_H2Oext_13Oct15_Leopard_1_01_4432", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-rs67sh23" - ], - "has_output": [ - "emsl:output_456564" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456564" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c09" - }, - "id": "nmdc:omprc-11-nq83nd29", - "name": "SBR_FC_S3_20-30_H2Oext_13Oct15_Leopard_1_01_4434", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-5p6f9e18" - ], - "has_output": [ - "emsl:output_456574" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456574" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c0a" - }, - "id": "nmdc:omprc-11-xsnaaj68", - "name": "SBR_FC_S2_20-30_H2Oext_15Oct15_Leopard_1_01_4539", - "description": "High 
resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-pknfsy79" - ], - "has_output": [ - "emsl:output_456545" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456545" - ], - "downstream_workflow_activity_records": [] - }, - { - "_id": { - "$oid": "649b009773e8249959349c0b" - }, - "id": "nmdc:omprc-11-409hwb80", - "name": "SBR_FC_S3_30-40_H2Oext_13Oct15_Leopard_1_01_4435", - "description": "High resolution MS spectra only", - "has_input": [ - "nmdc:bsm-11-ea7re871" - ], - "has_output": [ - "emsl:output_456579" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "instrument_name": "12T_FTICR_B", - "omics_type": { - "has_raw_value": "Organic Matter Characterization" - }, - "processing_institution": "EMSL", - "type": "nmdc:OmicsProcessing", - "alternative_identifiers": [ - "emsl:456579" - ], - "downstream_workflow_activity_records": [] } ] \ No newline at end of file diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index d7a4442b..04e78896 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -13,6 +13,7 @@ from nmdc_automation.api import NmdcRuntimeUserApi from nmdc_automation.config import Config +import nmdc_schema.nmdc as nmdc GOLD_STUDY_ID = "gold:Gs0114663" STUDY_ID = "nmdc:sty-11-aygzgv51" @@ -82,75 +83,52 @@ def rebuild_workflow_records(study_id: str, site_config: bool): f"Retrieved {len(omics_processing_records)} OmicsProcessing records for study {study_id}" ) - workflow_records_per_study = [] - + retrieved_databases = [] # 2. 
For each OmicsProcessing record, find the legacy identifier: for omics_processing_record in omics_processing_records: + db = nmdc.Database() logging.info(f"omics_processing_record: " f"{omics_processing_record['id']}") legacy_id = _get_legacy_id(omics_processing_record) logging.info(f"legacy_id: {legacy_id}") - omics_processing_record["downstream_workflow_activity_records"] = [] - if (omics_processing_record["omics_type"]["has_raw_value"] != "Metagenome"): logging.info(f"omics_processing_record {omics_processing_record['id']} " f"is not a Metagenome") continue - - # reads QC records - # Downstream WorkflowExecutionActivity records depend on the `has_output` - # data object of the ReadQcAnalysisActivity record. - set_name = "read_qc_analysis_activity_set" - read_qc_records = query_api_client.get_workflow_activity_informed_by( - set_name, legacy_id - ) - # Add the data objects referenced by the `has_output` property - for record in read_qc_records: - record["output_data_objects"] = [] - for data_object_id in record["has_output"]: - data_object_record = query_api_client.get_data_object_by_id( - data_object_id - ) - record["output_data_objects"].append(data_object_record) - - - logging.info(f"Found {len(read_qc_records)} read_qc_records") - omics_processing_record[ - "downstream_workflow_activity_records"].extend(read_qc_records) + db.omics_processing_set.append(omics_processing_record) # downstream workflow activity sets - taxonomy_records, read_based_analysis_records, metagenome_assembly_records, \ - metagenome_annotation_records, mags_records = [], [], [], [], [] + (read_qc_records, taxonomy_records, read_based_analysis_records, + metagenome_assembly_records, + metagenome_annotation_records, mags_records) = [], [], [], [], [], [] downstream_workflow_activity_sets = { + "read_qc_analysis_activity_set": read_qc_records, "read_based_taxonomy_analysis_activity_set": taxonomy_records, "read_based_analysis_activity_set": read_based_analysis_records, 
"metagenome_assembly_set": metagenome_assembly_records, "metagenome_annotation_activity_set": metagenome_annotation_records, "mags_activity_set": mags_records, } - for set_name, records in downstream_workflow_activity_sets.items(): records = query_api_client.get_workflow_activity_informed_by( set_name, legacy_id ) + db.__setattr__(set_name, records) # Add the data objects referenced by the `has_output` property for record in records: - record["output_data_objects"] = [] for data_object_id in record["has_output"]: data_object_record = query_api_client.get_data_object_by_id( data_object_id ) - record["output_data_objects"].append(data_object_record) - omics_processing_record[ - "downstream_workflow_activity_records"].extend(records) - workflow_records_per_study.append(records) - logging.info(f"Found {len(records)} {set_name} records") + db.data_object_set.append(data_object_record) + + retrieved_databases.append(db) with open(f"{study_id}_assocated_record_dump.json", 'w') as json_file: - json.dump(omics_processing_records, json_file, indent=4) + json.dump([o.__dict__ for o in retrieved_databases], json_file, indent=4) if __name__ == "__main__": From 0538575103dd2b55349a1ee5274e0f86974ef201 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Wed, 1 Nov 2023 14:07:39 -0700 Subject: [PATCH 20/91] added dry run for reads qc transformation --- nmdc_automation/re_iding/re_id_process.py | 132 +++++++++++++++------- 1 file changed, 93 insertions(+), 39 deletions(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index 633129bb..a47de8e5 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -11,6 +11,7 @@ from re_id_file_operations import * import nmdc_schema.nmdc as nmdc from linkml_runtime.dumpers import json_dumper +import shutil ###GLOBAL###### nmdc_db = nmdc.Database() @@ -109,7 +110,7 @@ def process(db, old_id, new_id): return assocaited_data_object -def 
minter(config,shoulder): +def minter(shoulder): """ Creates a new ID based on the provided shoulder. @@ -120,8 +121,6 @@ def minter(config,shoulder): - A new ID string """ - runtime_api = NmdcRuntimeApi(config) - return runtime_api.minter(shoulder) @@ -130,9 +129,20 @@ def compute_new_paths(old_url, new_base_dir, omic_id, act_id): Use the url to compute the new file name path and url """ file_name = old_url.split("/")[-1] + suffix = old_url.split("https://data.microbiomedata.org/data/")[1] + old_file_path = base_dir + "/" + suffix file_extenstion = file_name.lstrip("nmdc_").split("_", maxsplit=1)[-1] new_file_name = f"{act_id}_{file_extenstion}" destination = os.path.join(new_base_dir, new_file_name) + + try: + os.link(old_file_path, destination) + print(f"Successfully created link between {old_file_path} and {destination}") + except OSError as e: + print(f"An error occurred while linking the file: {e}") + except Exception as e: + print(f"Unexpected error: {e}") + new_url = f"{base}/{omic_id}/{act_id}/{new_file_name}" return new_url, destination, new_file_name @@ -215,47 +225,43 @@ def copy_outputs(db, outputs, omic_id, act_id): return new_ids, new_data_objects -def make_activity_set(nmdc_db, omics_id, has_input, has_output,workflow_record): - database_activity_set = getattr(nmdc_db, workflow_record["Collection"]) +def make_activity_set(omics_id, activity_id, has_input, has_output, started_time, ended_time, workflow_record_template): + #look at activity range + database_activity_set = getattr(nmdc_db, workflow_record_template["Collection"]) # Lookup the nmdc schema range class - database_activity_range = getattr(nmdc_db, workflow_record["ActivityRange"]) - # Mint an ID - new_id = minter(workflow_record["Type"]) + database_activity_range = getattr(nmdc, workflow_record_template["ActivityRange"]) - activity_id = new_id database_activity_set.append( database_activity_range( id=activity_id, - name=workflow_record["Activity"]["name"].replace("{id}", activity_id), - 
git_url=workflow_record["Git_repo"], - version=workflow_record["Version"], + name=workflow_record_template["Activity"]["name"].replace("{id}", activity_id), + git_url=workflow_record_template["Git_repo"], + version=workflow_record_template["Version"], part_of=[omics_id], execution_resource="Perlmutter - Nersc", - started_at_time=datetime.datetime.now(pytz.utc).isoformat(), + started_at_time=started_time, has_input=has_input, has_output=has_output, - type=workflow_record["Type"], - ended_at_time=datetime.datetime.now(pytz.utc).isoformat(), + type=workflow_record_template["Type"], + ended_at_time=ended_time, was_informed_by=omics_id, ) ) -def make_data_object(data_object_record, omics_id): +def make_data_object(data_object_record, new_do_id, new_url, updated_name, updated_description): nmdc_db.data_object_set.append( nmdc.DataObject( - file_size_bytes=data_object_record["file_size"], - name=data_object_record["data_object_name"], - url=data_object_record["data_object_url"], - data_object_type=["data_object_type"], + file_size_bytes=data_object_record["file_size_bytes"], + name=updated_name, + url=new_url, + data_object_type=data_object_record["data_object_type"], type="nmdc:DataObject", - id=minter("nmdc:DataObject"), + id=new_do_id, md5_checksum=data_object_record["md5_checksum"], - description=data_object_record["description"].replace( - "{id}",omics_id + description=updated_description ), ) - ) def post_database_object_to_runtime(datase_object): @@ -266,39 +272,80 @@ def post_database_object_to_runtime(datase_object): return res def get_omics_id(omics_record): - return omics_record["id"] + for rec in omics_record["omics_processing_set"]: + return rec["id"] -def get_record_by_type(omics_children_records, record_type): +def get_record_by_type(related_omic_records, record_type): """ - Reads a JSON file and returns the record that matches the given type. + Returns the record that matches the given type. Parameters: - - filename (str): The path to the JSON file. 
+ - related_omic_records (dict): records for an omics processing record. - record_type (str): The desired type value to match. Returns: - dict: The first record that matches the given type, or None if not found. """ - for analysis_record in omics_children_records["downstream_workflow_activity_records"]: - if analysis_record.get("type") == record_type: - return analysis_record - - return None + return related_omic_records[record_type] -def reads_qc_update(omics_record, template_file): +def get_data_object_by_type(related_omic_records, old_activity_record): + + data_object_list = [] + + for act_record in old_activity_record: + has_output_list = act_record["has_output"] + for do_record in related_omic_records["data_object_set"]: + if do_record["id"] in has_output_list: + data_object_list.append(do_record) + + return data_object_list + +def get_associate_data_object_template(data_object_type,data_object_templates): + for data_object in data_object_templates: + if data_object_type == data_object["data_object_type"]: + return data_object + return None - workflow_type = "nmdc:ReadQCAnalysisActivity" +def reads_qc_update(omics_record, template_file, omic_id): + + workflow_type = "read_qc_analysis_activity_set" reads_qc_record = get_record_by_type(omics_record, workflow_type) + reads_qc_data_objects = get_data_object_by_type(omics_record, reads_qc_record) for template in read_workflows_config(template_file): if template['Type'] == "nmdc:ReadQcAnalysisActivity": reads_qc_template = template - #TODO: - #Use nmdc_schema db and update workflow recors and data objects for reads qc (update files) - print(reads_qc_record, reads_qc_template) + new_act_id = minter(reads_qc_template["Type"]) + new_qc_base_dir = os.path.join(base_dir, omic_id, new_act_id) + os.makedirs(new_qc_base_dir,exist_ok=True) + updated_has_output_list = [] + + for data_object in reads_qc_data_objects: + dobj_tmpl = get_associate_data_object_template(data_object["data_object_type"],reads_qc_template["Outputs"]) 
+ new_do_id = minter("nmdc:DataObject") + new_description = re.sub('[^ ]+$', f"{omic_id}", data_object["description"]) + new_url, destination, _ = compute_new_paths(data_object["url"], new_qc_base_dir, omic_id, new_act_id) + make_data_object(data_object, new_do_id, new_url, dobj_tmpl["name"], new_description) + updated_has_output_list.append(new_do_id) + + for rec in reads_qc_record: + has_input = rec["has_input"] + started_time = rec["started_at_time"] + ended_time = rec["ended_at_time"] + #need to change has input to be updated as well, + make_activity_set(omic_id, new_act_id, has_input, updated_has_output_list, started_time, ended_time, reads_qc_template) + + + nmdc_database_object = json.loads( + json_dumper.dumps(nmdc_db, inject_type=False) + ) + + print(nmdc_database_object) + + return destination def process_analysis_sets(study_records, template_file,dry_run=False): @@ -307,9 +354,16 @@ def process_analysis_sets(study_records, template_file,dry_run=False): for omic_record in study_records: omics_id = get_omics_id(omic_record) print(omics_id) - reads_qc_update(omic_record, template_file) + destination = reads_qc_update(omic_record, template_file, omics_id) if dry_run == True: count += 1 + dir_path = os.path.dirname(destination) + parent_dir_path = os.path.dirname(dir_path) + try: + shutil.rmtree(parent_dir_path) + print(f"Directory {parent_dir_path} and all its contents removed successfully!") + except OSError as e: + print(f"Error: {e}") if count == 1: break From f59486a57887ba965e3f9550b775a87912ed68cf Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Wed, 1 Nov 2023 14:07:59 -0700 Subject: [PATCH 21/91] added ActivityRange for workflows --- configs/re_iding_worklfows.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configs/re_iding_worklfows.yaml b/configs/re_iding_worklfows.yaml index 7fbce6ef..b77da9a4 100644 --- a/configs/re_iding_worklfows.yaml +++ b/configs/re_iding_worklfows.yaml @@ -6,6 +6,7 @@ Workflows: Version: v1.0.8 WDL: 
rqcfilter.wdl Collection: read_qc_analysis_activity_set + ActivityRange: ReadQcAnalysisActivity Filter Input Objects: - Metagenome Raw Reads Predecessors: @@ -46,6 +47,7 @@ Workflows: Version: v1.0.3 WDL: jgi_assembly.wdl Collection: metagenome_assembly_set + ActivityRange: MetagenomeAssembly Predecessors: - Reads QC - Reads QC Interleave @@ -121,6 +123,7 @@ Workflows: Version: v1.0.5 WDL: ReadbasedAnalysis.wdl Collection: read_based_taxonomy_analysis_activity_set + ActivityRange: ReadBasedTaxonomyAnalysisActivity Predecessors: - Reads QC - Reads QC Interleave From 966d367d9f9c6a7fb3e0c224cf098d41cbb3a5be Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 1 Nov 2023 14:08:42 -0700 Subject: [PATCH 22/91] remove reads_based_analysis_acrtivity_set --- .../re_iding/scripts/rebuild_metagenome_workflow_records.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index 04e78896..a32ceb99 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -100,14 +100,13 @@ def rebuild_workflow_records(study_id: str, site_config: bool): db.omics_processing_set.append(omics_processing_record) # downstream workflow activity sets - (read_qc_records, taxonomy_records, read_based_analysis_records, + (read_qc_records, taxonomy_records, metagenome_assembly_records, - metagenome_annotation_records, mags_records) = [], [], [], [], [], [] + metagenome_annotation_records, mags_records) = [], [], [], [], [] downstream_workflow_activity_sets = { "read_qc_analysis_activity_set": read_qc_records, "read_based_taxonomy_analysis_activity_set": taxonomy_records, - "read_based_analysis_activity_set": read_based_analysis_records, "metagenome_assembly_set": metagenome_assembly_records, "metagenome_annotation_activity_set": 
metagenome_annotation_records, "mags_activity_set": mags_records, From ec7b528d72a03518cc46a63d44f02e16985f1d6d Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 2 Nov 2023 10:04:25 -0700 Subject: [PATCH 23/91] remove unused functions --- nmdc_automation/re_iding/re_id_process.py | 182 +++++++++------------- 1 file changed, 75 insertions(+), 107 deletions(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index a47de8e5..7733cbca 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -62,54 +62,6 @@ def read_map(): return omap -def find_dir(db, old_id): - """ - Finds and returns the directory name associated with the given ID. - - Parameters: - - db: Database connection object - - old_id: The old ID for which the directory name is required - """ - query_by_omics_id = {"was_informed_by": old_id} - activity_record = db.read_qc_analysis_activity_set.find_one(query_by_omics_id) - query_by_id = {"id": activity_record['has_output'][0]} - data_object_record = db.data_object_set.find_one(query_by_id) - return data_object_record['url'].split('/')[4] - - -def process(db, old_id, new_id): - """ - Process the given old ID and returns the associated data. - - Parameters: - - db: Database connection object - - old_id: The old ID to be processed - - new_id: The new ID to be used - - Returns: - - out: A dictionary containing processed data. - """ - directory_name = find_dir(db, old_id) - assocaited_data_object = {"data_object_set": []} - for col in sets: - query_by_old_id = {"was_informed_by": old_id} - res = db[col].find(query_by_old_id) - count = 0 - for doc in res: - count += 1 - if count != 1: - raise ValueError("Too many matches. 
Failing") - doc.pop("_id") - atype = doc['type'] - func_name = atype.lower().replace("nmdc:", "") - func = globals()[func_name] - activity_records, data_object_records = func(db, doc, new_id) - assocaited_data_object[col] =[activity_records] - assocaited_data_object["data_object_set"].extend(data_object_records) - json.dump(assocaited_data_object, open(f"{new_id}.json", "w"), indent=2) - return assocaited_data_object - - def minter(shoulder): """ Creates a new ID based on the provided shoulder. @@ -133,7 +85,8 @@ def compute_new_paths(old_url, new_base_dir, omic_id, act_id): old_file_path = base_dir + "/" + suffix file_extenstion = file_name.lstrip("nmdc_").split("_", maxsplit=1)[-1] new_file_name = f"{act_id}_{file_extenstion}" - destination = os.path.join(new_base_dir, new_file_name) + modified_new_file_name = new_file_name.replace(":", "_") + destination = os.path.join(new_base_dir, modified_new_file_name) try: os.link(old_file_path, destination) @@ -171,60 +124,6 @@ def find_type(obj): return None -def copy_outputs(db, outputs, omic_id, act_id): - """ - Copy output data objects and generate new metadata for them. - - Args: - - db (MongoClient): MongoDB client instance to fetch data. - - outputs (list): List of output object IDs. - - omic_id (str): ID of the omics process. - - act_id (str): ID of the activity. - - Returns: - - tuple: List of new object IDs and the new objects themselves. 
- """ - new_data_objects = [] - new_ids = [] - new_base_dir = os.path.join(base_dir, omic_id, act_id) - os.makedirs(new_base_dir, exist_ok=True) - for data_obj_id in outputs: - data_obj = db.data_object_set.find_one({"id": data_obj_id}) - old_url = data_obj["url"] - new_url, dst, new_fn = compute_new_paths(old_url, new_base_dir, omic_id, act_id) - new_id = minter("dobj") - log_mapping("data", data_obj["id"], new_id) - - # Create new obj - data_obj.pop("_id") - desc = data_obj["description"] - data_obj["description"] = re.sub('[^ ]+$', f"{omic_id}", desc) - data_obj["url"] = new_url - data_obj["id"] = new_id - data_obj["name"] = new_fn - data_type = find_type(data_obj) - data_obj["data_object_type"] = data_type - - # Link the file - src = old_url.replace(base, base_dir) - func_name = "bogus" - if data_type: - func_name = data_type.replace(" ", "_").lower() - if func_name in globals(): - sys.stderr.write(f"Using func {func_name}\n") - func = globals()[func_name] - md5, size = func(src, dst, omic_id, act_id) - data_obj["file_size_bytes"] = size - data_obj["md5_checksum"] = md5 - else: - os.link(src, dst) - - # Add to the lists - new_ids.append(new_id) - new_data_objects.append(data_obj) - return new_ids, new_data_objects - - def make_activity_set(omics_id, activity_id, has_input, has_output, started_time, ended_time, workflow_record_template): #look at activity range database_activity_set = getattr(nmdc_db, workflow_record_template["Collection"]) @@ -238,7 +137,7 @@ def make_activity_set(omics_id, activity_id, has_input, has_output, started_time git_url=workflow_record_template["Git_repo"], version=workflow_record_template["Version"], part_of=[omics_id], - execution_resource="Perlmutter - Nersc", + execution_resource="NERSC - Perlmutter", started_at_time=started_time, has_input=has_input, has_output=has_output, @@ -308,29 +207,47 @@ def get_associate_data_object_template(data_object_type,data_object_templates): return None def reads_qc_update(omics_record, 
template_file, omic_id): + """Extracts relevant information from omics record and template file, update data objects for reads qc + and the analysis activity set record. Performs necessary file operation as well + + Args: + omics_record (dict): omics record corresponding to downstream workflows + template_file (file): template yaml file for relavant worklfows metadata + omic_id (str): string identifier of omics id + + Returns: + nmdc_database_object: dump of nmdc object + """ workflow_type = "read_qc_analysis_activity_set" - + #extract needed metaadata reads_qc_record = get_record_by_type(omics_record, workflow_type) reads_qc_data_objects = get_data_object_by_type(omics_record, reads_qc_record) for template in read_workflows_config(template_file): if template['Type'] == "nmdc:ReadQcAnalysisActivity": reads_qc_template = template - + #set up needed variables new_act_id = minter(reads_qc_template["Type"]) new_qc_base_dir = os.path.join(base_dir, omic_id, new_act_id) os.makedirs(new_qc_base_dir,exist_ok=True) updated_has_output_list = [] + #hold input to downstream workflows + input_to_downstream_workflows = [] + #make new data objects for data_object in reads_qc_data_objects: dobj_tmpl = get_associate_data_object_template(data_object["data_object_type"],reads_qc_template["Outputs"]) new_do_id = minter("nmdc:DataObject") + #save filtered reads + if data_object["data_object_type"] == "Filtered Sequencing Reads": + input_to_downstream_workflows.append(new_do_id) new_description = re.sub('[^ ]+$', f"{omic_id}", data_object["description"]) new_url, destination, _ = compute_new_paths(data_object["url"], new_qc_base_dir, omic_id, new_act_id) make_data_object(data_object, new_do_id, new_url, dobj_tmpl["name"], new_description) updated_has_output_list.append(new_do_id) + #make updated activity record for rec in reads_qc_record: has_input = rec["has_input"] started_time = rec["started_at_time"] @@ -339,6 +256,57 @@ def reads_qc_update(omics_record, template_file, omic_id): 
make_activity_set(omic_id, new_act_id, has_input, updated_has_output_list, started_time, ended_time, reads_qc_template) + nmdc_database_object = json.loads( + json_dumper.dumps(nmdc_db, inject_type=False) + ) + + print(nmdc_database_object) + + return input_to_downstream_workflows, destination + +def assembly_update(omics_record, template_file, omic_id, workflow_inputs): + """Extracts relevant information from omics record and template file, update data objects for assembly + and the analysis activity set record. Performs necessary file operation as well + + Args: + omics_record (dict): omics record corresponding to downstream workflows + template_file (file): template yaml file for relavant worklfows metadata + omic_id (str): string identifier of omics id + + Returns: + nmdc_database_object: dump of nmdc object + """ + + workflow_type = "metagenome_assembly_set" + #extract needed metaadata + reads_qc_record = get_record_by_type(omics_record, workflow_type) + reads_qc_data_objects = get_data_object_by_type(omics_record, reads_qc_record) + for template in read_workflows_config(template_file): + if template['Type'] == "nmdc:MetagenomeAssembly": + reads_qc_template = template + + #set up needed variables + new_act_id = minter(reads_qc_template["Type"]) + new_qc_base_dir = os.path.join(base_dir, omic_id, new_act_id) + os.makedirs(new_qc_base_dir,exist_ok=True) + updated_has_output_list = [] + + #make new data objects + for data_object in reads_qc_data_objects: + dobj_tmpl = get_associate_data_object_template(data_object["data_object_type"],reads_qc_template["Outputs"]) + new_do_id = minter("nmdc:DataObject") + new_description = re.sub('[^ ]+$', f"{omic_id}", data_object["description"]) + new_url, destination, _ = compute_new_paths(data_object["url"], new_qc_base_dir, omic_id, new_act_id) + make_data_object(data_object, new_do_id, new_url, dobj_tmpl["name"], new_description) + updated_has_output_list.append(new_do_id) + + for rec in reads_qc_record: + started_time = 
rec["started_at_time"] + ended_time = rec["ended_at_time"] + #need to change has input to be updated as well, + make_activity_set(omic_id, new_act_id, workflow_inputs, updated_has_output_list, started_time, ended_time, reads_qc_template) + + nmdc_database_object = json.loads( json_dumper.dumps(nmdc_db, inject_type=False) ) @@ -354,7 +322,7 @@ def process_analysis_sets(study_records, template_file,dry_run=False): for omic_record in study_records: omics_id = get_omics_id(omic_record) print(omics_id) - destination = reads_qc_update(omic_record, template_file, omics_id) + downstream_input, destination = reads_qc_update(omic_record, template_file, omics_id) if dry_run == True: count += 1 dir_path = os.path.dirname(destination) From d3b1a30c8b21be2f960e435fd1a838272fb9d13f Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 2 Nov 2023 13:34:55 -0700 Subject: [PATCH 24/91] helper bash script to rename bam --- .../re_iding/scripts/rewrite_bam.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100755 nmdc_automation/re_iding/scripts/rewrite_bam.sh diff --git a/nmdc_automation/re_iding/scripts/rewrite_bam.sh b/nmdc_automation/re_iding/scripts/rewrite_bam.sh new file mode 100755 index 00000000..558e8c92 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/rewrite_bam.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# + +# IMG=microbiomedata/workflowmeta:1.0.5.1 +IMG=biocontainers/samtools:1.3.1 + +if [ "$1" = "inside" ] ; then + in=$2 + out=$3 + old=$4 + new=$5 + echo "Rewriting $out" + samtools view -h $in | sed "s/${old}/${new}/g" | \ + samtools view -hb -o $out +else + touch $2 + #shifter --image=$IMG $0 inside $@ +fi + From 789463d8b89585d2cca4c2662d04a0d2475c0eb4 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 2 Nov 2023 13:38:11 -0700 Subject: [PATCH 25/91] added operations for assembly --- .../re_iding/re_id_file_operations.py | 80 +++++++++++-------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git 
a/nmdc_automation/re_iding/re_id_file_operations.py b/nmdc_automation/re_iding/re_id_file_operations.py index 8ca469c1..132b962c 100644 --- a/nmdc_automation/re_iding/re_id_file_operations.py +++ b/nmdc_automation/re_iding/re_id_file_operations.py @@ -1,17 +1,22 @@ import subprocess -import gzip +import gzip import os import json import hashlib from subprocess import check_output + +base_dir = "/global/cfs/cdirs/m3408/results" +bam_script = "/global/u2/n/nmdcda/tasks/re_id/rewrite_bam.sh" + + def md5_sum(fn): """ Calculate the MD5 hash of a file. - + Args: - fn (str): Path to the file for which the MD5 hash is to be computed. - + Returns: - str: The MD5 hash of the file. """ @@ -21,6 +26,7 @@ def md5_sum(fn): file_hash.update(chunk) return file_hash.hexdigest() + def read_json_file(filename): """ Read a JSON file and return its content as a dictionary. @@ -31,22 +37,23 @@ def read_json_file(filename): Returns: - dict: The content of the JSON file. """ - with open(filename, 'r') as json_file: + with open(filename, "r") as json_file: data = json.load(json_file) return data + def rewrite_id(src, dst, old_id, new_id, prefix=None): """ Rewrite lines in a file, replacing occurrences of an old ID with a new ID. An optional prefix can be specified to limit which lines are modified. - + Args: - src (str): Source file path. - dst (str): Destination file path. - old_id (str): ID to be replaced. - new_id (str): Replacement ID. - prefix (str, optional): Prefix character to determine which lines to modify. Defaults to None. - + Returns: - tuple: MD5 checksum and size (in bytes) of the modified file. 
""" @@ -69,71 +76,76 @@ def find_assembly_id(src): return "_".join(line[1:].split("_")[0:-1]) -def assembly_contigs(src, dst, omic_id, act_id): +def assembly_contigs(src, dst, act_id): scaf = src.replace("_contigs", "_scaffolds") old_id = find_assembly_id(scaf) return rewrite_id(src, dst, old_id, act_id, prefix=">") -def assembly_scaffolds(src, dst, omic_id, act_id): +def assembly_scaffolds(src, dst, act_id): old_id = find_assembly_id(src) return rewrite_id(src, dst, old_id, act_id, prefix=">") -def assembly_coverage_stats(src, dst, omic_id, act_id): +def assembly_coverage_stats(src, dst, act_id): scaf = src.replace("_covstats.txt", "_scaffolds.fna") old_id = find_assembly_id(scaf) return rewrite_id(src, dst, old_id, act_id) -def assembly_agp(src, dst, omic_id, act_id): +def assembly_agp(src, dst, act_id): scaf = src.replace("_assembly.agp", "_scaffolds.fna") old_id = find_assembly_id(scaf) return rewrite_id(src, dst, old_id, act_id) def convert_script(script, src, dst, old_id, act_id): - cmd = ["./rewrite_bam.sh", src, dst, old_id, act_id] + cmd = [script, src, dst, old_id, act_id] results = check_output(cmd) md5 = md5_sum(dst) size = os.stat(dst).st_size return md5, size -def assembly_coverage_bam(src, dst, omic_id, act_id): +def assembly_coverage_bam(script, src, dst, act_id): scaf = src.replace("_pairedMapped_sorted.bam", "_scaffolds.fna") old_id = find_assembly_id(scaf) - return convert_script("./rewrite_bam.sh", src, dst, old_id, act_id) + return convert_script(script, src, dst, old_id, act_id) def xassembly_info_file(src, dst, omic_id, act_id): return [] -def rewrite_bam(type, old_bam, new_bam, old_id, new_id): - - if type == "inside": - print(f"Rewriting {new_bam}") - cmd1 = ["samtools", "view", "-h", old_bam] - cmd2 = ["sed", f"s/{old_id}/{new_id}/g"] - cmd3 = ["samtools", "view", "-hb", "-o", ] +def rewrite_sam(input_sam, output_sam, old_id, new_id): + with gzip.open(input_sam, "rt") as f_in, gzip.open(output_sam, "wt") as f_out: + for line in f_in: + 
f_out.write(line.replace(old_id, new_id)) - # Create a pipeline: cmd1 | cmd2 | cmd3 - p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE) - p2 = subprocess.Popen(cmd2, stdin=p1.stdout, stdout=subprocess.PIPE) - p1.stdout.close() - p3 = subprocess.Popen(cmd3, stdin=p2.stdout) - p2.stdout.close() - p3.communicate() - else: - with open(new_bam, 'w') as f: - pass # touch file +def get_old_file_path(data_object_record): + old_url = data_object_record["url"] + suffix = old_url.split("https://data.microbiomedata.org/data/")[1] + old_file_path = base_dir + "/" + suffix + return old_file_path -def rewrite_sam(input_sam, output_sam, old_id, new_id): + +def assembly_file_operations(data_object_record, destination, act_id): + # get old file path upfront + old_file_path = get_old_file_path(data_object_record) - with gzip.open(input_sam, 'rt') as f_in, gzip.open(output_sam, 'wt') as f_out: - for line in f_in: - f_out.write(line.replace(old_id, new_id)) + if data_object_record["data_object_type"] == "Assembly Coverage Stats": + md5, size = assembly_coverage_stats(old_file_path, destination, act_id) + elif data_object_record["data_object_type"] == "Assembly Contigs": + md5, size = assembly_contigs(old_file_path, destination, act_id) + elif data_object_record["data_object_type"] == "Assembly Scaffolds": + md5, size = assembly_scaffolds(old_file_path, destination, act_id) + elif data_object_record["data_object_type"] == "Assembly AGP": + md5, size = assembly_agp(old_file_path, destination, act_id) + elif data_object_record["data_object_type"] == "Assembly Coverage BAM": + md5, size = assembly_coverage_bam( + bam_script, old_file_path, destination, act_id + ) + return md5, size From fc3b0d015f018d4aaf71a8a5be9387c8c40612df Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 2 Nov 2023 13:38:30 -0700 Subject: [PATCH 26/91] added support for assembly and readbased --- nmdc_automation/re_iding/re_id_process.py | 361 +++++++++++++++------- 1 file changed, 253 insertions(+), 108 
deletions(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index 7733cbca..bc16b7a5 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -11,7 +11,7 @@ from re_id_file_operations import * import nmdc_schema.nmdc as nmdc from linkml_runtime.dumpers import json_dumper -import shutil +import shutil ###GLOBAL###### nmdc_db = nmdc.Database() @@ -21,24 +21,26 @@ sets = [ - "read_qc_analysis_activity_set", - "metagenome_assembly_set", - "read_based_taxonomy_analysis_activity_set" - ] + "read_qc_analysis_activity_set", + "metagenome_assembly_set", + "read_based_taxonomy_analysis_activity_set", +] mapping_log = open("mapping.log", "a") + def read_workflows_config(config_file): with open(config_file, "r") as file: workflow_data = yaml.safe_load(file) - + return workflow_data["Workflows"] + def log_mapping(idtype, old, new): """ Logs the mapping information. - + Parameters: - idtype: The type of the ID (e.g., 'data', 'activity') - old: The old ID value @@ -50,7 +52,7 @@ def log_mapping(idtype, old, new): def read_map(): """ Reads a mapping list from a file and returns it as a dictionary. - + Returns: - omap: A dictionary with old ID as key and new ID as value. """ @@ -65,16 +67,29 @@ def read_map(): def minter(shoulder): """ Creates a new ID based on the provided shoulder. 
- + Parameters: - shoulder: The base string for creating the new ID - + Returns: - A new ID string """ - + return runtime_api.minter(shoulder) +def get_new_paths(old_url, new_base_dir, omic_id, act_id): + """ + Use the url to return the string value of name path and url + """ + file_name = old_url.split("/")[-1] + file_extenstion = file_name.lstrip("nmdc_").split("_", maxsplit=1)[-1] + new_file_name = f"{act_id}_{file_extenstion}" + modified_new_file_name = new_file_name.replace(":", "_") + destination = os.path.join(new_base_dir, modified_new_file_name) + + new_url = f"{base}/{omic_id}/{act_id}/{new_file_name}" + return new_url, destination, new_file_name + def compute_new_paths(old_url, new_base_dir, omic_id, act_id): """ @@ -87,7 +102,7 @@ def compute_new_paths(old_url, new_base_dir, omic_id, act_id): new_file_name = f"{act_id}_{file_extenstion}" modified_new_file_name = new_file_name.replace(":", "_") destination = os.path.join(new_base_dir, modified_new_file_name) - + try: os.link(old_file_path, destination) print(f"Successfully created link between {old_file_path} and {destination}") @@ -95,7 +110,7 @@ def compute_new_paths(old_url, new_base_dir, omic_id, act_id): print(f"An error occurred while linking the file: {e}") except Exception as e: print(f"Unexpected error: {e}") - + new_url = f"{base}/{omic_id}/{act_id}/{new_file_name}" return new_url, destination, new_file_name @@ -103,10 +118,10 @@ def compute_new_paths(old_url, new_base_dir, omic_id, act_id): def find_type(obj): """ Determine the data type of an object based on its URL extension. - + Args: - obj (dict): Dictionary containing the 'url' key which will be inspected to determine the data type. - + Returns: - str: The determined data type or None if the type could not be determined. 
""" @@ -124,16 +139,26 @@ def find_type(obj): return None -def make_activity_set(omics_id, activity_id, has_input, has_output, started_time, ended_time, workflow_record_template): - #look at activity range +def make_activity_set( + omics_id, + activity_id, + has_input, + has_output, + started_time, + ended_time, + workflow_record_template, +): + # look at activity range database_activity_set = getattr(nmdc_db, workflow_record_template["Collection"]) # Lookup the nmdc schema range class database_activity_range = getattr(nmdc, workflow_record_template["ActivityRange"]) - + database_activity_set.append( database_activity_range( id=activity_id, - name=workflow_record_template["Activity"]["name"].replace("{id}", activity_id), + name=workflow_record_template["Activity"]["name"].replace( + "{id}", activity_id + ), git_url=workflow_record_template["Git_repo"], version=workflow_record_template["Version"], part_of=[omics_id], @@ -147,33 +172,38 @@ def make_activity_set(omics_id, activity_id, has_input, has_output, started_time ) ) -def make_data_object(data_object_record, new_do_id, new_url, updated_name, updated_description): +def make_data_object( + file_size, updated_name, new_url, do_type, new_do_id, updated_description, md5_value +): + nmdc_db.data_object_set.append( nmdc.DataObject( - file_size_bytes=data_object_record["file_size_bytes"], + file_size_bytes=file_size, name=updated_name, url=new_url, - data_object_type=data_object_record["data_object_type"], + data_object_type=do_type, type="nmdc:DataObject", id=new_do_id, - md5_checksum=data_object_record["md5_checksum"], - description=updated_description - ), - ) + md5_checksum=md5_value, + description=updated_description, + ), + ) + def post_database_object_to_runtime(datase_object): - nmdc_database_object = json.loads( - json_dumper.dumps(datase_object, inject_type=False) - ) + json_dumper.dumps(datase_object, inject_type=False) + ) res = runtime_api.post_objects(nmdc_database_object) return res + def 
get_omics_id(omics_record): for rec in omics_record["omics_processing_set"]: return rec["id"] + def get_record_by_type(related_omic_records, record_type): """ Returns the record that matches the given type. @@ -185,27 +215,48 @@ def get_record_by_type(related_omic_records, record_type): Returns: - dict: The first record that matches the given type, or None if not found. """ - + return related_omic_records[record_type] + def get_data_object_by_type(related_omic_records, old_activity_record): - + """Find and return data objects for a activity record + + Args: + - related_omic_records (dict): records for an omics processing record. + - old_activity_record (dict): old activty record of given ativity type + + Returns: + - List[dict]: data objects for the given activity + """ + data_object_list = [] - + for act_record in old_activity_record: has_output_list = act_record["has_output"] for do_record in related_omic_records["data_object_set"]: if do_record["id"] in has_output_list: data_object_list.append(do_record) - + return data_object_list - -def get_associate_data_object_template(data_object_type,data_object_templates): + + +def get_associate_data_object_template(data_object_type, data_object_templates): + """Get the associated data object template from yaml + + Args: + - data_object_type (str): data object type + - data_object_templates (dict): workflows template + + Returns: + data object record + """ for data_object in data_object_templates: if data_object_type == data_object["data_object_type"]: return data_object return None - + + def reads_qc_update(omics_record, template_file, omic_id): """Extracts relevant information from omics record and template file, update data objects for reads qc and the analysis activity set record. 
Performs necessary file operation as well @@ -218,52 +269,65 @@ def reads_qc_update(omics_record, template_file, omic_id): Returns: nmdc_database_object: dump of nmdc object """ - + workflow_type = "read_qc_analysis_activity_set" - #extract needed metaadata + # extract needed metaadata reads_qc_record = get_record_by_type(omics_record, workflow_type) reads_qc_data_objects = get_data_object_by_type(omics_record, reads_qc_record) for template in read_workflows_config(template_file): - if template['Type'] == "nmdc:ReadQcAnalysisActivity": + if template["Type"] == "nmdc:ReadQcAnalysisActivity": reads_qc_template = template - - #set up needed variables + + # set up needed variables new_act_id = minter(reads_qc_template["Type"]) new_qc_base_dir = os.path.join(base_dir, omic_id, new_act_id) - os.makedirs(new_qc_base_dir,exist_ok=True) + os.makedirs(new_qc_base_dir, exist_ok=True) updated_has_output_list = [] - - #hold input to downstream workflows + + # hold input to downstream workflows input_to_downstream_workflows = [] - #make new data objects + # make new data objects for data_object in reads_qc_data_objects: - dobj_tmpl = get_associate_data_object_template(data_object["data_object_type"],reads_qc_template["Outputs"]) + dobj_tmpl = get_associate_data_object_template( + data_object["data_object_type"], reads_qc_template["Outputs"] + ) new_do_id = minter("nmdc:DataObject") - #save filtered reads + # save filtered reads if data_object["data_object_type"] == "Filtered Sequencing Reads": input_to_downstream_workflows.append(new_do_id) - new_description = re.sub('[^ ]+$', f"{omic_id}", data_object["description"]) - new_url, destination, _ = compute_new_paths(data_object["url"], new_qc_base_dir, omic_id, new_act_id) - make_data_object(data_object, new_do_id, new_url, dobj_tmpl["name"], new_description) + #get reusable slots + file_size = data_object["file_size_bytes"] + data_object_type = data_object["data_object_type"] + md5_sum = data_object["md5_checksum"] + #get new 
values for slots + new_description = re.sub("[^ ]+$", f"{omic_id}", data_object["description"]) + new_url, destination, _ = compute_new_paths( + data_object["url"], new_qc_base_dir, omic_id, new_act_id + ) + make_data_object( + file_size, dobj_tmpl["name"], new_url, data_object_type, new_do_id, new_description, md5_sum + ) updated_has_output_list.append(new_do_id) - - #make updated activity record + + # make updated activity record for rec in reads_qc_record: has_input = rec["has_input"] started_time = rec["started_at_time"] ended_time = rec["ended_at_time"] - #need to change has input to be updated as well, - make_activity_set(omic_id, new_act_id, has_input, updated_has_output_list, started_time, ended_time, reads_qc_template) - - - nmdc_database_object = json.loads( - json_dumper.dumps(nmdc_db, inject_type=False) - ) - - print(nmdc_database_object) - + # need to change has input to be updated as well, + make_activity_set( + omic_id, + new_act_id, + has_input, + updated_has_output_list, + started_time, + ended_time, + reads_qc_template, + ) + return input_to_downstream_workflows, destination + def assembly_update(omics_record, template_file, omic_id, workflow_inputs): """Extracts relevant information from omics record and template file, update data objects for assembly and the analysis activity set record. 
Performs necessary file operation as well @@ -276,79 +340,160 @@ def assembly_update(omics_record, template_file, omic_id, workflow_inputs): Returns: nmdc_database_object: dump of nmdc object """ - + workflow_type = "metagenome_assembly_set" - #extract needed metaadata - reads_qc_record = get_record_by_type(omics_record, workflow_type) - reads_qc_data_objects = get_data_object_by_type(omics_record, reads_qc_record) + # extract needed metaadata + assembly_record = get_record_by_type(omics_record, workflow_type) + assembly_data_objects = get_data_object_by_type(omics_record, assembly_record) for template in read_workflows_config(template_file): - if template['Type'] == "nmdc:MetagenomeAssembly": - reads_qc_template = template - - #set up needed variables - new_act_id = minter(reads_qc_template["Type"]) - new_qc_base_dir = os.path.join(base_dir, omic_id, new_act_id) - os.makedirs(new_qc_base_dir,exist_ok=True) + if template["Type"] == "nmdc:MetagenomeAssembly": + assembly_template = template + + # set up needed variables + new_act_id = minter(assembly_template["Type"]) + new_asm_base_dir = os.path.join(base_dir, omic_id, new_act_id) + os.makedirs(new_asm_base_dir, exist_ok=True) updated_has_output_list = [] - - #make new data objects - for data_object in reads_qc_data_objects: - dobj_tmpl = get_associate_data_object_template(data_object["data_object_type"],reads_qc_template["Outputs"]) + + # make new data objects + for data_object in assembly_data_objects: + + data_object["data_object_type"] = find_type(data_object) + dobj_tmpl = get_associate_data_object_template( + data_object["data_object_type"], assembly_template["Outputs"] + ) + #generate new dataobject type new_do_id = minter("nmdc:DataObject") - new_description = re.sub('[^ ]+$', f"{omic_id}", data_object["description"]) - new_url, destination, _ = compute_new_paths(data_object["url"], new_qc_base_dir, omic_id, new_act_id) - make_data_object(data_object, new_do_id, new_url, dobj_tmpl["name"], new_description) + 
new_description = re.sub("[^ ]+$", f"{omic_id}", data_object["description"]) + new_url, destination, _ = get_new_paths(data_object["url"], new_asm_base_dir, omic_id, new_act_id) + updated_md5, updated_file_size = assembly_file_operations(data_object, destination, new_act_id) + #get do_type + do_type = data_object["data_object_type"] + make_data_object( + updated_file_size, dobj_tmpl["name"], new_url, do_type, new_do_id, new_description, updated_md5 + ) updated_has_output_list.append(new_do_id) - - for rec in reads_qc_record: + + for rec in assembly_record: started_time = rec["started_at_time"] ended_time = rec["ended_at_time"] - #need to change has input to be updated as well, - make_activity_set(omic_id, new_act_id, workflow_inputs, updated_has_output_list, started_time, ended_time, reads_qc_template) + # need to change has input to be updated as well, + make_activity_set( + omic_id, + new_act_id, + workflow_inputs, + updated_has_output_list, + started_time, + ended_time, + assembly_template, + ) + + +def readbased_update(omics_record, template_file, omic_id, workflow_inputs): + """Extracts relevant information from omics record and template file, update data objects for assembly + and the analysis activity set record. 
Performs necessary file operation as well + + Args: + omics_record (dict): omics record corresponding to downstream workflows + template_file (file): template yaml file for relavant worklfows metadata + omic_id (str): string identifier of omics id + + Returns: + nmdc_database_object: dump of nmdc object + """ + + workflow_type = "read_based_taxonomy_analysis_activity_set" + # extract needed metaadata + readbased_record = get_record_by_type(omics_record, workflow_type) + readbased_data_objects = get_data_object_by_type(omics_record, readbased_record) + for template in read_workflows_config(template_file): + if template["Type"] == "nmdc:ReadBasedTaxonomyAnalysisActivity": + assembly_template = template + + # set up needed variables + new_act_id = minter(assembly_template["Type"]) + new_readbased_base_dir = os.path.join(base_dir, omic_id, new_act_id) + os.makedirs(new_readbased_base_dir, exist_ok=True) + updated_has_output_list = [] + + # make new data objects + for data_object in readbased_data_objects: - - nmdc_database_object = json.loads( - json_dumper.dumps(nmdc_db, inject_type=False) + data_object["data_object_type"] = find_type(data_object) + + dobj_tmpl = get_associate_data_object_template( + data_object["data_object_type"], assembly_template["Outputs"] ) - - print(nmdc_database_object) - - return destination - - -def process_analysis_sets(study_records, template_file,dry_run=False): + #generate new dataobject type + new_do_id = minter("nmdc:DataObject") + file_size = data_object["file_size_bytes"] + data_object_type = data_object["data_object_type"] + md5_sum = data_object["md5_checksum"] + #get new values for slots + new_description = re.sub("[^ ]+$", f"{omic_id}", data_object["description"]) + new_url, destination, _ = compute_new_paths( + data_object["url"], new_readbased_base_dir, omic_id, new_act_id + ) + make_data_object( + file_size, dobj_tmpl["name"], new_url, data_object_type, new_do_id, new_description, md5_sum + ) + 
updated_has_output_list.append(new_do_id) + + for rec in readbased_record: + started_time = rec["started_at_time"] + ended_time = rec["ended_at_time"] + make_activity_set( + omic_id, + new_act_id, + workflow_inputs, + updated_has_output_list, + started_time, + ended_time, + assembly_template, + ) + + + +def process_analysis_sets(study_records, template_file, dry_run=False): count = 0 for omic_record in study_records: omics_id = get_omics_id(omic_record) print(omics_id) - downstream_input, destination = reads_qc_update(omic_record, template_file, omics_id) + downstream_input, destination = reads_qc_update( + omic_record, template_file, omics_id + ) + assembly_update(omic_record, template_file, omics_id, downstream_input) + readbased_update(omic_record, template_file, omics_id, downstream_input) + nmdc_database_object = json.loads(json_dumper.dumps(nmdc_db, inject_type=False)) + print(nmdc_database_object) if dry_run == True: count += 1 dir_path = os.path.dirname(destination) parent_dir_path = os.path.dirname(dir_path) try: - shutil.rmtree(parent_dir_path) - print(f"Directory {parent_dir_path} and all its contents removed successfully!") + # shutil.rmtree(parent_dir_path) + print( + f"Directory {parent_dir_path} and all its contents removed successfully!" + ) except OSError as e: print(f"Error: {e}") if count == 1: break - - def main(): - #TODO - #1. Read in json dump of analysis records - #2. Process records for reads qc - generate new metadata, make new records and data objects (this will include file copies and renaming) - #3. save data of updated reads qc records - #4. Fetch old records for readbased analysis and assembly, generate new metadata, make new records and data objects (this will include file copies and renaming files and ids in files) - #5. Validate new records, submit them via runtime api - #6. Write seperate process to delete old records once we have + # TODO + # 1. Read in json dump of analysis records + # 2. 
Process records for reads qc - generate new metadata, make new records and data objects (this will include file copies and renaming) + # 3. save data of updated reads qc records + # 4. Fetch old records for readbased analysis and assembly, generate new metadata, make new records and data objects (this will include file copies and renaming files and ids in files) + # 5. Validate new records, submit them via runtime api + # 6. Write seperate process to delete old records once we have pass + if __name__ == "__main__": test_file = "scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json" - template_file = "/global/cfs/cdirs/m3408/aim2/dev/reiding_scripts/nmdc_automation/configs/re_iding_worklfows.yaml" + template_file = "../../nmdc_automation/configs/re_iding_worklfows.yaml" stegen_data = read_json_file(test_file) - process_analysis_sets(stegen_data, template_file, dry_run=True) \ No newline at end of file + process_analysis_sets(stegen_data, template_file, dry_run=True) From b807215bf062d4a20972b8b6a7b90ed2f7159ef1 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 2 Nov 2023 13:48:04 -0700 Subject: [PATCH 27/91] find bam_script locally --- nmdc_automation/re_iding/re_id_file_operations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nmdc_automation/re_iding/re_id_file_operations.py b/nmdc_automation/re_iding/re_id_file_operations.py index 132b962c..a82181e5 100644 --- a/nmdc_automation/re_iding/re_id_file_operations.py +++ b/nmdc_automation/re_iding/re_id_file_operations.py @@ -7,7 +7,7 @@ base_dir = "/global/cfs/cdirs/m3408/results" -bam_script = "/global/u2/n/nmdcda/tasks/re_id/rewrite_bam.sh" +bam_script = os.path.abspath("scripts/rewrite_bam.sh") def md5_sum(fn): @@ -134,7 +134,7 @@ def get_old_file_path(data_object_record): def assembly_file_operations(data_object_record, destination, act_id): # get old file path upfront old_file_path = get_old_file_path(data_object_record) - + if data_object_record["data_object_type"] == 
"Assembly Coverage Stats": md5, size = assembly_coverage_stats(old_file_path, destination, act_id) elif data_object_record["data_object_type"] == "Assembly Contigs": From 8e53ff8e76733cb9e11fdda9799ecb92db117727 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 2 Nov 2023 13:48:16 -0700 Subject: [PATCH 28/91] reformat --- nmdc_automation/re_iding/re_id_process.py | 56 +++++++++++++++-------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index bc16b7a5..5cf8f7d6 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -77,6 +77,7 @@ def minter(shoulder): return runtime_api.minter(shoulder) + def get_new_paths(old_url, new_base_dir, omic_id, act_id): """ Use the url to return the string value of name path and url @@ -176,7 +177,6 @@ def make_activity_set( def make_data_object( file_size, updated_name, new_url, do_type, new_do_id, updated_description, md5_value ): - nmdc_db.data_object_set.append( nmdc.DataObject( file_size_bytes=file_size, @@ -295,17 +295,23 @@ def reads_qc_update(omics_record, template_file, omic_id): # save filtered reads if data_object["data_object_type"] == "Filtered Sequencing Reads": input_to_downstream_workflows.append(new_do_id) - #get reusable slots + # get reusable slots file_size = data_object["file_size_bytes"] data_object_type = data_object["data_object_type"] md5_sum = data_object["md5_checksum"] - #get new values for slots + # get new values for slots new_description = re.sub("[^ ]+$", f"{omic_id}", data_object["description"]) new_url, destination, _ = compute_new_paths( data_object["url"], new_qc_base_dir, omic_id, new_act_id ) make_data_object( - file_size, dobj_tmpl["name"], new_url, data_object_type, new_do_id, new_description, md5_sum + file_size, + dobj_tmpl["name"], + new_url, + data_object_type, + new_do_id, + new_description, + md5_sum, ) 
updated_has_output_list.append(new_do_id) @@ -357,20 +363,29 @@ def assembly_update(omics_record, template_file, omic_id, workflow_inputs): # make new data objects for data_object in assembly_data_objects: - data_object["data_object_type"] = find_type(data_object) dobj_tmpl = get_associate_data_object_template( data_object["data_object_type"], assembly_template["Outputs"] ) - #generate new dataobject type + # generate new dataobject type new_do_id = minter("nmdc:DataObject") new_description = re.sub("[^ ]+$", f"{omic_id}", data_object["description"]) - new_url, destination, _ = get_new_paths(data_object["url"], new_asm_base_dir, omic_id, new_act_id) - updated_md5, updated_file_size = assembly_file_operations(data_object, destination, new_act_id) - #get do_type - do_type = data_object["data_object_type"] + new_url, destination, _ = get_new_paths( + data_object["url"], new_asm_base_dir, omic_id, new_act_id + ) + updated_md5, updated_file_size = assembly_file_operations( + data_object, destination, new_act_id + ) + # get do_type + do_type = data_object["data_object_type"] make_data_object( - updated_file_size, dobj_tmpl["name"], new_url, do_type, new_do_id, new_description, updated_md5 + updated_file_size, + dobj_tmpl["name"], + new_url, + do_type, + new_do_id, + new_description, + updated_md5, ) updated_has_output_list.append(new_do_id) @@ -418,31 +433,36 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): # make new data objects for data_object in readbased_data_objects: - data_object["data_object_type"] = find_type(data_object) - + dobj_tmpl = get_associate_data_object_template( data_object["data_object_type"], assembly_template["Outputs"] ) - #generate new dataobject type + # generate new dataobject type new_do_id = minter("nmdc:DataObject") file_size = data_object["file_size_bytes"] data_object_type = data_object["data_object_type"] md5_sum = data_object["md5_checksum"] - #get new values for slots + # get new values for slots 
new_description = re.sub("[^ ]+$", f"{omic_id}", data_object["description"]) new_url, destination, _ = compute_new_paths( data_object["url"], new_readbased_base_dir, omic_id, new_act_id ) make_data_object( - file_size, dobj_tmpl["name"], new_url, data_object_type, new_do_id, new_description, md5_sum + file_size, + dobj_tmpl["name"], + new_url, + data_object_type, + new_do_id, + new_description, + md5_sum, ) updated_has_output_list.append(new_do_id) for rec in readbased_record: started_time = rec["started_at_time"] ended_time = rec["ended_at_time"] - + make_activity_set( omic_id, new_act_id, @@ -454,7 +474,6 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): ) - def process_analysis_sets(study_records, template_file, dry_run=False): count = 0 for omic_record in study_records: @@ -481,6 +500,7 @@ def process_analysis_sets(study_records, template_file, dry_run=False): if count == 1: break + def main(): # TODO # 1. Read in json dump of analysis records From 980a3a2234a35f1547e5a8ed6901869e2ce10fe4 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 2 Nov 2023 13:53:56 -0700 Subject: [PATCH 29/91] remove read_based_taxonomy_analysis --- .../re_iding/scripts/rebuild_metagenome_workflow_records.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index a32ceb99..b90621a8 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -100,13 +100,11 @@ def rebuild_workflow_records(study_id: str, site_config: bool): db.omics_processing_set.append(omics_processing_record) # downstream workflow activity sets - (read_qc_records, taxonomy_records, - metagenome_assembly_records, - metagenome_annotation_records, mags_records) = [], [], [], [], [] + (read_qc_records, 
metagenome_assembly_records, + metagenome_annotation_records, mags_records) = [], [], [], [] downstream_workflow_activity_sets = { "read_qc_analysis_activity_set": read_qc_records, - "read_based_taxonomy_analysis_activity_set": taxonomy_records, "metagenome_assembly_set": metagenome_assembly_records, "metagenome_annotation_activity_set": metagenome_annotation_records, "mags_activity_set": mags_records, From 8386780742048c302e0920cbc09ecf4597960e06 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Fri, 3 Nov 2023 14:03:01 -0700 Subject: [PATCH 30/91] fix file path --- nmdc_automation/re_iding/re_id_process.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index 5cf8f7d6..75e9c312 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -438,6 +438,7 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): dobj_tmpl = get_associate_data_object_template( data_object["data_object_type"], assembly_template["Outputs"] ) + print(dobj_tmpl) # generate new dataobject type new_do_id = minter("nmdc:DataObject") file_size = data_object["file_size_bytes"] @@ -514,6 +515,6 @@ def main(): if __name__ == "__main__": test_file = "scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json" - template_file = "../../nmdc_automation/configs/re_iding_worklfows.yaml" + template_file = "../../configs/re_iding_worklfows.yaml" stegen_data = read_json_file(test_file) process_analysis_sets(stegen_data, template_file, dry_run=True) From 62c5ddebb5829fda6e13c284cd6f7d85c57597c4 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Fri, 3 Nov 2023 14:04:01 -0700 Subject: [PATCH 31/91] remove superflous debugging --- nmdc_automation/re_iding/re_id_process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index 
75e9c312..d8cbe001 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -438,7 +438,7 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): dobj_tmpl = get_associate_data_object_template( data_object["data_object_type"], assembly_template["Outputs"] ) - print(dobj_tmpl) + # generate new dataobject type new_do_id = minter("nmdc:DataObject") file_size = data_object["file_size_bytes"] @@ -446,7 +446,7 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): md5_sum = data_object["md5_checksum"] # get new values for slots new_description = re.sub("[^ ]+$", f"{omic_id}", data_object["description"]) - new_url, destination, _ = compute_new_paths( + new_url, _, _ = compute_new_paths( data_object["url"], new_readbased_base_dir, omic_id, new_act_id ) make_data_object( From 19f1d5f6311586a97fff2a61750147c9284952c9 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Fri, 3 Nov 2023 14:04:28 -0700 Subject: [PATCH 32/91] add back read_based_taxonomy_analysis_activity_set --- .../re_iding/scripts/rebuild_metagenome_workflow_records.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py index b90621a8..fb5eca0a 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py @@ -100,11 +100,12 @@ def rebuild_workflow_records(study_id: str, site_config: bool): db.omics_processing_set.append(omics_processing_record) # downstream workflow activity sets - (read_qc_records, metagenome_assembly_records, - metagenome_annotation_records, mags_records) = [], [], [], [] + (read_qc_records, readbased_records, metagenome_assembly_records, + metagenome_annotation_records, mags_records) = [], [], [], [], [] 
downstream_workflow_activity_sets = { "read_qc_analysis_activity_set": read_qc_records, + "read_based_taxonomy_analysis_activity_set": readbased_records, "metagenome_assembly_set": metagenome_assembly_records, "metagenome_annotation_activity_set": metagenome_annotation_records, "mags_activity_set": mags_records, From 2960932661b5b48b7466dfc5a7150145b9355073 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Fri, 3 Nov 2023 14:33:02 -0700 Subject: [PATCH 33/91] change centrifuge report do type --- configs/re_iding_worklfows.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/re_iding_worklfows.yaml b/configs/re_iding_worklfows.yaml index b77da9a4..5f71a7da 100644 --- a/configs/re_iding_worklfows.yaml +++ b/configs/re_iding_worklfows.yaml @@ -156,9 +156,9 @@ Workflows: name: Centrifuge output read classification file suffix: _centrifuge_classification.tsv - output: final_centrifuge_report_tsv - data_object_type: Centrifuge output report file + data_object_type: Centrifuge Classification Report description: Centrifuge Report for {id} - name: Centrifuge Classification Report + name: Centrifuge output report file suffix: _centrifuge_report.tsv - output: final_centrifuge_krona_html data_object_type: Centrifuge Krona Plot From b88aa57ea9233d58d2552ea1cef9482b623b0b4c Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 6 Nov 2023 12:48:46 -0800 Subject: [PATCH 34/91] add example record --- ...zgv51_updated_records_example_dry_run.json | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 nmdc_automation/re_iding/nmdc:sty-11-aygzgv51_updated_records_example_dry_run.json diff --git a/nmdc_automation/re_iding/nmdc:sty-11-aygzgv51_updated_records_example_dry_run.json b/nmdc_automation/re_iding/nmdc:sty-11-aygzgv51_updated_records_example_dry_run.json new file mode 100644 index 00000000..4d4beae5 --- /dev/null +++ b/nmdc_automation/re_iding/nmdc:sty-11-aygzgv51_updated_records_example_dry_run.json @@ -0,0 +1,243 @@ 
+{ + "data_object_set": [ + { + "id": "nmdc:dobj-12-3zw9ed72", + "name": "Reads QC result fastq (clean data)", + "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 2571324879, + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-12-7bfbg150/nmdc_wfrqc-12-7bfbg150_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-f992g204", + "name": "Reads QC summary statistics", + "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 290, + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-12-7bfbg150/nmdc_wfrqc-12-7bfbg150_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-g19r1r98", + "name": "Final assembly contigs fasta", + "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 90794959, + "md5_checksum": "148dffaee63c7eccc62db4022d916fe1", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-12-nv5zhv63/nmdc:wfmgas-12-nv5zhv63_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-6qs0sb92", + "name": "Final assembly scaffolds fasta", + "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 90283295, + "md5_checksum": "5f66de5a1fa911f3f5e2e4027af8bb8c", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-12-nv5zhv63/nmdc:wfmgas-12-nv5zhv63_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-0aqx6k97", + "name": "Assembled contigs coverage information", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 14091491, + 
"md5_checksum": "b2c4779abd596ab9604b06687f804360", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-12-nv5zhv63/nmdc:wfmgas-12-nv5zhv63_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-mxghxv82", + "name": "An AGP format file that describes the assembly", + "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 13901555, + "md5_checksum": "88e65190df33ce4082a224075e8e0ff4", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-12-nv5zhv63/nmdc:wfmgas-12-nv5zhv63_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-evyw0b45", + "name": "Sorted bam file of reads mapping back to the final assembly", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-12-nv5zhv63/nmdc:wfmgas-12-nv5zhv63_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-a1c08w33", + "name": "GOTTCHA2 classification report file", + "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 13174, + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-y7y6d813", + "name": "GOTTCHA2 report file", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 1035818, + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "data_object_type": "GOTTCHA2 Report Full", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-5aweg275", + "name": "GOTTCHA2 krona plot HTML file", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 262669, + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-1xzbt082", + "name": "Centrifuge output read classification file", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 2189843623, + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-zaj6ec82", + "name": "Centrifuge output report file", + "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 260134, + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-gnrpbj27", + "name": "Centrifug krona plot HTML file", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 2343980, + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-avm4zb05", + "name": "Kraken2 output read classification file", + "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 1785563917, + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-mf77cq08", + "name": "Kraken2 output report file", + "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 699896, + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-12-gz9j7220", + "name": "Kraken2 Krona plot HTML file", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 4221977, + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-12-bw74zf55/nmdc_wfrbt-12-bw74zf55_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-12-nv5zhv63", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "execution_resource": "NERSC - Perlmutter", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-12-3zw9ed72" + ], + 
"has_output": [ + "nmdc:dobj-12-g19r1r98", + "nmdc:dobj-12-6qs0sb92", + "nmdc:dobj-12-0aqx6k97", + "nmdc:dobj-12-mxghxv82", + "nmdc:dobj-12-evyw0b45" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.3" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-12-7bfbg150", + "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "execution_resource": "NERSC - Perlmutter", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:30a06664f29cffbbbc49abad86eae6fc" + ], + "has_output": [ + "nmdc:dobj-12-3zw9ed72", + "nmdc:dobj-12-f992g204" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.8" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-12-bw74zf55", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "execution_resource": "NERSC - Perlmutter", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-12-3zw9ed72" + ], + "has_output": [ + "nmdc:dobj-12-a1c08w33", + "nmdc:dobj-12-y7y6d813", + "nmdc:dobj-12-5aweg275", + "nmdc:dobj-12-1xzbt082", + "nmdc:dobj-12-zaj6ec82", + "nmdc:dobj-12-gnrpbj27", + "nmdc:dobj-12-avm4zb05", + "nmdc:dobj-12-mf77cq08", + "nmdc:dobj-12-gz9j7220" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.5" + } + ] +} \ No newline at end of file From d5f113d377b77276083b0cb9a7249e1af4334068 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 6 Nov 2023 12:48:57 -0800 Subject: [PATCH 35/91] added better logging --- 
nmdc_automation/re_iding/re_id_process.py | 106 +++++++++++++++------- 1 file changed, 72 insertions(+), 34 deletions(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index d8cbe001..62b8e6c9 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -12,6 +12,7 @@ import nmdc_schema.nmdc as nmdc from linkml_runtime.dumpers import json_dumper import shutil +import logging ###GLOBAL###### nmdc_db = nmdc.Database() @@ -20,6 +21,16 @@ base_dir = "/global/cfs/cdirs/m3408/results" +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler() + ] +) + +logger = logging.getLogger(__name__) + sets = [ "read_qc_analysis_activity_set", "metagenome_assembly_set", @@ -106,14 +117,14 @@ def compute_new_paths(old_url, new_base_dir, omic_id, act_id): try: os.link(old_file_path, destination) - print(f"Successfully created link between {old_file_path} and {destination}") + logging.info(f"Successfully created link between {old_file_path} and {destination}") except OSError as e: - print(f"An error occurred while linking the file: {e}") + logging.error(f"An error occurred while linking the file: {e}") except Exception as e: - print(f"Unexpected error: {e}") + logging.error(f"Unexpected error: {e}") - new_url = f"{base}/{omic_id}/{act_id}/{new_file_name}" - return new_url, destination, new_file_name + new_url = f"{base}/{omic_id}/{act_id}/{modified_new_file_name}" + return new_url, destination, modified_new_file_name def find_type(obj): @@ -158,7 +169,7 @@ def make_activity_set( database_activity_range( id=activity_id, name=workflow_record_template["Activity"]["name"].replace( - "{id}", activity_id + "{id}", omics_id ), git_url=workflow_record_template["Git_repo"], version=workflow_record_template["Version"], @@ -270,6 +281,8 @@ def reads_qc_update(omics_record, template_file, omic_id): 
nmdc_database_object: dump of nmdc object """ + logging.info(f"Updating Reads QC for {omic_id}") + workflow_type = "read_qc_analysis_activity_set" # extract needed metaadata reads_qc_record = get_record_by_type(omics_record, workflow_type) @@ -283,6 +296,15 @@ def reads_qc_update(omics_record, template_file, omic_id): new_qc_base_dir = os.path.join(base_dir, omic_id, new_act_id) os.makedirs(new_qc_base_dir, exist_ok=True) updated_has_output_list = [] + + # scrape useful old metadata + for rec in reads_qc_record: + has_input = rec["has_input"] + started_time = rec["started_at_time"] + ended_time = rec["ended_at_time"] + old_act_id = rec["id"] + + log_mapping("act_id", old_act_id, new_act_id) # hold input to downstream workflows input_to_downstream_workflows = [] @@ -292,6 +314,8 @@ def reads_qc_update(omics_record, template_file, omic_id): data_object["data_object_type"], reads_qc_template["Outputs"] ) new_do_id = minter("nmdc:DataObject") + #log the new id + log_mapping("dobj", data_object["id"], new_do_id) # save filtered reads if data_object["data_object_type"] == "Filtered Sequencing Reads": input_to_downstream_workflows.append(new_do_id) @@ -316,10 +340,6 @@ def reads_qc_update(omics_record, template_file, omic_id): updated_has_output_list.append(new_do_id) # make updated activity record - for rec in reads_qc_record: - has_input = rec["has_input"] - started_time = rec["started_at_time"] - ended_time = rec["ended_at_time"] # need to change has input to be updated as well, make_activity_set( omic_id, @@ -347,6 +367,8 @@ def assembly_update(omics_record, template_file, omic_id, workflow_inputs): nmdc_database_object: dump of nmdc object """ + logging.info(f"Updating Assembly for {omic_id}") + workflow_type = "metagenome_assembly_set" # extract needed metaadata assembly_record = get_record_by_type(omics_record, workflow_type) @@ -360,6 +382,14 @@ def assembly_update(omics_record, template_file, omic_id, workflow_inputs): new_asm_base_dir = os.path.join(base_dir, 
omic_id, new_act_id) os.makedirs(new_asm_base_dir, exist_ok=True) updated_has_output_list = [] + + #get useful old metadata + for rec in assembly_record: + started_time = rec["started_at_time"] + ended_time = rec["ended_at_time"] + old_act_id = rec["id"] + + log_mapping("act_id", old_act_id, new_act_id) # make new data objects for data_object in assembly_data_objects: @@ -369,6 +399,9 @@ def assembly_update(omics_record, template_file, omic_id, workflow_inputs): ) # generate new dataobject type new_do_id = minter("nmdc:DataObject") + #log the new id + log_mapping("dobj", data_object["id"], new_do_id) + #continue updating metdata for data objects new_description = re.sub("[^ ]+$", f"{omic_id}", data_object["description"]) new_url, destination, _ = get_new_paths( data_object["url"], new_asm_base_dir, omic_id, new_act_id @@ -389,9 +422,6 @@ def assembly_update(omics_record, template_file, omic_id, workflow_inputs): ) updated_has_output_list.append(new_do_id) - for rec in assembly_record: - started_time = rec["started_at_time"] - ended_time = rec["ended_at_time"] # need to change has input to be updated as well, make_activity_set( omic_id, @@ -417,6 +447,8 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): nmdc_database_object: dump of nmdc object """ + logging.info(f"Updating Readbased for {omic_id}") + workflow_type = "read_based_taxonomy_analysis_activity_set" # extract needed metaadata readbased_record = get_record_by_type(omics_record, workflow_type) @@ -430,6 +462,15 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): new_readbased_base_dir = os.path.join(base_dir, omic_id, new_act_id) os.makedirs(new_readbased_base_dir, exist_ok=True) updated_has_output_list = [] + + #get useful old metadata + for rec in readbased_record: + started_time = rec["started_at_time"] + ended_time = rec["ended_at_time"] + old_act_id = rec["id"] + + #log new activity id + log_mapping("act_id", old_act_id, new_act_id) # make new 
data objects for data_object in readbased_data_objects: @@ -441,6 +482,9 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): # generate new dataobject type new_do_id = minter("nmdc:DataObject") + #log the new id + log_mapping("dobj", data_object["id"], new_do_id) + #continue updating metdata for data objects file_size = data_object["file_size_bytes"] data_object_type = data_object["data_object_type"] md5_sum = data_object["md5_checksum"] @@ -460,10 +504,7 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): ) updated_has_output_list.append(new_do_id) - for rec in readbased_record: - started_time = rec["started_at_time"] - ended_time = rec["ended_at_time"] - + #make updated activity record make_activity_set( omic_id, new_act_id, @@ -475,46 +516,43 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): ) -def process_analysis_sets(study_records, template_file, dry_run=False): +def process_analysis_sets(study_id,study_records, template_file, dry_run=False): count = 0 for omic_record in study_records: omics_id = get_omics_id(omic_record) - print(omics_id) + logging.info(f"Starting re-iding process for {omics_id}") + log_mapping("omics_id", "gold", omics_id) downstream_input, destination = reads_qc_update( omic_record, template_file, omics_id ) assembly_update(omic_record, template_file, omics_id, downstream_input) readbased_update(omic_record, template_file, omics_id, downstream_input) nmdc_database_object = json.loads(json_dumper.dumps(nmdc_db, inject_type=False)) - print(nmdc_database_object) + logging.info(f"Writing nmdc database object dump to json file for {study_id}") + with open(f'{study_id}_updated_records.json', 'w') as json_file: + json.dump(nmdc_database_object, json_file, indent=4) if dry_run == True: count += 1 dir_path = os.path.dirname(destination) parent_dir_path = os.path.dirname(dir_path) try: - # shutil.rmtree(parent_dir_path) - print( - f"Directory {parent_dir_path} and 
all its contents removed successfully!" + shutil.rmtree(parent_dir_path) + logging.info( + f"Running in dry_run mode, directory {parent_dir_path} and all its contents removed successfully!" ) except OSError as e: - print(f"Error: {e}") + logging.info(f"Error: {e}") if count == 1: break -def main(): - # TODO - # 1. Read in json dump of analysis records - # 2. Process records for reads qc - generate new metadata, make new records and data objects (this will include file copies and renaming) - # 3. save data of updated reads qc records - # 4. Fetch old records for readbased analysis and assembly, generate new metadata, make new records and data objects (this will include file copies and renaming files and ids in files) - # 5. Validate new records, submit them via runtime api - # 6. Write seperate process to delete old records once we have - pass +def main(study_id,study_data, template_file, dry_run=False): + process_analysis_sets(study_id,study_data, template_file, dry_run) if __name__ == "__main__": test_file = "scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json" + study_id = "nmdc:sty-11-aygzgv51" template_file = "../../configs/re_iding_worklfows.yaml" stegen_data = read_json_file(test_file) - process_analysis_sets(stegen_data, template_file, dry_run=True) + main(study_id,stegen_data, template_file, dry_run=True) From 5cb1b7b97fe33639360be78d317fc5928cca4452 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 7 Nov 2023 09:15:41 -0800 Subject: [PATCH 36/91] Rename script --- ... 
=> extract_metagenome_workflow_records.py} | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) rename nmdc_automation/re_iding/scripts/{rebuild_metagenome_workflow_records.py => extract_metagenome_workflow_records.py} (89%) diff --git a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py similarity index 89% rename from nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py rename to nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py index fb5eca0a..b655e534 100644 --- a/nmdc_automation/re_iding/scripts/rebuild_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 # coding: utf-8 -# nmdc_schema/napa_compliance/scripts/rebuild_metagenome_workflow_records.py +# nmdc_schema/napa_compliance/scripts/extract_metagenome_workflow_records.py """ -rebuild_metagenome_workflow_records.py: Rebuild metagenome workflow records -after re-ID-ing of OmicsProcessing records. +extract_metagenome_workflow_records.py: Extract metagenome workflow records +for re-ID-ing of OmicsProcessing records. """ import logging import time @@ -50,9 +50,9 @@ def _get_legacy_id(omics_processing_record: dict) -> str: "--site_config", type=click.Path(exists=True), default=NAPA_CONFIG, help="Site configuration file" ) -def rebuild_workflow_records(study_id: str, site_config: bool): +def extract_workflow_records(study_id: str, site_config: bool): """ - Rebuild metagenome workflow records after re-ID-ing of Study, Biosample, and + Extract metagenome workflow records for re-ID-ing of Study, Biosample, and OmicsProcessing records by: 1. Retrieving all OmicsProcessing records for updated study ID 2. For each OmicsProcessing record, retrieve the corresponding @@ -63,8 +63,10 @@ def rebuild_workflow_records(study_id: str, site_config: bool): d. 
MetagenomeAnnotationActivity e. MagsAnalysisActivity 3. For each WorkflowExecutionActivity record: - TODO - summarize Michal's flowchart here - + a. Retrieve the corresponding DataObject records + 4. Create a database object for each OmicsProcessing record and its + associated WorkflowExecutionActivity and DataObject records + 5. Write the database object to a JSON file """ start_time = time.time() logging.info("starting missing_neon_soils_ecosystem_data.py...") @@ -130,4 +132,4 @@ def rebuild_workflow_records(study_id: str, site_config: bool): if __name__ == "__main__": - rebuild_workflow_records() + extract_workflow_records() From f38ee0effdf9314bb9bed905b8ae895d07d051f4 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 7 Nov 2023 11:11:43 -0800 Subject: [PATCH 37/91] update to extract data object for omics processing has_output --- .../extract_metagenome_workflow_records.py | 5 + ...sty-11-aygzgv51_assocated_record_dump.json | 6790 ++--------------- 2 files changed, 801 insertions(+), 5994 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py index b655e534..ebf25a64 100644 --- a/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py @@ -100,6 +100,11 @@ def extract_workflow_records(study_id: str, site_config: bool): f"is not a Metagenome") continue db.omics_processing_set.append(omics_processing_record) + for data_object_id in omics_processing_record["has_output"]: + data_object_record = query_api_client.get_data_object_by_id( + data_object_id + ) + db.data_object_set.append(data_object_record) # downstream workflow activity sets (read_qc_records, readbased_records, metagenome_assembly_records, diff --git a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json 
b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json index 37a912d9..cc5b8c9b 100644 --- a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json +++ b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json @@ -7,6 +7,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2861414297, + "type": "nmdc:DataObject", + "id": "jgi:55d740280d8785342fcf7e39", + "name": "9422.8.132674.GTTTCG.fastq.gz" + }, { "name": "Gp0115663_Filtered Reads", "description": "Filtered Reads for Gp0115663", @@ -104,85 +111,6 @@ "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", "file_size_bytes": 4221977 }, - { - "name": "Gp0115663_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", - "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", - "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "file_size_bytes": 13174 - }, - { - "name": "Gp0115663_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", - "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", - "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "file_size_bytes": 1035818 - }, - { - "name": "Gp0115663_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115663", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", - "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", - "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "file_size_bytes": 262669 - }, - { - "name": "Gp0115663_Centrifuge classification TSV report", - "description": "Centrifuge classification 
TSV report for Gp0115663", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", - "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", - "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "file_size_bytes": 2189843623 - }, - { - "name": "Gp0115663_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115663", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", - "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", - "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "file_size_bytes": 260134 - }, - { - "name": "Gp0115663_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115663", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", - "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", - "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "file_size_bytes": 2343980 - }, - { - "name": "Gp0115663_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115663", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", - "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", - "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", - "file_size_bytes": 1785563917 - }, - { - "name": "Gp0115663_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115663", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", - "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", - "id": 
"nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "file_size_bytes": 699896 - }, - { - "name": "Gp0115663_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115663", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", - "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", - "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", - "file_size_bytes": 4221977 - }, { "name": "Gp0115663_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115663", @@ -707,39 +635,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a31833bcf838a701ec1" - }, - "has_input": [ - "nmdc:7bf778baef033d36f118f8591256d6ef" - ], - "part_of": [ - "nmdc:mga0h9dt75" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "nmdc:7ca01ea379f0baed96f87d1435925f95", - "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" - ], - "was_informed_by": "gold:Gp0115663", - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -749,6 +645,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2080914094, + "type": "nmdc:DataObject", + "id": 
"jgi:55d817fc0d8785342fcf8274", + "name": "9387.2.132031.CCGTCC.fastq.gz" + }, { "name": "Gp0115666_Filtered Reads", "description": "Filtered Reads for Gp0115666", @@ -846,85 +749,6 @@ "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", "file_size_bytes": 3983935 }, - { - "name": "Gp0115666_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", - "md5_checksum": "17454627f873cc37e80700c4751c81d6", - "id": "nmdc:17454627f873cc37e80700c4751c81d6", - "file_size_bytes": 10721 - }, - { - "name": "Gp0115666_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", - "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", - "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "file_size_bytes": 920924 - }, - { - "name": "Gp0115666_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115666", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", - "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", - "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "file_size_bytes": 257441 - }, - { - "name": "Gp0115666_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115666", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", - "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", - "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "file_size_bytes": 1468295025 - }, - { - "name": "Gp0115666_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115666", - "data_object_type": 
"Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", - "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", - "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "file_size_bytes": 257081 - }, - { - "name": "Gp0115666_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115666", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", - "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", - "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "file_size_bytes": 2331968 - }, - { - "name": "Gp0115666_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115666", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", - "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", - "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "file_size_bytes": 1204548180 - }, - { - "name": "Gp0115666_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115666", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", - "md5_checksum": "368cf81424348cdf46d17c13908280e7", - "id": "nmdc:368cf81424348cdf46d17c13908280e7", - "file_size_bytes": 653697 - }, - { - "name": "Gp0115666_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115666", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", - "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", - "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", - "file_size_bytes": 3983935 - }, { "name": 
"Gp0115666_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115666", @@ -1389,39 +1213,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a15833bcf838a701c88" - }, - "has_input": [ - "nmdc:0b301d2dd917c2be31422dd0e986dd5e" - ], - "part_of": [ - "nmdc:mga0eehe16" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:17454627f873cc37e80700c4751c81d6", - "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "nmdc:368cf81424348cdf46d17c13908280e7", - "nmdc:b5091cfeed4fbea8316e50fbceea89bc" - ], - "was_informed_by": "gold:Gp0115666", - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", - "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:06:19+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -1431,6 +1223,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 3050291373, + "type": "nmdc:DataObject", + "id": "jgi:55d740240d8785342fcf7e37", + "name": "9422.8.132674.GTGGCC.fastq.gz" + }, { "name": "Gp0115668_Filtered Reads", "description": "Filtered Reads for Gp0115668", @@ -1529,101 +1328,22 @@ "file_size_bytes": 4276256 }, { - "name": "Gp0115668_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", - "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", - "id": 
"nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "file_size_bytes": 13875 + "name": "Gp0115668_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115668", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", + "md5_checksum": "b2b862aede4f333acec79aac3afc7254", + "id": "nmdc:b2b862aede4f333acec79aac3afc7254", + "file_size_bytes": 182488593 }, { - "name": "Gp0115668_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", - "md5_checksum": "2529ede10eb159148711d016ec022af3", - "id": "nmdc:2529ede10eb159148711d016ec022af3", - "file_size_bytes": 956974 - }, - { - "name": "Gp0115668_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115668", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", - "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", - "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "file_size_bytes": 265076 - }, - { - "name": "Gp0115668_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115668", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", - "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", - "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "file_size_bytes": 2377445510 - }, - { - "name": "Gp0115668_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115668", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", - 
"md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", - "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "file_size_bytes": 258291 - }, - { - "name": "Gp0115668_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115668", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", - "md5_checksum": "890f9f52d828e1ea8277b52566763069", - "id": "nmdc:890f9f52d828e1ea8277b52566763069", - "file_size_bytes": 2333775 - }, - { - "name": "Gp0115668_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115668", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", - "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", - "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "file_size_bytes": 1966520263 - }, - { - "name": "Gp0115668_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115668", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", - "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", - "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", - "file_size_bytes": 707661 - }, - { - "name": "Gp0115668_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115668", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", - "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", - "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", - "file_size_bytes": 4276256 - }, - { - "name": "Gp0115668_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115668", - "data_object_type": "Assembly Contigs", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", - "md5_checksum": "b2b862aede4f333acec79aac3afc7254", - "id": "nmdc:b2b862aede4f333acec79aac3afc7254", - "file_size_bytes": 182488593 - }, - { - "name": "Gp0115668_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115668", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", - "md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", - "id": "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", - "file_size_bytes": 181514952 + "name": "Gp0115668_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115668", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", + "md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", + "id": "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", + "file_size_bytes": 181514952 }, { "name": "Gp0115668_Metagenome Contig Coverage Stats", @@ -2093,39 +1813,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a4d833bcf838a7021ce" - }, - "has_input": [ - "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" - ], - "part_of": [ - "nmdc:mga0n66h21" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "nmdc:2529ede10eb159148711d016ec022af3", - "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "nmdc:890f9f52d828e1ea8277b52566763069", - "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "nmdc:8677985c5e8ad92dd6d051f85950a636", - "nmdc:9b2f355a4c2ff3651a3d1179212e2914" - ], - "was_informed_by": "gold:Gp0115668", - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - 
"execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:19:17+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -2135,6 +1823,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 6863035214, + "type": "nmdc:DataObject", + "id": "jgi:55a9cb010d87852b21508920", + "name": "9289.1.128215.GGACTCC-AGAGTAG.fastq.gz" + }, { "name": "Gp0115679_Filtered Reads", "description": "Filtered Reads for Gp0115679", @@ -2232,85 +1927,6 @@ "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", "file_size_bytes": 4358324 }, - { - "name": "Gp0115679_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", - "md5_checksum": "e20f8c00473472fa073adde871860801", - "id": "nmdc:e20f8c00473472fa073adde871860801", - "file_size_bytes": 18551 - }, - { - "name": "Gp0115679_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", - "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", - "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "file_size_bytes": 1200541 - }, - { - "name": "Gp0115679_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115679", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", - "md5_checksum": "f721d9dd168b0dea080b191a4396167e", - "id": "nmdc:f721d9dd168b0dea080b191a4396167e", - "file_size_bytes": 278990 - }, - { - "name": "Gp0115679_Centrifuge classification TSV report", - "description": 
"Centrifuge classification TSV report for Gp0115679", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", - "md5_checksum": "ab77e396ec643b58b54da92848b88a96", - "id": "nmdc:ab77e396ec643b58b54da92848b88a96", - "file_size_bytes": 4742886512 - }, - { - "name": "Gp0115679_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115679", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", - "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", - "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", - "file_size_bytes": 266907 - }, - { - "name": "Gp0115679_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115679", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", - "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", - "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "file_size_bytes": 2359747 - }, - { - "name": "Gp0115679_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115679", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", - "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", - "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "file_size_bytes": 3859620862 - }, - { - "name": "Gp0115679_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115679", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", - "md5_checksum": 
"aecb320fdfe4c4da35c0206dd34e0f40", - "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "file_size_bytes": 729541 - }, - { - "name": "Gp0115679_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115679", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", - "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", - "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", - "file_size_bytes": 4358324 - }, { "name": "Gp0115679_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115679", @@ -2930,39 +2546,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719fa833bcf838a701935" - }, - "has_input": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" - ], - "part_of": [ - "nmdc:mga0gg1q48" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e20f8c00473472fa073adde871860801", - "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "nmdc:f721d9dd168b0dea080b191a4396167e", - "nmdc:ab77e396ec643b58b54da92848b88a96", - "nmdc:f2514844e47a9e3d268671f80f152bc1", - "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "nmdc:77860ee043ae9738e7702a3f665b15fa" - ], - "was_informed_by": "gold:Gp0115679", - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:30:42+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -2972,6 +2556,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 
1840708400, + "type": "nmdc:DataObject", + "id": "jgi:55d818010d8785342fcf8278", + "name": "9387.2.132031.GTCCGC.fastq.gz" + }, { "name": "Gp0115667_Filtered Reads", "description": "Filtered Reads for Gp0115667", @@ -3070,109 +2661,30 @@ "file_size_bytes": 3979807 }, { - "name": "Gp0115667_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", - "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", - "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "file_size_bytes": 10576 + "name": "Gp0115667_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115667", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", + "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", + "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "file_size_bytes": 62926054 }, { - "name": "Gp0115667_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", - "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", - "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "file_size_bytes": 792905 + "name": "Gp0115667_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115667", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", + "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", + "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", + "file_size_bytes": 62577490 }, { - "name": "Gp0115667_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115667", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", - "md5_checksum": "2afff209a40ca4895307f3a47080c534", - "id": "nmdc:2afff209a40ca4895307f3a47080c534", - "file_size_bytes": 254763 - }, - { - "name": "Gp0115667_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115667", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", - "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", - "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "file_size_bytes": 1336111813 - }, - { - "name": "Gp0115667_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115667", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", - "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", - "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "file_size_bytes": 254506 - }, - { - "name": "Gp0115667_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115667", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", - "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", - "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "file_size_bytes": 2323153 - }, - { - "name": "Gp0115667_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115667", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", - "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", - "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", - "file_size_bytes": 
1097852664 - }, - { - "name": "Gp0115667_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115667", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", - "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", - "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "file_size_bytes": 639213 - }, - { - "name": "Gp0115667_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115667", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", - "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", - "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", - "file_size_bytes": 3979807 - }, - { - "name": "Gp0115667_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115667", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", - "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", - "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", - "file_size_bytes": 62926054 - }, - { - "name": "Gp0115667_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115667", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", - "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", - "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", - "file_size_bytes": 62577490 - }, - { - "name": "Gp0115667_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", - "md5_checksum": "2e4532cb03bb1e9201976b9d65893788", - "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", - "file_size_bytes": 9189143 + "name": 
"Gp0115667_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", + "md5_checksum": "2e4532cb03bb1e9201976b9d65893788", + "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", + "file_size_bytes": 9189143 }, { "name": "Gp0115667_Assembled AGP file", @@ -3653,39 +3165,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a10833bcf838a701aaa" - }, - "has_input": [ - "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" - ], - "part_of": [ - "nmdc:mga0n0je44" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "nmdc:2afff209a40ca4895307f3a47080c534", - "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "nmdc:eb189cbf0543203d2521397b73d4d34b", - "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "nmdc:ac90bf3384ce44d097f7897ac5ff8134" - ], - "was_informed_by": "gold:Gp0115667", - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:24+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -3695,6 +3175,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1792111281, + "type": "nmdc:DataObject", + "id": "jgi:55d817f20d8785342fcf826c", + "name": "9387.2.132031.TAGCTT.fastq.gz" + }, { "name": "Gp0115664_Filtered Reads", "description": "Filtered Reads for Gp0115664", @@ -3792,85 +3279,6 @@ "id": 
"nmdc:29b75e78b0b7fd8115614d8e9d341d46", "file_size_bytes": 3995680 }, - { - "name": "Gp0115664_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", - "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", - "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "file_size_bytes": 9591 - }, - { - "name": "Gp0115664_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", - "md5_checksum": "7f93f97242aed036019f13492f5af35c", - "id": "nmdc:7f93f97242aed036019f13492f5af35c", - "file_size_bytes": 885985 - }, - { - "name": "Gp0115664_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115664", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", - "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", - "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "file_size_bytes": 251303 - }, - { - "name": "Gp0115664_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115664", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", - "md5_checksum": "a4243f71a0288f489c566ae85d85891d", - "id": "nmdc:a4243f71a0288f489c566ae85d85891d", - "file_size_bytes": 1268144933 - }, - { - "name": "Gp0115664_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115664", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", - "md5_checksum": 
"f8b6ef830b94c6470056a3cd0a0eafc1", - "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "file_size_bytes": 254575 - }, - { - "name": "Gp0115664_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115664", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", - "md5_checksum": "a80779b32415ef001d0403f0b618b612", - "id": "nmdc:a80779b32415ef001d0403f0b618b612", - "file_size_bytes": 2327293 - }, - { - "name": "Gp0115664_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115664", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", - "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", - "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "file_size_bytes": 1037932028 - }, - { - "name": "Gp0115664_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115664", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", - "md5_checksum": "ce47d6686edb7b3472102d5883229c45", - "id": "nmdc:ce47d6686edb7b3472102d5883229c45", - "file_size_bytes": 641242 - }, - { - "name": "Gp0115664_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115664", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", - "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", - "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", - "file_size_bytes": 3995680 - }, { "name": "Gp0115664_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115664", @@ -4316,39 +3724,7 @@ "collecting_biosamples_from_site_set": [], "date_created": 
null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719dd833bcf838a70154e" - }, - "has_input": [ - "nmdc:232e31505b6a0251df2303c0563d64c1" - ], - "part_of": [ - "nmdc:mga0dm3v04" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "nmdc:7f93f97242aed036019f13492f5af35c", - "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "nmdc:a4243f71a0288f489c566ae85d85891d", - "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "nmdc:a80779b32415ef001d0403f0b618b612", - "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "nmdc:ce47d6686edb7b3472102d5883229c45", - "nmdc:29b75e78b0b7fd8115614d8e9d341d46" - ], - "was_informed_by": "gold:Gp0115664", - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:34+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -4358,6 +3734,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 4674996922, + "type": "nmdc:DataObject", + "id": "jgi:55f23d790d8785306f96497e", + "name": "9491.1.134352.AGTCAA.fastq.gz" + }, { "name": "Gp0115678_Filtered Reads", "description": "Filtered Reads for Gp0115678", @@ -4456,101 +3839,22 @@ "file_size_bytes": 4410156 }, { - "name": "Gp0115678_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", - "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", - "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "file_size_bytes": 19085 - }, - { - "name": "Gp0115678_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for 
Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", - "md5_checksum": "12b2d6afc355bce76249d750a9fab534", - "id": "nmdc:12b2d6afc355bce76249d750a9fab534", - "file_size_bytes": 1243929 + "name": "Gp0115678_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115678", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", + "md5_checksum": "d305e212cce8f84f14561d3957c968b1", + "id": "nmdc:d305e212cce8f84f14561d3957c968b1", + "file_size_bytes": 205441595 }, { - "name": "Gp0115678_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115678", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", - "md5_checksum": "18214017d56658a48723c9c998dcba7e", - "id": "nmdc:18214017d56658a48723c9c998dcba7e", - "file_size_bytes": 281148 - }, - { - "name": "Gp0115678_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115678", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", - "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", - "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "file_size_bytes": 3491726958 - }, - { - "name": "Gp0115678_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115678", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", - "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", - "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "file_size_bytes": 264123 - }, - { - "name": "Gp0115678_Centrifuge Krona HTML 
report", - "description": "Centrifuge Krona HTML report for Gp0115678", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", - "md5_checksum": "f9c01985f057825149d35de0650095a8", - "id": "nmdc:f9c01985f057825149d35de0650095a8", - "file_size_bytes": 2352347 - }, - { - "name": "Gp0115678_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115678", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", - "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", - "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "file_size_bytes": 2880889483 - }, - { - "name": "Gp0115678_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115678", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", - "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", - "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "file_size_bytes": 735519 - }, - { - "name": "Gp0115678_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115678", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", - "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", - "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", - "file_size_bytes": 4410156 - }, - { - "name": "Gp0115678_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115678", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", - "md5_checksum": "d305e212cce8f84f14561d3957c968b1", - "id": 
"nmdc:d305e212cce8f84f14561d3957c968b1", - "file_size_bytes": 205441595 - }, - { - "name": "Gp0115678_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115678", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", - "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", - "id": "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", - "file_size_bytes": 204286677 + "name": "Gp0115678_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115678", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", + "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", + "id": "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", + "file_size_bytes": 204286677 }, { "name": "Gp0115678_Metagenome Contig Coverage Stats", @@ -5077,39 +4381,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719f6833bcf838a701854" - }, - "has_input": [ - "nmdc:e0ce93b88419f87568ff206e0efe3a24" - ], - "part_of": [ - "nmdc:mga026tn70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "nmdc:12b2d6afc355bce76249d750a9fab534", - "nmdc:18214017d56658a48723c9c998dcba7e", - "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "nmdc:f9c01985f057825149d35de0650095a8", - "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "nmdc:38d41d4299141abe28bf0405af80cdfc" - ], - "was_informed_by": "gold:Gp0115678", - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga026tn70", - "started_at_time": "2021-10-11T02:23:30Z", - "type": 
"nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:18:17+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -5119,6 +4391,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2106076506, + "type": "nmdc:DataObject", + "id": "jgi:574fde547ded5e3df1ee13fa", + "name": "10533.1.165310.GAGCTCA-TTGAGCT.fastq.gz" + }, { "name": "Gp0127623_Filtered Reads", "description": "Filtered Reads for Gp0127623", @@ -5216,85 +4495,6 @@ "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", "file_size_bytes": 3949449 }, - { - "name": "Gp0127623_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", - "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", - "id": "nmdc:ac39e916e17e08a845bb40d97519d8be", - "file_size_bytes": 1553 - }, - { - "name": "Gp0127623_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", - "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", - "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "file_size_bytes": 836575 - }, - { - "name": "Gp0127623_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127623", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", - "md5_checksum": "eda0c04d692ecf137585676c15924626", - "id": "nmdc:eda0c04d692ecf137585676c15924626", - "file_size_bytes": 231097 - }, - { - "name": "Gp0127623_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127623", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", - "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", - "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "file_size_bytes": 1669254765 - }, - { - "name": "Gp0127623_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127623", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", - "md5_checksum": "e1f164c534830cd628d67c564ace863b", - "id": "nmdc:e1f164c534830cd628d67c564ace863b", - "file_size_bytes": 255784 - }, - { - "name": "Gp0127623_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127623", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", - "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", - "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", - "file_size_bytes": 2333760 - }, - { - "name": "Gp0127623_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127623", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", - "md5_checksum": "040e6ca695283a12711c16344acd1e76", - "id": "nmdc:040e6ca695283a12711c16344acd1e76", - "file_size_bytes": 1335651191 - }, - { - "name": "Gp0127623_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127623", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", - "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", - "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "file_size_bytes": 647609 - }, - { - "name": "Gp0127623_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127623", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", - "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", - "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", - "file_size_bytes": 3949449 - }, { "name": "Gp0127623_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127623", @@ -5740,39 +4940,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e718fc833bcf838a6ff4c9" - }, - "has_input": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7" - ], - "part_of": [ - "nmdc:mga03eyz63" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ac39e916e17e08a845bb40d97519d8be", - "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "nmdc:eda0c04d692ecf137585676c15924626", - "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "nmdc:e1f164c534830cd628d67c564ace863b", - "nmdc:a1062576d998b7b82e39b8d8520fa37e", - "nmdc:040e6ca695283a12711c16344acd1e76", - "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "nmdc:f2eed9669268f69dbc31f0c4f839fccf" - ], - "was_informed_by": "gold:Gp0127623", - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:42:25+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -5782,6 +4950,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2351763069, + "type": "nmdc:DataObject", + "id": "jgi:574fde787ded5e3df1ee1416", + "name": "10533.2.165322.CGGTTGT-AACAACC.fastq.gz" + }, { "name": 
"Gp0127625_Filtered Reads", "description": "Filtered Reads for Gp0127625", @@ -5880,109 +5055,30 @@ "file_size_bytes": 3921941 }, { - "name": "Gp0127625_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", - "md5_checksum": "550b631e1de3e01392154e54493d47ef", - "id": "nmdc:550b631e1de3e01392154e54493d47ef", - "file_size_bytes": 754 + "name": "Gp0127625_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127625", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", + "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", + "id": "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "file_size_bytes": 171703232 }, { - "name": "Gp0127625_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", - "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", - "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "file_size_bytes": 641658 + "name": "Gp0127625_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127625", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", + "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", + "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", + "file_size_bytes": 170799869 }, { - "name": "Gp0127625_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127625", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", - "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", - "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - 
"file_size_bytes": 228494 - }, - { - "name": "Gp0127625_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127625", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", - "md5_checksum": "b09795fc768257d881e8ce547be0ce68", - "id": "nmdc:b09795fc768257d881e8ce547be0ce68", - "file_size_bytes": 1849982678 - }, - { - "name": "Gp0127625_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127625", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", - "md5_checksum": "064ba18473eb80ff0b484311565d2894", - "id": "nmdc:064ba18473eb80ff0b484311565d2894", - "file_size_bytes": 253852 - }, - { - "name": "Gp0127625_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127625", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", - "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", - "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "file_size_bytes": 2331556 - }, - { - "name": "Gp0127625_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127625", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", - "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", - "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "file_size_bytes": 1471976767 - }, - { - "name": "Gp0127625_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127625", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", - "md5_checksum": "bc8acb862c8942616ef07302667c334f", - "id": "nmdc:bc8acb862c8942616ef07302667c334f", - "file_size_bytes": 627498 - }, - { - "name": "Gp0127625_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127625", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", - "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", - "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", - "file_size_bytes": 3921941 - }, - { - "name": "Gp0127625_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127625", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", - "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", - "id": "nmdc:5b6e7cbece9167002b12c3415afa9bb8", - "file_size_bytes": 171703232 - }, - { - "name": "Gp0127625_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127625", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", - "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", - "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", - "file_size_bytes": 170799869 - }, - { - "name": "Gp0127625_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", - "md5_checksum": "d231edb2040700184064615a28e65ee5", - "id": "nmdc:d231edb2040700184064615a28e65ee5", - "file_size_bytes": 23875845 + "name": "Gp0127625_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127625", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", + "md5_checksum": "d231edb2040700184064615a28e65ee5", + "id": "nmdc:d231edb2040700184064615a28e65ee5", + "file_size_bytes": 23875845 }, { "name": "Gp0127625_Assembled AGP file", @@ -6479,39 +5575,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e718e2833bcf838a6ff0ce" - }, - "has_input": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" - ], - "part_of": [ - "nmdc:mga0bfpq58" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:550b631e1de3e01392154e54493d47ef", - "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "nmdc:b09795fc768257d881e8ce547be0ce68", - "nmdc:064ba18473eb80ff0b484311565d2894", - "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "nmdc:bc8acb862c8942616ef07302667c334f", - "nmdc:b797ed6cb135c993b582cac368b2a93c" - ], - "was_informed_by": "gold:Gp0127625", - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:29:50+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -6521,6 +5585,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2167583658, + "type": "nmdc:DataObject", + "id": "jgi:574fde7b7ded5e3df1ee1418", + "name": "10533.2.165322.TACCAAC-GGTTGGT.fastq.gz" + }, { "name": "Gp0127626_Filtered Reads", "description": "Filtered Reads for Gp0127626", @@ -6618,85 +5689,6 @@ "id": "nmdc:bb3e6793c4f036b9756f075d41846964", "file_size_bytes": 3987411 }, - { - "name": "Gp0127626_Gottcha2 TSV report", 
- "description": "Gottcha2 TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", - "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", - "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "file_size_bytes": 2399 - }, - { - "name": "Gp0127626_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", - "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", - "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "file_size_bytes": 743066 - }, - { - "name": "Gp0127626_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127626", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", - "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", - "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "file_size_bytes": 233970 - }, - { - "name": "Gp0127626_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127626", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", - "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", - "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", - "file_size_bytes": 1673697764 - }, - { - "name": "Gp0127626_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127626", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", - "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", - "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "file_size_bytes": 253730 - }, - { - "name": 
"Gp0127626_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127626", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", - "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", - "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "file_size_bytes": 2327521 - }, - { - "name": "Gp0127626_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127626", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", - "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", - "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "file_size_bytes": 1343921825 - }, - { - "name": "Gp0127626_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127626", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", - "md5_checksum": "806b27f1fa5a423100b113bb56edc708", - "id": "nmdc:806b27f1fa5a423100b113bb56edc708", - "file_size_bytes": 638478 - }, - { - "name": "Gp0127626_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127626", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", - "md5_checksum": "bb3e6793c4f036b9756f075d41846964", - "id": "nmdc:bb3e6793c4f036b9756f075d41846964", - "file_size_bytes": 3987411 - }, { "name": "Gp0127626_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127626", @@ -7105,39 +6097,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": 
"61e7195e833bcf838a700602" - }, - "has_input": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90" - ], - "part_of": [ - "nmdc:mga04xnj45" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "nmdc:dd4023a1488bdfc73b12c422b62b274a", - "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "nmdc:806b27f1fa5a423100b113bb56edc708", - "nmdc:bb3e6793c4f036b9756f075d41846964" - ], - "was_informed_by": "gold:Gp0127626", - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:54:56+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -7147,6 +6107,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2150489977, + "type": "nmdc:DataObject", + "id": "jgi:574fde577ded5e3df1ee13fc", + "name": "10533.1.165310.ATAGCGG-ACCGCTA.fastq.gz" + }, { "name": "Gp0127624_Filtered Reads", "description": "Filtered Reads for Gp0127624", @@ -7245,109 +6212,30 @@ "file_size_bytes": 3921891 }, { - "name": "Gp0127624_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", - "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", - "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "file_size_bytes": 1500 + "name": "Gp0127624_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127624", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", + "md5_checksum": "464a9db7a94e7e0646b1ff8b501d82f3", + "id": "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "file_size_bytes": 95468011 }, { - "name": "Gp0127624_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", - "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", - "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", - "file_size_bytes": 692993 + "name": "Gp0127624_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127624", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", + "md5_checksum": "0a50f88775f36e9238152f3319252853", + "id": "nmdc:0a50f88775f36e9238152f3319252853", + "file_size_bytes": 94893921 }, { - "name": "Gp0127624_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127624", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", - "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", - "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "file_size_bytes": 230779 - }, - { - "name": "Gp0127624_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127624", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", - "md5_checksum": "77db34862804280185d3b1ce961e5338", - "id": "nmdc:77db34862804280185d3b1ce961e5338", - "file_size_bytes": 1645928829 - }, - { - "name": "Gp0127624_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127624", - "data_object_type": 
"Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", - "md5_checksum": "84e3efb84d961d189ece310911ccf475", - "id": "nmdc:84e3efb84d961d189ece310911ccf475", - "file_size_bytes": 254646 - }, - { - "name": "Gp0127624_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127624", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", - "md5_checksum": "b8fd31679921f8b68c80917e14caa260", - "id": "nmdc:b8fd31679921f8b68c80917e14caa260", - "file_size_bytes": 2332082 - }, - { - "name": "Gp0127624_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127624", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", - "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", - "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", - "file_size_bytes": 1316771556 - }, - { - "name": "Gp0127624_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127624", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", - "md5_checksum": "0781e8042688219035efafe7d75858d0", - "id": "nmdc:0781e8042688219035efafe7d75858d0", - "file_size_bytes": 626940 - }, - { - "name": "Gp0127624_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127624", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", - "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", - "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", - "file_size_bytes": 3921891 - }, - { - 
"name": "Gp0127624_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127624", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", - "md5_checksum": "464a9db7a94e7e0646b1ff8b501d82f3", - "id": "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", - "file_size_bytes": 95468011 - }, - { - "name": "Gp0127624_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127624", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", - "md5_checksum": "0a50f88775f36e9238152f3319252853", - "id": "nmdc:0a50f88775f36e9238152f3319252853", - "file_size_bytes": 94893921 - }, - { - "name": "Gp0127624_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", - "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", - "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", - "file_size_bytes": 15112642 + "name": "Gp0127624_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", + "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", + "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", + "file_size_bytes": 15112642 }, { "name": "Gp0127624_Assembled AGP file", @@ -7806,39 +6694,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7191b833bcf838a6ff905" - }, - "has_input": [ - "nmdc:8585f6896702bddf64b02191be5921f4" - ], - "part_of": [ - "nmdc:mga0e8jh10" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - 
"nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "nmdc:6c7fec765f2a225f168ebb1f69961013", - "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "nmdc:77db34862804280185d3b1ce961e5338", - "nmdc:84e3efb84d961d189ece310911ccf475", - "nmdc:b8fd31679921f8b68c80917e14caa260", - "nmdc:715c66c69b621478da7d48481f3cbd1d", - "nmdc:0781e8042688219035efafe7d75858d0", - "nmdc:85547ab860ef9d6877ba7abc8881740a" - ], - "was_informed_by": "gold:Gp0127624", - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:30:59+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -7848,6 +6704,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2057112594, + "type": "nmdc:DataObject", + "id": "jgi:574fde587ded5e3df1ee13fd", + "name": "10533.1.165310.CCAGTGT-AACACTG.fastq.gz" + }, { "name": "Gp0127629_Filtered Reads", "description": "Filtered Reads for Gp0127629", @@ -7945,85 +6808,6 @@ "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", "file_size_bytes": 3933712 }, - { - "name": "Gp0127629_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", - "md5_checksum": "f4f810491708ff25956cddd005cc9944", - "id": "nmdc:f4f810491708ff25956cddd005cc9944", - "file_size_bytes": 1206 - }, - { - "name": "Gp0127629_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", - "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", - "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", - "file_size_bytes": 662074 - }, - { - "name": 
"Gp0127629_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127629", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", - "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", - "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "file_size_bytes": 229307 - }, - { - "name": "Gp0127629_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127629", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", - "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", - "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "file_size_bytes": 1667543500 - }, - { - "name": "Gp0127629_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127629", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", - "md5_checksum": "6a216ec913587e26ddc036b703126d76", - "id": "nmdc:6a216ec913587e26ddc036b703126d76", - "file_size_bytes": 253079 - }, - { - "name": "Gp0127629_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127629", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", - "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", - "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", - "file_size_bytes": 2326900 - }, - { - "name": "Gp0127629_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127629", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", - "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", - "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "file_size_bytes": 1328025421 - }, - { - "name": "Gp0127629_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127629", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", - "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", - "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "file_size_bytes": 628969 - }, - { - "name": "Gp0127629_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127629", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", - "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", - "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", - "file_size_bytes": 3933712 - }, { "name": "Gp0127629_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127629", @@ -8488,39 +7272,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7195d833bcf838a70058b" - }, - "has_input": [ - "nmdc:0db98173ae3395106e24d250b2655f06" - ], - "part_of": [ - "nmdc:mga071r920" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f4f810491708ff25956cddd005cc9944", - "nmdc:67e3c200d3765733af33d1db1f4bf968", - "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "nmdc:6a216ec913587e26ddc036b703126d76", - "nmdc:ebed7286f886596764a66a0d1dac3e43", - "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "nmdc:81108175d5ef2ca158f516bfc75d3cd9" - 
], - "was_informed_by": "gold:Gp0127629", - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga071r920", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:33+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -8530,6 +7282,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2825784199, + "type": "nmdc:DataObject", + "id": "jgi:574fe09a7ded5e3df1ee1485", + "name": "10533.3.165334.CTGACAC-TGTGTCA.fastq.gz" + }, { "name": "Gp0127628_Filtered Reads", "description": "Filtered Reads for Gp0127628", @@ -8628,101 +7387,22 @@ "file_size_bytes": 4035375 }, { - "name": "Gp0127628_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", - "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", - "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "file_size_bytes": 3472 - }, - { - "name": "Gp0127628_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", - "md5_checksum": "335dbf6f1055de0950988a002f432c0b", - "id": "nmdc:335dbf6f1055de0950988a002f432c0b", - "file_size_bytes": 863867 + "name": "Gp0127628_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127628", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", + "md5_checksum": "9e550afb3bcd8d66807f861ecfed815b", + "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "file_size_bytes": 74277737 }, { - "name": "Gp0127628_Gottcha2 Krona HTML report", - "description": "Gottcha2 
Krona HTML report for Gp0127628", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", - "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", - "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "file_size_bytes": 234974 - }, - { - "name": "Gp0127628_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127628", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", - "md5_checksum": "224085164a389c6f207967ed03b3e6af", - "id": "nmdc:224085164a389c6f207967ed03b3e6af", - "file_size_bytes": 2220789142 - }, - { - "name": "Gp0127628_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127628", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", - "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", - "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", - "file_size_bytes": 257030 - }, - { - "name": "Gp0127628_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127628", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", - "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", - "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "file_size_bytes": 2337568 - }, - { - "name": "Gp0127628_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127628", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", - "md5_checksum": 
"8f75800abbcf5a94043ad677d7cb975c", - "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "file_size_bytes": 1776487262 - }, - { - "name": "Gp0127628_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127628", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", - "md5_checksum": "aae9e961d8ed716457616c8a8841037b", - "id": "nmdc:aae9e961d8ed716457616c8a8841037b", - "file_size_bytes": 664011 - }, - { - "name": "Gp0127628_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127628", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", - "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", - "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", - "file_size_bytes": 4035375 - }, - { - "name": "Gp0127628_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127628", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", - "md5_checksum": "9e550afb3bcd8d66807f861ecfed815b", - "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", - "file_size_bytes": 74277737 - }, - { - "name": "Gp0127628_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127628", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", - "md5_checksum": "5e79fce62ffa8c4479be5159143797e0", - "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", - "file_size_bytes": 73802989 + "name": "Gp0127628_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127628", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", + "md5_checksum": 
"5e79fce62ffa8c4479be5159143797e0", + "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", + "file_size_bytes": 73802989 }, { "name": "Gp0127628_Metagenome Contig Coverage Stats", @@ -9151,39 +7831,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7193b833bcf838a6fff9c" - }, - "has_input": [ - "nmdc:f6f1760721d73fc57919b2115a1d47ec" - ], - "part_of": [ - "nmdc:mga0x5c381" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "nmdc:335dbf6f1055de0950988a002f432c0b", - "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "nmdc:224085164a389c6f207967ed03b3e6af", - "nmdc:39ba17263c144761a8bdcc1645c034f5", - "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "nmdc:aae9e961d8ed716457616c8a8841037b", - "nmdc:ba83d6ab837403f4bcbc9400a0460457" - ], - "was_informed_by": "gold:Gp0127628", - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:59+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -9193,6 +7841,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2291612962, + "type": "nmdc:DataObject", + "id": "jgi:574fde5b7ded5e3df1ee13ff", + "name": "10533.1.165310.TCGCTGT-AACAGCG.fastq.gz" + }, { "name": "Gp0127631_Filtered Reads", "description": "Filtered Reads for Gp0127631", @@ -9290,85 +7945,6 @@ "id": "nmdc:50093825ec73dcabe66aa353de766beb", "file_size_bytes": 3993246 }, - { - "name": "Gp0127631_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127631", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", - "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", - "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "file_size_bytes": 1227 - }, - { - "name": "Gp0127631_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", - "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", - "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "file_size_bytes": 647196 - }, - { - "name": "Gp0127631_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127631", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", - "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", - "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "file_size_bytes": 229312 - }, - { - "name": "Gp0127631_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127631", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", - "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", - "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", - "file_size_bytes": 1861431092 - }, - { - "name": "Gp0127631_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127631", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", - "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", - "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "file_size_bytes": 254665 - }, - { - "name": "Gp0127631_Centrifuge Krona HTML report", - "description": "Centrifuge 
Krona HTML report for Gp0127631", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", - "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", - "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", - "file_size_bytes": 2334578 - }, - { - "name": "Gp0127631_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127631", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", - "md5_checksum": "6a46583da876b9d6287302308df0b9fd", - "id": "nmdc:6a46583da876b9d6287302308df0b9fd", - "file_size_bytes": 1483354621 - }, - { - "name": "Gp0127631_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127631", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", - "md5_checksum": "af619dc5a0423509a4beaca26aa61000", - "id": "nmdc:af619dc5a0423509a4beaca26aa61000", - "file_size_bytes": 640329 - }, - { - "name": "Gp0127631_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127631", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", - "md5_checksum": "50093825ec73dcabe66aa353de766beb", - "id": "nmdc:50093825ec73dcabe66aa353de766beb", - "file_size_bytes": 3993246 - }, { "name": "Gp0127631_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127631", @@ -9852,39 +8428,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71938833bcf838a6ffe7a" - }, - "has_input": [ - 
"nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" - ], - "part_of": [ - "nmdc:mga0jx8k09" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "nmdc:0d1729a83798b752f33eeb8d97afe972", - "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "nmdc:ea27c005b1788434c2198ad60939d4bc", - "nmdc:6a46583da876b9d6287302308df0b9fd", - "nmdc:af619dc5a0423509a4beaca26aa61000", - "nmdc:50093825ec73dcabe66aa353de766beb" - ], - "was_informed_by": "gold:Gp0127631", - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:40:31+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -9894,6 +8438,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2557650099, + "type": "nmdc:DataObject", + "id": "jgi:574fde7c7ded5e3df1ee1419", + "name": "10533.2.165322.TGTACAC-GGTGTAC.fastq.gz" + }, { "name": "Gp0127630_Filtered Reads", "description": "Filtered Reads for Gp0127630", @@ -9992,109 +8543,30 @@ "file_size_bytes": 3962195 }, { - "name": "Gp0127630_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", - "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", - "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "file_size_bytes": 3373 + "name": "Gp0127630_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127630", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", + "md5_checksum": 
"7b35237c97a75f17ba74be0fe96416c9", + "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "file_size_bytes": 57511432 }, { - "name": "Gp0127630_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", - "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", - "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "file_size_bytes": 791488 + "name": "Gp0127630_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127630", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", + "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", + "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", + "file_size_bytes": 57128690 }, { - "name": "Gp0127630_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127630", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", - "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", - "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "file_size_bytes": 235803 - }, - { - "name": "Gp0127630_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127630", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", - "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", - "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", - "file_size_bytes": 1974171566 - }, - { - "name": "Gp0127630_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127630", - "data_object_type": "Centrifuge Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", - "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", - "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "file_size_bytes": 255012 - }, - { - "name": "Gp0127630_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127630", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", - "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", - "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "file_size_bytes": 2330430 - }, - { - "name": "Gp0127630_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127630", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", - "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", - "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "file_size_bytes": 1584744477 - }, - { - "name": "Gp0127630_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127630", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", - "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", - "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "file_size_bytes": 650172 - }, - { - "name": "Gp0127630_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127630", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", - "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", - "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", - "file_size_bytes": 3962195 - }, - { - "name": "Gp0127630_Assembled contigs fasta", - 
"description": "Assembled contigs fasta for Gp0127630", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", - "md5_checksum": "7b35237c97a75f17ba74be0fe96416c9", - "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", - "file_size_bytes": 57511432 - }, - { - "name": "Gp0127630_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127630", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", - "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", - "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", - "file_size_bytes": 57128690 - }, - { - "name": "Gp0127630_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", - "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", - "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", - "file_size_bytes": 10020081 + "name": "Gp0127630_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", + "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", + "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", + "file_size_bytes": 10020081 }, { "name": "Gp0127630_Assembled AGP file", @@ -10534,39 +9006,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71939833bcf838a6fff09" - }, - "has_input": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" - ], - "part_of": [ - "nmdc:mga09n3g47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - 
"nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "nmdc:2f21fd19f055d1931ab82016ed781a12", - "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" - ], - "was_informed_by": "gold:Gp0127630", - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", - "started_at_time": "2021-10-11T02:26:53Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:54:22+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -10576,6 +9016,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2003194973, + "type": "nmdc:DataObject", + "id": "jgi:574fde5e7ded5e3df1ee1401", + "name": "10533.1.165310.GGACTGT-AACAGTC.fastq.gz" + }, { "name": "Gp0127633_Filtered Reads", "description": "Filtered Reads for Gp0127633", @@ -10673,85 +9120,6 @@ "id": "nmdc:89e810af4915f0e117eaa60550587453", "file_size_bytes": 3891844 }, - { - "name": "Gp0127633_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", - "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", - "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", - "file_size_bytes": 875 - }, - { - "name": "Gp0127633_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", - "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", - "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "file_size_bytes": 578856 - }, - { - "name": "Gp0127633_Gottcha2 Krona HTML report", - 
"description": "Gottcha2 Krona HTML report for Gp0127633", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", - "md5_checksum": "f445af1a7774572d156f55a898d26f09", - "id": "nmdc:f445af1a7774572d156f55a898d26f09", - "file_size_bytes": 228067 - }, - { - "name": "Gp0127633_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127633", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", - "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", - "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", - "file_size_bytes": 1646942155 - }, - { - "name": "Gp0127633_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127633", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", - "md5_checksum": "28beb8baabdaf346f2066b40f375a152", - "id": "nmdc:28beb8baabdaf346f2066b40f375a152", - "file_size_bytes": 252735 - }, - { - "name": "Gp0127633_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127633", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", - "md5_checksum": "1f74a43724c4afed5563499d05601e22", - "id": "nmdc:1f74a43724c4afed5563499d05601e22", - "file_size_bytes": 2329168 - }, - { - "name": "Gp0127633_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127633", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", - "md5_checksum": 
"4825177c6d0a8b67db82e6070cfbc35f", - "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "file_size_bytes": 1310443491 - }, - { - "name": "Gp0127633_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127633", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", - "md5_checksum": "275268a6b5aca33c427d11877bcfa674", - "id": "nmdc:275268a6b5aca33c427d11877bcfa674", - "file_size_bytes": 621441 - }, - { - "name": "Gp0127633_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127633", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", - "md5_checksum": "89e810af4915f0e117eaa60550587453", - "id": "nmdc:89e810af4915f0e117eaa60550587453", - "file_size_bytes": 3891844 - }, { "name": "Gp0127633_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127633", @@ -11292,39 +9660,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7193c833bcf838a70001a" - }, - "has_input": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" - ], - "part_of": [ - "nmdc:mga05zvf81" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8bd9eb762acabbac5d079c379c28e381", - "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "nmdc:f445af1a7774572d156f55a898d26f09", - "nmdc:e11fcbf66318878c05984fa3d893e3b7", - "nmdc:28beb8baabdaf346f2066b40f375a152", - "nmdc:1f74a43724c4afed5563499d05601e22", - "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "nmdc:275268a6b5aca33c427d11877bcfa674", - "nmdc:89e810af4915f0e117eaa60550587453" - ], - "was_informed_by": "gold:Gp0127633", - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "execution_resource": "NERSC-Cori", - 
"name": "ReadBased Analysis Activity for nmdc:mga05zvf81", - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:40:06+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -11334,6 +9670,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1875083220, + "type": "nmdc:DataObject", + "id": "jgi:574fe0967ded5e3df1ee1482", + "name": "10533.3.165334.GCCTTGT-AACAAGG.fastq.gz" + }, { "name": "Gp0127627_Filtered Reads", "description": "Filtered Reads for Gp0127627", @@ -11432,109 +9775,30 @@ "file_size_bytes": 3926756 }, { - "name": "Gp0127627_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", - "md5_checksum": "a5ac6665e5d66242b1c885a911236982", - "id": "nmdc:a5ac6665e5d66242b1c885a911236982", - "file_size_bytes": 5530 + "name": "Gp0127627_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127627", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", + "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", + "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "file_size_bytes": 19853676 }, { - "name": "Gp0127627_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", - "md5_checksum": "d19478a191693d643157a89c69cc02d1", - "id": "nmdc:d19478a191693d643157a89c69cc02d1", - "file_size_bytes": 825047 + "name": "Gp0127627_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127627", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", + "md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", + "id": "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", + "file_size_bytes": 19699986 }, { - "name": "Gp0127627_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127627", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", - "md5_checksum": "679a82699663e88a5e8828ee081fa967", - "id": "nmdc:679a82699663e88a5e8828ee081fa967", - "file_size_bytes": 241114 - }, - { - "name": "Gp0127627_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127627", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", - "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", - "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "file_size_bytes": 1463660267 - }, - { - "name": "Gp0127627_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127627", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", - "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", - "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", - "file_size_bytes": 254347 - }, - { - "name": "Gp0127627_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127627", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", - "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", - "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "file_size_bytes": 2330603 - }, - { - "name": "Gp0127627_Kraken classification 
TSV report", - "description": "Kraken classification TSV report for Gp0127627", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", - "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", - "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "file_size_bytes": 1177609473 - }, - { - "name": "Gp0127627_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127627", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", - "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", - "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", - "file_size_bytes": 643281 - }, - { - "name": "Gp0127627_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127627", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", - "md5_checksum": "bab24ab64ad432d115f182df7198d46e", - "id": "nmdc:bab24ab64ad432d115f182df7198d46e", - "file_size_bytes": 3926756 - }, - { - "name": "Gp0127627_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127627", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", - "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", - "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", - "file_size_bytes": 19853676 - }, - { - "name": "Gp0127627_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127627", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", - "md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", - "id": "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", - "file_size_bytes": 
19699986 - }, - { - "name": "Gp0127627_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", - "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", - "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", - "file_size_bytes": 3997845 + "name": "Gp0127627_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", + "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", + "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", + "file_size_bytes": 3997845 }, { "name": "Gp0127627_Assembled AGP file", @@ -11897,39 +10161,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7195c833bcf838a70049b" - }, - "has_input": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf" - ], - "part_of": [ - "nmdc:mga0daby71" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a5ac6665e5d66242b1c885a911236982", - "nmdc:d19478a191693d643157a89c69cc02d1", - "nmdc:679a82699663e88a5e8828ee081fa967", - "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "nmdc:0380e478962be82e0d97a6339f7f3b91", - "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "nmdc:a2a0029691c04851f4a98003a773fe3f", - "nmdc:bab24ab64ad432d115f182df7198d46e" - ], - "was_informed_by": "gold:Gp0127627", - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0daby71", - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T19:08:49+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -11939,6 
+10171,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2456584646, + "type": "nmdc:DataObject", + "id": "jgi:574fde807ded5e3df1ee141b", + "name": "10533.2.165322.AGCTAAC-GGTTAGC.fastq.gz" + }, { "name": "Gp0127632_Filtered Reads", "description": "Filtered Reads for Gp0127632", @@ -12036,85 +10275,6 @@ "id": "nmdc:e350fda9bd0651755171d79b413b8da3", "file_size_bytes": 3959152 }, - { - "name": "Gp0127632_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", - "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", - "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "file_size_bytes": 2899 - }, - { - "name": "Gp0127632_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", - "md5_checksum": "6c54105711e818c4d8169ab595b05efe", - "id": "nmdc:6c54105711e818c4d8169ab595b05efe", - "file_size_bytes": 769416 - }, - { - "name": "Gp0127632_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127632", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", - "md5_checksum": "adb155cdb656648496484998a62fb96f", - "id": "nmdc:adb155cdb656648496484998a62fb96f", - "file_size_bytes": 235384 - }, - { - "name": "Gp0127632_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127632", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", - "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", - "id": 
"nmdc:0a03ac5737750a3b336e7299e9f01ead", - "file_size_bytes": 1917130445 - }, - { - "name": "Gp0127632_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127632", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", - "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", - "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", - "file_size_bytes": 255290 - }, - { - "name": "Gp0127632_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127632", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", - "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", - "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "file_size_bytes": 2333225 - }, - { - "name": "Gp0127632_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127632", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", - "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", - "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "file_size_bytes": 1537863470 - }, - { - "name": "Gp0127632_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127632", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", - "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", - "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "file_size_bytes": 648597 - }, - { - "name": "Gp0127632_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127632", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", - "md5_checksum": "e350fda9bd0651755171d79b413b8da3", - "id": "nmdc:e350fda9bd0651755171d79b413b8da3", - "file_size_bytes": 3959152 - }, { "name": "Gp0127632_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127632", @@ -12579,39 +10739,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7195d833bcf838a700521" - }, - "has_input": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" - ], - "part_of": [ - "nmdc:mga0b6cy30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "nmdc:6c54105711e818c4d8169ab595b05efe", - "nmdc:adb155cdb656648496484998a62fb96f", - "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "nmdc:f345b3a57c37097a860e38d5e83835b8", - "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "nmdc:e350fda9bd0651755171d79b413b8da3" - ], - "was_informed_by": "gold:Gp0127632", - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:08:32+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -12621,6 +10749,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2759159406, + "type": "nmdc:DataObject", + "id": "jgi:574fe09f7ded5e3df1ee1489", + "name": "10533.3.165334.ACAGCAA-GTTGCTG.fastq.gz" + }, { "name": "Gp0127636_Filtered Reads", "description": "Filtered Reads for Gp0127636", @@ -12718,85 +10853,6 @@ "id": 
"nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", "file_size_bytes": 4020978 }, - { - "name": "Gp0127636_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", - "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", - "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "file_size_bytes": 5547 - }, - { - "name": "Gp0127636_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", - "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", - "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "file_size_bytes": 965042 - }, - { - "name": "Gp0127636_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127636", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", - "md5_checksum": "827ad863c875ea14473c9903d192fa73", - "id": "nmdc:827ad863c875ea14473c9903d192fa73", - "file_size_bytes": 242495 - }, - { - "name": "Gp0127636_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127636", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", - "md5_checksum": "957074ca49765b22348e27b0133d8ba0", - "id": "nmdc:957074ca49765b22348e27b0133d8ba0", - "file_size_bytes": 2151939041 - }, - { - "name": "Gp0127636_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127636", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", - "md5_checksum": 
"9253645582296696cb33b11754832574", - "id": "nmdc:9253645582296696cb33b11754832574", - "file_size_bytes": 257932 - }, - { - "name": "Gp0127636_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127636", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", - "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", - "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "file_size_bytes": 2335219 - }, - { - "name": "Gp0127636_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127636", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", - "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", - "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", - "file_size_bytes": 1746049273 - }, - { - "name": "Gp0127636_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127636", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", - "md5_checksum": "b4524a34937893768dbd3752068dee0c", - "id": "nmdc:b4524a34937893768dbd3752068dee0c", - "file_size_bytes": 660975 - }, - { - "name": "Gp0127636_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127636", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", - "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", - "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", - "file_size_bytes": 4020978 - }, { "name": "Gp0127636_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127636", @@ -13184,144 +11240,40 @@ "collecting_biosamples_from_site_set": [], 
"date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71959833bcf838a70040a" - }, - "has_input": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6" - ], - "part_of": [ - "nmdc:mga02tph39" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "nmdc:827ad863c875ea14473c9903d192fa73", - "nmdc:957074ca49765b22348e27b0133d8ba0", - "nmdc:9253645582296696cb33b11754832574", - "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "nmdc:75180fce38f38a6307231b47a8d2b23b", - "nmdc:b4524a34937893768dbd3752068dee0c", - "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" - ], - "was_informed_by": "gold:Gp0127636", - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga02tph39", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:49:37+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], "library_preparation_set": [], "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127634_Filtered Reads", - "description": "Filtered Reads for Gp0127634", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", - "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", - "id": "nmdc:ac889627d813c8e34cfbf79a4264c590", - "file_size_bytes": 2316462404 - }, - { - "name": "Gp0127634_Filtered Stats", - "description": "Filtered Stats for Gp0127634", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", - "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", - "id": 
"nmdc:0dfd55be1779ae7922d80aa22034c9a1", - "file_size_bytes": 291 - }, - { - "name": "Gp0127634_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", - "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", - "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "file_size_bytes": 4224 - }, - { - "name": "Gp0127634_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", - "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", - "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "file_size_bytes": 875501 - }, - { - "name": "Gp0127634_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127634", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", - "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", - "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", - "file_size_bytes": 238755 - }, - { - "name": "Gp0127634_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127634", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", - "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", - "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "file_size_bytes": 2051793471 - }, - { - "name": "Gp0127634_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127634", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", - "md5_checksum": 
"0ca043b630ba304cb80603e8332c78cf", - "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", - "file_size_bytes": 256560 - }, - { - "name": "Gp0127634_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127634", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", - "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", - "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "file_size_bytes": 2334325 - }, + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127634_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127634", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", - "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", - "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "file_size_bytes": 1649071235 + "description": "Raw sequencer read data", + "file_size_bytes": 2620687542, + "type": "nmdc:DataObject", + "id": "jgi:574fe09c7ded5e3df1ee1487", + "name": "10533.3.165334.ACGGAAC-TGTTCCG.fastq.gz" }, { - "name": "Gp0127634_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127634", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", - "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "file_size_bytes": 654782 + "name": "Gp0127634_Filtered Reads", + "description": "Filtered Reads for Gp0127634", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", + "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", + "id": 
"nmdc:ac889627d813c8e34cfbf79a4264c590", + "file_size_bytes": 2316462404 }, { - "name": "Gp0127634_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127634", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", - "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", - "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", - "file_size_bytes": 3988988 + "name": "Gp0127634_Filtered Stats", + "description": "Filtered Stats for Gp0127634", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", + "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", + "id": "nmdc:0dfd55be1779ae7922d80aa22034c9a1", + "file_size_bytes": 291 }, { "name": "Gp0127634_Gottcha2 TSV report", @@ -13866,39 +11818,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71979833bcf838a700840" - }, - "has_input": [ - "nmdc:ac889627d813c8e34cfbf79a4264c590" - ], - "part_of": [ - "nmdc:mga0r0vf18" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "nmdc:366ab38bb6de9591f31a086d42ac23d6", - "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "nmdc:0ca043b630ba304cb80603e8332c78cf", - "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "nmdc:678e7c401a6971629f7d3ada83b307ab" - ], - "was_informed_by": "gold:Gp0127634", - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": 
"2021-10-11T04:49:55+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -13908,6 +11828,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2197847748, + "type": "nmdc:DataObject", + "id": "jgi:574fde607ded5e3df1ee1403", + "name": "10533.1.165310.GTTCGGT-AACCGAA.fastq.gz" + }, { "name": "Gp0127635_Filtered Reads", "description": "Filtered Reads for Gp0127635", @@ -14005,85 +11932,6 @@ "id": "nmdc:6748020214a3d68ad588e3548107208e", "file_size_bytes": 3996293 }, - { - "name": "Gp0127635_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", - "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", - "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "file_size_bytes": 3696 - }, - { - "name": "Gp0127635_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", - "md5_checksum": "ddec46781153da60da815c65871f5413", - "id": "nmdc:ddec46781153da60da815c65871f5413", - "file_size_bytes": 677459 - }, - { - "name": "Gp0127635_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127635", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", - "md5_checksum": "e626ec18dba4885613240927cbb99d8b", - "id": "nmdc:e626ec18dba4885613240927cbb99d8b", - "file_size_bytes": 236164 - }, - { - "name": "Gp0127635_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127635", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", - "md5_checksum": "f8486e4ee029038a452a3484db10cabc", - "id": "nmdc:f8486e4ee029038a452a3484db10cabc", - "file_size_bytes": 1796179546 - }, - { - "name": "Gp0127635_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127635", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", - "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", - "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "file_size_bytes": 254661 - }, - { - "name": "Gp0127635_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127635", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", - "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", - "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "file_size_bytes": 2333534 - }, - { - "name": "Gp0127635_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127635", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", - "md5_checksum": "59807dae5216b11c96df5593a26d9a88", - "id": "nmdc:59807dae5216b11c96df5593a26d9a88", - "file_size_bytes": 1432249556 - }, - { - "name": "Gp0127635_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127635", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", - "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", - "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "file_size_bytes": 639738 - }, - { - "name": "Gp0127635_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127635", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", - "md5_checksum": "6748020214a3d68ad588e3548107208e", - "id": "nmdc:6748020214a3d68ad588e3548107208e", - "file_size_bytes": 3996293 - }, { "name": "Gp0127635_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127635", @@ -14586,144 +12434,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71936833bcf838a6ffdfc" - }, - "has_input": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" - ], - "part_of": [ - "nmdc:mga0ak4p20" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "nmdc:ddec46781153da60da815c65871f5413", - "nmdc:e626ec18dba4885613240927cbb99d8b", - "nmdc:f8486e4ee029038a452a3484db10cabc", - "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "nmdc:59807dae5216b11c96df5593a26d9a88", - "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "nmdc:6748020214a3d68ad588e3548107208e" - ], - "was_informed_by": "gold:Gp0127635", - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:11:48+00:00" - } - ] + "pooling_set": [] }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127637_Filtered Reads", - "description": "Filtered Reads for Gp0127637", - "data_object_type": "Filtered Sequencing Reads", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", - "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", - "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", - "file_size_bytes": 1553219358 - }, - { - "name": "Gp0127637_Filtered Stats", - "description": "Filtered Stats for Gp0127637", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", - "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", - "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", - "file_size_bytes": 289 - }, - { - "name": "Gp0127637_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", - "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", - "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "file_size_bytes": 660 - }, - { - "name": "Gp0127637_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", - "md5_checksum": "37dd1d73ad47979ee5284830d27df535", - "id": "nmdc:37dd1d73ad47979ee5284830d27df535", - "file_size_bytes": 594054 - }, - { - "name": "Gp0127637_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127637", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", - "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", - "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "file_size_bytes": 227750 - }, - { - "name": "Gp0127637_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127637", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", - "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", - "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "file_size_bytes": 1457058272 - }, - { - "name": "Gp0127637_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127637", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", - "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", - "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "file_size_bytes": 251867 - }, - { - "name": "Gp0127637_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127637", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", - "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", - "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", - "file_size_bytes": 2325282 - }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127637_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127637", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", - "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", - "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "file_size_bytes": 1160106364 + "description": "Raw sequencer read data", + "file_size_bytes": 1954789686, + "type": "nmdc:DataObject", + "id": "jgi:574fde647ded5e3df1ee1406", + "name": "10533.1.165310.CGTAGGT-AACCTAC.fastq.gz" }, { - "name": "Gp0127637_Kraken2 TSV report", - 
"description": "Kraken2 TSV report for Gp0127637", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", - "md5_checksum": "9a1826f66ee45187d627076d11dc491f", - "id": "nmdc:9a1826f66ee45187d627076d11dc491f", - "file_size_bytes": 613810 + "name": "Gp0127637_Filtered Reads", + "description": "Filtered Reads for Gp0127637", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", + "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", + "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", + "file_size_bytes": 1553219358 }, { - "name": "Gp0127637_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127637", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", - "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", - "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", - "file_size_bytes": 3853908 + "name": "Gp0127637_Filtered Stats", + "description": "Filtered Stats for Gp0127637", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", + "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", + "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", + "file_size_bytes": 289 }, { "name": "Gp0127637_Gottcha2 TSV report", @@ -15287,39 +13031,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7191f833bcf838a6ffa50" - }, - "has_input": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" - ], - "part_of": [ - "nmdc:mga0sb9b30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - 
"nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "nmdc:37dd1d73ad47979ee5284830d27df535", - "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "nmdc:81a6efbd082e07bc2db174a88d64a272", - "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "nmdc:9a1826f66ee45187d627076d11dc491f", - "nmdc:67adb9cc2c75251f556a90b1a959ea72" - ], - "was_informed_by": "gold:Gp0127637", - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:11:56+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -15329,6 +13041,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1920284821, + "type": "nmdc:DataObject", + "id": "jgi:574fde837ded5e3df1ee141d", + "name": "10533.2.165322.TCATCAC-GGTGATG.fastq.gz" + }, { "name": "Gp0127638_Filtered Reads", "description": "Filtered Reads for Gp0127638", @@ -15426,85 +13145,6 @@ "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", "file_size_bytes": 3896830 }, - { - "name": "Gp0127638_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", - "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", - "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "file_size_bytes": 2025 - }, - { - "name": "Gp0127638_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", - "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", - "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", - "file_size_bytes": 655633 - }, - { - "name": 
"Gp0127638_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127638", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", - "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", - "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", - "file_size_bytes": 232133 - }, - { - "name": "Gp0127638_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127638", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", - "md5_checksum": "e9946f36795474182b7759d3d7532b57", - "id": "nmdc:e9946f36795474182b7759d3d7532b57", - "file_size_bytes": 1448205544 - }, - { - "name": "Gp0127638_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127638", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", - "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", - "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "file_size_bytes": 253872 - }, - { - "name": "Gp0127638_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127638", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", - "md5_checksum": "997a66f49a232750bd7132639f3387e7", - "id": "nmdc:997a66f49a232750bd7132639f3387e7", - "file_size_bytes": 2331772 - }, - { - "name": "Gp0127638_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127638", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", - "md5_checksum": "d3f604a59babf001839d38a617b62931", - "id": "nmdc:d3f604a59babf001839d38a617b62931", - "file_size_bytes": 1157365410 - }, - { - "name": "Gp0127638_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127638", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", - "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", - "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", - "file_size_bytes": 621484 - }, - { - "name": "Gp0127638_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127638", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", - "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", - "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", - "file_size_bytes": 3896830 - }, { "name": "Gp0127638_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127638", @@ -15913,144 +13553,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719b5833bcf838a7010e1" - }, - "has_input": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752" - ], - "part_of": [ - "nmdc:mga0hjgc20" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "nmdc:b6de56746a284f8226dd86817c8ae04e", - "nmdc:d9572e708af9f0a06e98cfddfb298359", - "nmdc:e9946f36795474182b7759d3d7532b57", - "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "nmdc:997a66f49a232750bd7132639f3387e7", - "nmdc:d3f604a59babf001839d38a617b62931", - "nmdc:3abfaa434ee1449cbbb69985e48488b4", - 
"nmdc:70c2fc1a2c7c0032528ff91ad1576465" - ], - "was_informed_by": "gold:Gp0127638", - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:49:51+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], "library_preparation_set": [], "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115670_Filtered Reads", - "description": "Filtered Reads for Gp0115670", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", - "md5_checksum": "7f6b353300583c60d2d668880b4134cd", - "id": "nmdc:7f6b353300583c60d2d668880b4134cd", - "file_size_bytes": 3012174785 - }, - { - "name": "Gp0115670_Filtered Stats", - "description": "Filtered Stats for Gp0115670", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", - "md5_checksum": "a4f65d101293fa4345cd865f86597464", - "id": "nmdc:a4f65d101293fa4345cd865f86597464", - "file_size_bytes": 291 - }, - { - "name": "Gp0115670_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", - "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", - "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "file_size_bytes": 13758 - }, - { - "name": "Gp0115670_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", - "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", - "id": 
"nmdc:1ac2be77491e7d425da1d62f69f1508d", - "file_size_bytes": 1116084 - }, - { - "name": "Gp0115670_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115670", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", - "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", - "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "file_size_bytes": 268542 - }, - { - "name": "Gp0115670_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115670", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", - "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", - "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "file_size_bytes": 2458475116 - }, - { - "name": "Gp0115670_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115670", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", - "md5_checksum": "c065784bed2b2495d512af93d05967de", - "id": "nmdc:c065784bed2b2495d512af93d05967de", - "file_size_bytes": 261692 - }, - { - "name": "Gp0115670_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115670", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", - "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", - "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "file_size_bytes": 2343355 - }, + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0115670_Kraken classification TSV report", - "description": "Kraken classification TSV report 
for Gp0115670", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", - "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", - "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "file_size_bytes": 2019980511 + "description": "Raw sequencer read data", + "file_size_bytes": 3408915289, + "type": "nmdc:DataObject", + "id": "jgi:55d7402a0d8785342fcf7e3b", + "name": "9422.8.132674.CGTACG.fastq.gz" }, { - "name": "Gp0115670_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115670", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", - "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", - "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", - "file_size_bytes": 694029 + "name": "Gp0115670_Filtered Reads", + "description": "Filtered Reads for Gp0115670", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", + "md5_checksum": "7f6b353300583c60d2d668880b4134cd", + "id": "nmdc:7f6b353300583c60d2d668880b4134cd", + "file_size_bytes": 3012174785 }, { - "name": "Gp0115670_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115670", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", - "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", - "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", - "file_size_bytes": 4190653 + "name": "Gp0115670_Filtered Stats", + "description": "Filtered Stats for Gp0115670", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", + "md5_checksum": "a4f65d101293fa4345cd865f86597464", + "id": 
"nmdc:a4f65d101293fa4345cd865f86597464", + "file_size_bytes": 291 }, { "name": "Gp0115670_Gottcha2 TSV report", @@ -16674,39 +14210,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a12833bcf838a701ba9" - }, - "has_input": [ - "nmdc:7f6b353300583c60d2d668880b4134cd" - ], - "part_of": [ - "nmdc:mga0d7pj22" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "nmdc:1ac2be77491e7d425da1d62f69f1508d", - "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "nmdc:c065784bed2b2495d512af93d05967de", - "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "nmdc:8a26d8496a70f4777be0e1237092e44c", - "nmdc:694b83f0b6f599948d4248dd48dd9ba9" - ], - "was_informed_by": "gold:Gp0115670", - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:55:52+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -16716,6 +14220,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2350177247, + "type": "nmdc:DataObject", + "id": "jgi:55d7402c0d8785342fcf7e3e", + "name": "9422.8.132674.GGTAGC.fastq.gz" + }, { "name": "Gp0115674_Filtered Reads", "description": "Filtered Reads for Gp0115674", @@ -16813,85 +14324,6 @@ "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", "file_size_bytes": 4070548 }, - { - "name": "Gp0115674_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115674", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", - "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", - "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "file_size_bytes": 13768 - }, - { - "name": "Gp0115674_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", - "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", - "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "file_size_bytes": 1022858 - }, - { - "name": "Gp0115674_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115674", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", - "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", - "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "file_size_bytes": 269166 - }, - { - "name": "Gp0115674_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115674", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", - "md5_checksum": "f8740b1fadbc29aef50d32706c955199", - "id": "nmdc:f8740b1fadbc29aef50d32706c955199", - "file_size_bytes": 1904303690 - }, - { - "name": "Gp0115674_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115674", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", - "md5_checksum": "80abfcc9b09476af4083b2af1760834f", - "id": "nmdc:80abfcc9b09476af4083b2af1760834f", - "file_size_bytes": 258748 - }, - { - "name": "Gp0115674_Centrifuge Krona HTML report", - "description": 
"Centrifuge Krona HTML report for Gp0115674", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", - "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", - "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "file_size_bytes": 2335000 - }, - { - "name": "Gp0115674_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115674", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", - "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", - "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "file_size_bytes": 1574286150 - }, - { - "name": "Gp0115674_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115674", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", - "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", - "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "file_size_bytes": 671800 - }, - { - "name": "Gp0115674_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115674", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", - "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", - "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", - "file_size_bytes": 4070548 - }, { "name": "Gp0115674_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115674", @@ -17397,144 +14829,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a31833bcf838a701e57" - }, - "has_input": [ - 
"nmdc:538fd5695eb3decd48891e72acebb8ce" - ], - "part_of": [ - "nmdc:mga0cf0450" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "nmdc:f8740b1fadbc29aef50d32706c955199", - "nmdc:80abfcc9b09476af4083b2af1760834f", - "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" - ], - "was_informed_by": "gold:Gp0115674", - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:21:41+00:00" - } - ] + "pooling_set": [] }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115673_Filtered Reads", - "description": "Filtered Reads for Gp0115673", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", - "md5_checksum": "268918f610926421d2af43f175553680", - "id": "nmdc:268918f610926421d2af43f175553680", - "file_size_bytes": 1492820163 - }, - { - "name": "Gp0115673_Filtered Stats", - "description": "Filtered Stats for Gp0115673", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", - "md5_checksum": "4610980cf3558f5a9830797ead97362a", - "id": "nmdc:4610980cf3558f5a9830797ead97362a", - "file_size_bytes": 287 - }, - { - "name": "Gp0115673_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115673", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", - "md5_checksum": "c7b24571b61a33018cf118b5424b787f", - "id": "nmdc:c7b24571b61a33018cf118b5424b787f", - "file_size_bytes": 9782 - }, - { - "name": "Gp0115673_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", - "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", - "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", - "file_size_bytes": 856112 - }, - { - "name": "Gp0115673_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115673", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", - "md5_checksum": "7c6b0ef44450c747580826a2e218844b", - "id": "nmdc:7c6b0ef44450c747580826a2e218844b", - "file_size_bytes": 255142 - }, - { - "name": "Gp0115673_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115673", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", - "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", - "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", - "file_size_bytes": 1218364738 - }, - { - "name": "Gp0115673_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115673", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", - "md5_checksum": "b5f7a68a94b356001014d1be024231af", - "id": "nmdc:b5f7a68a94b356001014d1be024231af", - "file_size_bytes": 254923 - }, - { - "name": "Gp0115673_Centrifuge Krona HTML report", - "description": "Centrifuge 
Krona HTML report for Gp0115673", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", - "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", - "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "file_size_bytes": 2323219 - }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0115673_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115673", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", - "md5_checksum": "35bf579641b2ffb3614098d9811a4968", - "id": "nmdc:35bf579641b2ffb3614098d9811a4968", - "file_size_bytes": 1001134031 + "description": "Raw sequencer read data", + "file_size_bytes": 1698585233, + "type": "nmdc:DataObject", + "id": "jgi:55d817f70d8785342fcf8270", + "name": "9387.2.132031.CTTGTA.fastq.gz" }, { - "name": "Gp0115673_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115673", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", - "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", - "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "file_size_bytes": 640671 + "name": "Gp0115673_Filtered Reads", + "description": "Filtered Reads for Gp0115673", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", + "md5_checksum": "268918f610926421d2af43f175553680", + "id": "nmdc:268918f610926421d2af43f175553680", + "file_size_bytes": 1492820163 }, { - "name": "Gp0115673_Kraken2 Krona HTML report", - 
"description": "Kraken2 Krona HTML report for Gp0115673", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", - "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", - "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", - "file_size_bytes": 3995499 + "name": "Gp0115673_Filtered Stats", + "description": "Filtered Stats for Gp0115673", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", + "md5_checksum": "4610980cf3558f5a9830797ead97362a", + "id": "nmdc:4610980cf3558f5a9830797ead97362a", + "file_size_bytes": 287 }, { "name": "Gp0115673_Gottcha2 TSV report", @@ -18079,39 +15407,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719b7833bcf838a7011dc" - }, - "has_input": [ - "nmdc:268918f610926421d2af43f175553680" - ], - "part_of": [ - "nmdc:mga0kpja70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c7b24571b61a33018cf118b5424b787f", - "nmdc:e185734176505343bf4c83c16a0a9fe2", - "nmdc:7c6b0ef44450c747580826a2e218844b", - "nmdc:5b98c377f424d7609f1a09e350cfb837", - "nmdc:b5f7a68a94b356001014d1be024231af", - "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "nmdc:35bf579641b2ffb3614098d9811a4968", - "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "nmdc:a7030fa8e9622e3396c2b96448e90c3b" - ], - "was_informed_by": "gold:Gp0115673", - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", - "started_at_time": "2021-10-11T02:28:36Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:32:43+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -18121,6 +15417,13 @@ "activity_set": 
[], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2065080622, + "type": "nmdc:DataObject", + "id": "jgi:55d817fa0d8785342fcf8272", + "name": "9387.2.132031.ATGTCA.fastq.gz" + }, { "name": "Gp0115671_Filtered Reads", "description": "Filtered Reads for Gp0115671", @@ -18218,85 +15521,6 @@ "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", "file_size_bytes": 4010701 }, - { - "name": "Gp0115671_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", - "md5_checksum": "358559c32b69eff51758db66ac01021b", - "id": "nmdc:358559c32b69eff51758db66ac01021b", - "file_size_bytes": 11833 - }, - { - "name": "Gp0115671_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", - "md5_checksum": "befbd648249c2871bd27999120e50bf7", - "id": "nmdc:befbd648249c2871bd27999120e50bf7", - "file_size_bytes": 888177 - }, - { - "name": "Gp0115671_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115671", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", - "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", - "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "file_size_bytes": 261703 - }, - { - "name": "Gp0115671_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115671", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", - "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", - "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", 
- "file_size_bytes": 1474970402 - }, - { - "name": "Gp0115671_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115671", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", - "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", - "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "file_size_bytes": 255777 - }, - { - "name": "Gp0115671_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115671", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", - "md5_checksum": "e0736ff520260ba2097c02b9e767362c", - "id": "nmdc:e0736ff520260ba2097c02b9e767362c", - "file_size_bytes": 2329875 - }, - { - "name": "Gp0115671_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115671", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", - "md5_checksum": "a00960655f9e80726fdb0fade1bec958", - "id": "nmdc:a00960655f9e80726fdb0fade1bec958", - "file_size_bytes": 1213240496 - }, - { - "name": "Gp0115671_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115671", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", - "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", - "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "file_size_bytes": 659715 - }, - { - "name": "Gp0115671_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115671", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", - "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", - "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", - "file_size_bytes": 4010701 - }, { "name": "Gp0115671_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115671", @@ -18761,144 +15985,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a4c833bcf838a702155" - }, - "has_input": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" - ], - "part_of": [ - "nmdc:mga0rw1351" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:358559c32b69eff51758db66ac01021b", - "nmdc:befbd648249c2871bd27999120e50bf7", - "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "nmdc:e0736ff520260ba2097c02b9e767362c", - "nmdc:a00960655f9e80726fdb0fade1bec958", - "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" - ], - "was_informed_by": "gold:Gp0115671", - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:39:05+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], "library_preparation_set": [], "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115676_Filtered Reads", - "description": "Filtered Reads for Gp0115676", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", - "md5_checksum": 
"e777bc518da4bbe0ab7b2959f00e2b08", - "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", - "file_size_bytes": 3113249122 - }, - { - "name": "Gp0115676_Filtered Stats", - "description": "Filtered Stats for Gp0115676", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", - "md5_checksum": "79815495339053b7935b55dbde02b2ff", - "id": "nmdc:79815495339053b7935b55dbde02b2ff", - "file_size_bytes": 292 - }, - { - "name": "Gp0115676_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", - "md5_checksum": "13343b2533892633bcc3655a1ebe788f", - "id": "nmdc:13343b2533892633bcc3655a1ebe788f", - "file_size_bytes": 13659 - }, - { - "name": "Gp0115676_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", - "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", - "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "file_size_bytes": 1168924 - }, - { - "name": "Gp0115676_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115676", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", - "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", - "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", - "file_size_bytes": 267660 - }, - { - "name": "Gp0115676_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115676", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", - "md5_checksum": 
"6cd0210b345d6908ad8ab683b1a11572", - "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "file_size_bytes": 2721808152 - }, - { - "name": "Gp0115676_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115676", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", - "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", - "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "file_size_bytes": 263207 - }, - { - "name": "Gp0115676_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115676", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", - "md5_checksum": "6e1e28773094884d35c04072309e285a", - "id": "nmdc:6e1e28773094884d35c04072309e285a", - "file_size_bytes": 2347912 - }, + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0115676_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115676", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", - "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", - "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "file_size_bytes": 2224468607 + "description": "Raw sequencer read data", + "file_size_bytes": 3492714581, + "type": "nmdc:DataObject", + "id": "jgi:55d740220d8785342fcf7e35", + "name": "9422.8.132674.GTGAAA.fastq.gz" }, { - "name": "Gp0115676_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115676", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", - "md5_checksum": 
"3b3abe337d79d09e9c7ba0a40045ad93", - "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "file_size_bytes": 701128 + "name": "Gp0115676_Filtered Reads", + "description": "Filtered Reads for Gp0115676", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", + "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", + "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", + "file_size_bytes": 3113249122 }, { - "name": "Gp0115676_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115676", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", - "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", - "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", - "file_size_bytes": 4217185 + "name": "Gp0115676_Filtered Stats", + "description": "Filtered Stats for Gp0115676", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", + "md5_checksum": "79815495339053b7935b55dbde02b2ff", + "id": "nmdc:79815495339053b7935b55dbde02b2ff", + "file_size_bytes": 292 }, { "name": "Gp0115676_Gottcha2 TSV report", @@ -19522,39 +16642,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a12833bcf838a701b3a" - }, - "has_input": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" - ], - "part_of": [ - "nmdc:mga0w3a067" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:13343b2533892633bcc3655a1ebe788f", - "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "nmdc:95a2de8be672fd50bf542215194dc4d4", - "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "nmdc:6e1e28773094884d35c04072309e285a", - 
"nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" - ], - "was_informed_by": "gold:Gp0115676", - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:40:05+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -19564,6 +16652,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 6700067822, + "type": "nmdc:DataObject", + "id": "jgi:55a9caff0d87852b2150891e", + "name": "9289.1.128215.TCCTGAG-TATCCTC.fastq.gz" + }, { "name": "Gp0115677_Filtered Reads", "description": "Filtered Reads for Gp0115677", @@ -19661,85 +16756,6 @@ "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", "file_size_bytes": 4250180 }, - { - "name": "Gp0115677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", - "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", - "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "file_size_bytes": 17895 - }, - { - "name": "Gp0115677_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", - "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", - "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", - "file_size_bytes": 1182538 - }, - { - "name": "Gp0115677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", - 
"md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", - "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "file_size_bytes": 276802 - }, - { - "name": "Gp0115677_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115677", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", - "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", - "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "file_size_bytes": 4716470614 - }, - { - "name": "Gp0115677_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115677", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", - "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", - "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "file_size_bytes": 267231 - }, - { - "name": "Gp0115677_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115677", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", - "md5_checksum": "229017cdb1832bb718d22dc27db44125", - "id": "nmdc:229017cdb1832bb718d22dc27db44125", - "file_size_bytes": 2356003 - }, - { - "name": "Gp0115677_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115677", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", - "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", - "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "file_size_bytes": 3857487871 - }, - { - "name": "Gp0115677_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115677", - 
"data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", - "md5_checksum": "bdd701b44e67929ec8bbe279697da937", - "id": "nmdc:bdd701b44e67929ec8bbe279697da937", - "file_size_bytes": 708598 - }, - { - "name": "Gp0115677_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115677", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", - "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", - "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", - "file_size_bytes": 4250180 - }, { "name": "Gp0115677_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115677", @@ -20302,144 +17318,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719d5833bcf838a70143c" - }, - "has_input": [ - "nmdc:63c857b3011dec61a08044d518291f23" - ], - "part_of": [ - "nmdc:mga0zb0766" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "nmdc:c1730daf5e6017219fd9fc079e42c132", - "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "nmdc:229017cdb1832bb718d22dc27db44125", - "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "nmdc:bdd701b44e67929ec8bbe279697da937", - "nmdc:d35583a5ed45df5a58bf084fc67bf988" - ], - "was_informed_by": "gold:Gp0115677", - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", - "started_at_time": "2021-10-11T02:24:49Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:26:42+00:00" - } - ] + "pooling_set": [] }, - { - 
"functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115675_Filtered Reads", - "description": "Filtered Reads for Gp0115675", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", - "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", - "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", - "file_size_bytes": 1533239347 - }, - { - "name": "Gp0115675_Filtered Stats", - "description": "Filtered Stats for Gp0115675", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", - "md5_checksum": "2507e3f107100ce0c72c57191d450818", - "id": "nmdc:2507e3f107100ce0c72c57191d450818", - "file_size_bytes": 287 - }, - { - "name": "Gp0115675_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", - "md5_checksum": "60d673988c4f4447feb5985e8501e914", - "id": "nmdc:60d673988c4f4447feb5985e8501e914", - "file_size_bytes": 8921 - }, - { - "name": "Gp0115675_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", - "md5_checksum": "a8f93ed13033eb949109b4e83980a893", - "id": "nmdc:a8f93ed13033eb949109b4e83980a893", - "file_size_bytes": 871109 - }, - { - "name": "Gp0115675_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115675", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", - "md5_checksum": "31dd6eb616f1e9815778453ab1601195", - 
"id": "nmdc:31dd6eb616f1e9815778453ab1601195", - "file_size_bytes": 252578 - }, - { - "name": "Gp0115675_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115675", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", - "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", - "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "file_size_bytes": 1218767711 - }, - { - "name": "Gp0115675_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115675", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", - "md5_checksum": "0aaac507db0e29827e1c87df47324932", - "id": "nmdc:0aaac507db0e29827e1c87df47324932", - "file_size_bytes": 254260 - }, - { - "name": "Gp0115675_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115675", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", - "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", - "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "file_size_bytes": 2324387 - }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0115675_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115675", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", - "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", - "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", - 
"file_size_bytes": 1001846607 + "description": "Raw sequencer read data", + "file_size_bytes": 1777604881, + "type": "nmdc:DataObject", + "id": "jgi:55d817f30d8785342fcf826d", + "name": "9387.2.132031.GGCTAC.fastq.gz" }, { - "name": "Gp0115675_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115675", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", - "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", - "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", - "file_size_bytes": 635541 + "name": "Gp0115675_Filtered Reads", + "description": "Filtered Reads for Gp0115675", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", + "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", + "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", + "file_size_bytes": 1533239347 }, { - "name": "Gp0115675_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115675", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", - "md5_checksum": "242a1c60f6cb14ba8430375171fda436", - "id": "nmdc:242a1c60f6cb14ba8430375171fda436", - "file_size_bytes": 3968420 + "name": "Gp0115675_Filtered Stats", + "description": "Filtered Stats for Gp0115675", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", + "md5_checksum": "2507e3f107100ce0c72c57191d450818", + "id": "nmdc:2507e3f107100ce0c72c57191d450818", + "file_size_bytes": 287 }, { "name": "Gp0115675_Gottcha2 TSV report", @@ -21003,39 +17915,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": 
{ - "$oid": "61e719dc833bcf838a7014d6" - }, - "has_input": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" - ], - "part_of": [ - "nmdc:mga0vf2h47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:60d673988c4f4447feb5985e8501e914", - "nmdc:a8f93ed13033eb949109b4e83980a893", - "nmdc:31dd6eb616f1e9815778453ab1601195", - "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "nmdc:0aaac507db0e29827e1c87df47324932", - "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "nmdc:1ec0247d86889fcef13f39a58a92b066", - "nmdc:242a1c60f6cb14ba8430375171fda436" - ], - "was_informed_by": "gold:Gp0115675", - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:25:21+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -21045,6 +17925,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 4637325661, + "type": "nmdc:DataObject", + "id": "jgi:55f23d820d8785306f964980", + "name": "9491.1.134352.AGTTCC.fastq.gz" + }, { "name": "Gp0115665_Filtered Reads", "description": "Filtered Reads for Gp0115665", @@ -21142,85 +18029,6 @@ "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", "file_size_bytes": 4374689 }, - { - "name": "Gp0115665_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", - "md5_checksum": "432fedddcbacb4e69c0350354ab44080", - "id": "nmdc:432fedddcbacb4e69c0350354ab44080", - "file_size_bytes": 18015 - }, - { - "name": "Gp0115665_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115665", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", - "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", - "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "file_size_bytes": 1283220 - }, - { - "name": "Gp0115665_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115665", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", - "md5_checksum": "e3d7339ba5c7677be13854f391462474", - "id": "nmdc:e3d7339ba5c7677be13854f391462474", - "file_size_bytes": 281366 - }, - { - "name": "Gp0115665_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115665", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", - "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", - "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "file_size_bytes": 3481369185 - }, - { - "name": "Gp0115665_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115665", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", - "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", - "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "file_size_bytes": 263480 - }, - { - "name": "Gp0115665_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115665", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", - "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", - "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "file_size_bytes": 2347079 - }, - { - "name": 
"Gp0115665_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115665", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", - "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", - "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "file_size_bytes": 2866138771 - }, - { - "name": "Gp0115665_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115665", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", - "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", - "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "file_size_bytes": 728030 - }, - { - "name": "Gp0115665_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115665", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", - "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", - "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", - "file_size_bytes": 4374689 - }, { "name": "Gp0115665_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115665", @@ -21761,144 +18569,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a36833bcf838a702021" - }, - "has_input": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530" - ], - "part_of": [ - "nmdc:mga06n7k74" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:432fedddcbacb4e69c0350354ab44080", - "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "nmdc:e3d7339ba5c7677be13854f391462474", - "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - 
"nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" - ], - "was_informed_by": "gold:Gp0115665", - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:19:29+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], "library_preparation_set": [], "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115669_Filtered Reads", - "description": "Filtered Reads for Gp0115669", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", - "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", - "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", - "file_size_bytes": 1806935637 - }, - { - "name": "Gp0115669_Filtered Stats", - "description": "Filtered Stats for Gp0115669", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", - "md5_checksum": "58fde3e96dbb28af9133bede850a2653", - "id": "nmdc:58fde3e96dbb28af9133bede850a2653", - "file_size_bytes": 286 - }, - { - "name": "Gp0115669_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", - "md5_checksum": "05933784d02331b60b2531e2025cd3b7", - "id": "nmdc:05933784d02331b60b2531e2025cd3b7", - "file_size_bytes": 11362 - }, - { - "name": "Gp0115669_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115669", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", - "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", - "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", - "file_size_bytes": 909325 - }, - { - "name": "Gp0115669_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115669", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", - "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", - "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "file_size_bytes": 261412 - }, - { - "name": "Gp0115669_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115669", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", - "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", - "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "file_size_bytes": 1481087410 - }, - { - "name": "Gp0115669_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115669", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", - "md5_checksum": "de45d70cc01749e9b5691dc24674545d", - "id": "nmdc:de45d70cc01749e9b5691dc24674545d", - "file_size_bytes": 256139 - }, - { - "name": "Gp0115669_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115669", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", - "md5_checksum": "534f97f3792b74385c4da305196a1b1d", - "id": "nmdc:534f97f3792b74385c4da305196a1b1d", - "file_size_bytes": 2323658 - }, + "extraction_set": [], 
+ "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0115669_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115669", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", - "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", - "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "file_size_bytes": 1220980345 + "description": "Raw sequencer read data", + "file_size_bytes": 1988838112, + "type": "nmdc:DataObject", + "id": "jgi:55d817fe0d8785342fcf8276", + "name": "9387.2.132031.GTAGAG.fastq.gz" }, { - "name": "Gp0115669_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115669", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", - "md5_checksum": "07b6457a094fab96563168ed287dc59f", - "id": "nmdc:07b6457a094fab96563168ed287dc59f", - "file_size_bytes": 651795 + "name": "Gp0115669_Filtered Reads", + "description": "Filtered Reads for Gp0115669", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", + "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", + "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", + "file_size_bytes": 1806935637 }, { - "name": "Gp0115669_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115669", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", - "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", - "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", - "file_size_bytes": 3963303 + "name": "Gp0115669_Filtered Stats", + "description": "Filtered Stats for Gp0115669", + "data_object_type": "QC 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", + "md5_checksum": "58fde3e96dbb28af9133bede850a2653", + "id": "nmdc:58fde3e96dbb28af9133bede850a2653", + "file_size_bytes": 286 }, { "name": "Gp0115669_Gottcha2 TSV report", @@ -22462,39 +19166,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a34833bcf838a701fb0" - }, - "has_input": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9" - ], - "part_of": [ - "nmdc:mga0k85x37" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:05933784d02331b60b2531e2025cd3b7", - "nmdc:50fc279637cb7048aaaeec9b223d0286", - "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "nmdc:de45d70cc01749e9b5691dc24674545d", - "nmdc:534f97f3792b74385c4da305196a1b1d", - "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "nmdc:07b6457a094fab96563168ed287dc59f", - "nmdc:164a1bc50e8d6509446ae2877be8231c" - ], - "was_informed_by": "gold:Gp0115669", - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:20:07+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -22504,6 +19176,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 3054717241, + "type": "nmdc:DataObject", + "id": "jgi:55d7402b0d8785342fcf7e3c", + "name": "9422.8.132674.GAGTGG.fastq.gz" + }, { "name": "Gp0115672_Filtered Reads", "description": "Filtered Reads for Gp0115672", @@ -22601,85 +19280,6 @@ "id": "nmdc:3266e79813577aae1d4377c62e73332c", "file_size_bytes": 4177114 }, - { - "name": "Gp0115672_Gottcha2 
TSV report", - "description": "Gottcha2 TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", - "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", - "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", - "file_size_bytes": 15806 - }, - { - "name": "Gp0115672_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", - "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", - "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "file_size_bytes": 1142479 - }, - { - "name": "Gp0115672_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115672", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", - "md5_checksum": "39a46887587926c9b81e126bb1036005", - "id": "nmdc:39a46887587926c9b81e126bb1036005", - "file_size_bytes": 273611 - }, - { - "name": "Gp0115672_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115672", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", - "md5_checksum": "b8dde2c047141d9097317c86f723eded", - "id": "nmdc:b8dde2c047141d9097317c86f723eded", - "file_size_bytes": 2436637487 - }, - { - "name": "Gp0115672_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115672", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", - "md5_checksum": "d530342b37f0785f92650e9650f31d6a", - "id": "nmdc:d530342b37f0785f92650e9650f31d6a", - "file_size_bytes": 261520 - }, - { - 
"name": "Gp0115672_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115672", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", - "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", - "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "file_size_bytes": 2342832 - }, - { - "name": "Gp0115672_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115672", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", - "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", - "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "file_size_bytes": 1993150715 - }, - { - "name": "Gp0115672_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115672", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", - "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", - "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "file_size_bytes": 693572 - }, - { - "name": "Gp0115672_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115672", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", - "md5_checksum": "3266e79813577aae1d4377c62e73332c", - "id": "nmdc:3266e79813577aae1d4377c62e73332c", - "file_size_bytes": 4177114 - }, { "name": "Gp0115672_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115672", @@ -23261,144 +19861,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": 
"61e71a33833bcf838a701f34" - }, - "has_input": [ - "nmdc:eb516fb673793f5161fb634fc19de310" - ], - "part_of": [ - "nmdc:mga0cwhj53" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5a9326e2e450663a5ed8c97389136b25", - "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "nmdc:39a46887587926c9b81e126bb1036005", - "nmdc:b8dde2c047141d9097317c86f723eded", - "nmdc:d530342b37f0785f92650e9650f31d6a", - "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "nmdc:3266e79813577aae1d4377c62e73332c" - ], - "was_informed_by": "gold:Gp0115672", - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:56:20+00:00" - } - ] + "pooling_set": [] }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127640_Filtered Reads", - "description": "Filtered Reads for Gp0127640", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", - "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", - "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", - "file_size_bytes": 2416846292 - }, - { - "name": "Gp0127640_Filtered Stats", - "description": "Filtered Stats for Gp0127640", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", - "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", - "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", - "file_size_bytes": 285 - }, - { - "name": "Gp0127640_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for 
Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", - "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", - "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "file_size_bytes": 3824 - }, - { - "name": "Gp0127640_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", - "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", - "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "file_size_bytes": 850491 - }, - { - "name": "Gp0127640_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127640", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", - "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", - "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "file_size_bytes": 236151 - }, - { - "name": "Gp0127640_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127640", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", - "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", - "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "file_size_bytes": 2057333090 - }, - { - "name": "Gp0127640_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127640", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", - "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", - "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", - "file_size_bytes": 256577 - }, - { - "name": "Gp0127640_Centrifuge Krona HTML report", - 
"description": "Centrifuge Krona HTML report for Gp0127640", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", - "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", - "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "file_size_bytes": 2334984 - }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127640_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127640", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", - "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", - "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "file_size_bytes": 1658481192 + "description": "Raw sequencer read data", + "file_size_bytes": 2619328583, + "type": "nmdc:DataObject", + "id": "jgi:574fe0a17ded5e3df1ee148a", + "name": "10533.3.165334.ACCATCC-TGGATGG.fastq.gz" }, { - "name": "Gp0127640_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127640", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", - "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", - "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", - "file_size_bytes": 653129 + "name": "Gp0127640_Filtered Reads", + "description": "Filtered Reads for Gp0127640", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", + "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", + "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", + "file_size_bytes": 2416846292 }, { - "name": 
"Gp0127640_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127640", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", - "md5_checksum": "d47144fd7ec0608e7677550d9589c889", - "id": "nmdc:d47144fd7ec0608e7677550d9589c889", - "file_size_bytes": 3977820 + "name": "Gp0127640_Filtered Stats", + "description": "Filtered Stats for Gp0127640", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", + "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", + "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", + "file_size_bytes": 285 }, { "name": "Gp0127640_Gottcha2 TSV report", @@ -23924,39 +20420,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199d833bcf838a700ec0" - }, - "has_input": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2" - ], - "part_of": [ - "nmdc:mga06rnc11" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "nmdc:61f1f6d57fd4d445682e25ec34901721", - "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "nmdc:e2939606fc9ff1c0046b333e1740f258", - "nmdc:d47144fd7ec0608e7677550d9589c889" - ], - "was_informed_by": "gold:Gp0127640", - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:33:17+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], 
@@ -23966,6 +20430,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2061929348, + "type": "nmdc:DataObject", + "id": "jgi:574fde697ded5e3df1ee140a", + "name": "10533.1.165310.GCTACGT-AACGTAG.fastq.gz" + }, { "name": "Gp0127641_Filtered Reads", "description": "Filtered Reads for Gp0127641", @@ -24063,85 +20534,6 @@ "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", "file_size_bytes": 3964515 }, - { - "name": "Gp0127641_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", - "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", - "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "file_size_bytes": 3331 - }, - { - "name": "Gp0127641_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", - "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", - "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", - "file_size_bytes": 761359 - }, - { - "name": "Gp0127641_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127641", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", - "md5_checksum": "f473f4a99336a49105d2722888ae0510", - "id": "nmdc:f473f4a99336a49105d2722888ae0510", - "file_size_bytes": 236161 - }, - { - "name": "Gp0127641_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127641", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", - "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", 
- "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", - "file_size_bytes": 1635953327 - }, - { - "name": "Gp0127641_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127641", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", - "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", - "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "file_size_bytes": 255166 - }, - { - "name": "Gp0127641_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127641", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", - "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", - "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "file_size_bytes": 2332521 - }, - { - "name": "Gp0127641_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127641", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", - "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", - "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "file_size_bytes": 1307934195 - }, - { - "name": "Gp0127641_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127641", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", - "md5_checksum": "dc193d1a1693589003f992c820606bab", - "id": "nmdc:dc193d1a1693589003f992c820606bab", - "file_size_bytes": 635050 - }, - { - "name": "Gp0127641_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127641", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", - "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", - "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", - "file_size_bytes": 3964515 - }, { "name": "Gp0127641_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127641", @@ -24606,144 +20998,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199c833bcf838a700e42" - }, - "has_input": [ - "nmdc:a2700afe93abad6f004a3701348622a2" - ], - "part_of": [ - "nmdc:mga0822t33" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "nmdc:a42312841b816448d8bd5d3adfa65f58", - "nmdc:f473f4a99336a49105d2722888ae0510", - "nmdc:ae51ea50660f44fa3b317a45f3015556", - "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "nmdc:dc193d1a1693589003f992c820606bab", - "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" - ], - "was_informed_by": "gold:Gp0127641", - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0822t33", - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:47+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], "library_preparation_set": [], "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127643_Filtered Reads", - "description": "Filtered Reads for Gp0127643", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", - "md5_checksum": 
"2ef23543e3064ca73c3034713d87c026", - "id": "nmdc:2ef23543e3064ca73c3034713d87c026", - "file_size_bytes": 1891088172 - }, - { - "name": "Gp0127643_Filtered Stats", - "description": "Filtered Stats for Gp0127643", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", - "md5_checksum": "87b172ead58a37be8d199c0acfc96759", - "id": "nmdc:87b172ead58a37be8d199c0acfc96759", - "file_size_bytes": 289 - }, - { - "name": "Gp0127643_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", - "md5_checksum": "e8f825653e5736e29b73de55bd11a270", - "id": "nmdc:e8f825653e5736e29b73de55bd11a270", - "file_size_bytes": 1326 - }, - { - "name": "Gp0127643_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", - "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", - "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "file_size_bytes": 664131 - }, - { - "name": "Gp0127643_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127643", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", - "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", - "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "file_size_bytes": 229630 - }, - { - "name": "Gp0127643_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127643", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", - "md5_checksum": 
"c9074b2e05765afd68463dc301b87995", - "id": "nmdc:c9074b2e05765afd68463dc301b87995", - "file_size_bytes": 1726867547 - }, - { - "name": "Gp0127643_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127643", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", - "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", - "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "file_size_bytes": 254021 - }, - { - "name": "Gp0127643_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127643", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", - "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", - "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", - "file_size_bytes": 2331702 - }, + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127643_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127643", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", - "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", - "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", - "file_size_bytes": 1376409913 + "description": "Raw sequencer read data", + "file_size_bytes": 2168673471, + "type": "nmdc:DataObject", + "id": "jgi:574fde6c7ded5e3df1ee140c", + "name": "10533.1.165310.TCCGAGT-AACTCGG.fastq.gz" }, { - "name": "Gp0127643_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127643", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", - "md5_checksum": 
"ed8059f366d60112deb41a0c307bc6fc", - "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "file_size_bytes": 640506 + "name": "Gp0127643_Filtered Reads", + "description": "Filtered Reads for Gp0127643", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", + "md5_checksum": "2ef23543e3064ca73c3034713d87c026", + "id": "nmdc:2ef23543e3064ca73c3034713d87c026", + "file_size_bytes": 1891088172 }, { - "name": "Gp0127643_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127643", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", - "md5_checksum": "f98bae155bced880c058ecde7d539c18", - "id": "nmdc:f98bae155bced880c058ecde7d539c18", - "file_size_bytes": 3998448 + "name": "Gp0127643_Filtered Stats", + "description": "Filtered Stats for Gp0127643", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", + "md5_checksum": "87b172ead58a37be8d199c0acfc96759", + "id": "nmdc:87b172ead58a37be8d199c0acfc96759", + "file_size_bytes": 289 }, { "name": "Gp0127643_Gottcha2 TSV report", @@ -25269,39 +21557,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719b6833bcf838a70116b" - }, - "has_input": [ - "nmdc:2ef23543e3064ca73c3034713d87c026" - ], - "part_of": [ - "nmdc:mga0evc178" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e8f825653e5736e29b73de55bd11a270", - "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "nmdc:c9074b2e05765afd68463dc301b87995", - "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "nmdc:6465fe59472b111ead1f0414ccf39f62", - 
"nmdc:9855ca52bce074c34dcebfd154fa94ff", - "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "nmdc:f98bae155bced880c058ecde7d539c18" - ], - "was_informed_by": "gold:Gp0127643", - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0evc178", - "started_at_time": "2021-10-11T02:27:00Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:04:16+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -25311,6 +21567,15 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "data_object_type": "Metagenome Raw Reads", + "url": "https://data.microbiomedata.org/data/raw/10533.3.165334.AGTCTCA-GTGAGAC.fastq.gz", + "file_size_bytes": 939616475, + "type": "nmdc:DataObject", + "id": "jgi:574fe0a87ded5e3df1ee148e", + "name": "10533.3.165334.AGTCTCA-GTGAGAC.fastq.gz" + }, { "name": "Gp0127644_Filtered Reads", "description": "Filtered Reads for Gp0127644", @@ -25408,85 +21673,6 @@ "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", "file_size_bytes": 3567307 }, - { - "name": "gold:Gp0452677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for gold:Gp0452677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", - "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", - "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "file_size_bytes": 109 - }, - { - "name": "Gp0127644_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", - "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", - "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "file_size_bytes": 426075 - }, - { - "name": "gold:Gp0452677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for gold:Gp0452677", - 
"data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", - "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", - "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "file_size_bytes": 226638 - }, - { - "name": "Gp0127644_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127644", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", - "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", - "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "file_size_bytes": 610862986 - }, - { - "name": "Gp0127644_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127644", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", - "md5_checksum": "9baa708296f62334e099cf61711b5e16", - "id": "nmdc:9baa708296f62334e099cf61711b5e16", - "file_size_bytes": 243322 - }, - { - "name": "Gp0127644_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127644", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", - "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", - "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "file_size_bytes": 2294995 - }, - { - "name": "Gp0127644_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127644", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", - "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", - "id": 
"nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "file_size_bytes": 487178087 - }, - { - "name": "Gp0127644_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127644", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", - "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", - "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "file_size_bytes": 557688 - }, - { - "name": "Gp0127644_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127644", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", - "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", - "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", - "file_size_bytes": 3567307 - }, { "name": "Gp0127644_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127644", @@ -25932,144 +22118,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7197c833bcf838a700966" - }, - "has_input": [ - "nmdc:98da35678c59689ce738b2a6bc708692" - ], - "part_of": [ - "nmdc:mga0bpf635" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "nmdc:9baa708296f62334e099cf61711b5e16", - "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" - ], - "was_informed_by": "gold:Gp0127644", - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for 
nmdc:mga0bpf635", - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:55:00+00:00" - } - ] + "pooling_set": [] }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127639_Filtered Reads", - "description": "Filtered Reads for Gp0127639", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", - "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", - "id": "nmdc:833077b40372c6daa20beaed04ed0ae1", - "file_size_bytes": 1585232805 - }, - { - "name": "Gp0127639_Filtered Stats", - "description": "Filtered Stats for Gp0127639", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", - "md5_checksum": "b68178eebde030fad0850797adbb2624", - "id": "nmdc:b68178eebde030fad0850797adbb2624", - "file_size_bytes": 289 - }, - { - "name": "Gp0127639_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", - "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", - "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "file_size_bytes": 648 - }, - { - "name": "Gp0127639_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", - "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", - "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", - "file_size_bytes": 588644 - }, - { - "name": "Gp0127639_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127639", - "data_object_type": "GOTTCHA2 Krona 
Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", - "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", - "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "file_size_bytes": 228175 - }, - { - "name": "Gp0127639_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127639", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", - "md5_checksum": "81281fef2c0778516a84b3a672cc0230", - "id": "nmdc:81281fef2c0778516a84b3a672cc0230", - "file_size_bytes": 1468498728 - }, - { - "name": "Gp0127639_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127639", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", - "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", - "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", - "file_size_bytes": 251338 - }, - { - "name": "Gp0127639_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127639", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", - "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", - "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "file_size_bytes": 2322720 - }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127639_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127639", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", - "md5_checksum": "848fc10ed4365047cb139a4b40303808", - "id": "nmdc:848fc10ed4365047cb139a4b40303808", - "file_size_bytes": 1168015909 + "description": "Raw sequencer read data", + "file_size_bytes": 1941323184, + "type": "nmdc:DataObject", + "id": "jgi:574fde667ded5e3df1ee1407", + "name": "10533.1.165310.TGTGCGT-AACGCAC.fastq.gz" }, { - "name": "Gp0127639_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127639", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", - "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", - "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", - "file_size_bytes": 616202 + "name": "Gp0127639_Filtered Reads", + "description": "Filtered Reads for Gp0127639", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", + "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", + "id": "nmdc:833077b40372c6daa20beaed04ed0ae1", + "file_size_bytes": 1585232805 }, { - "name": "Gp0127639_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127639", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", - "md5_checksum": "c6eb85143a2489921c53f8184d536129", - "id": "nmdc:c6eb85143a2489921c53f8184d536129", - "file_size_bytes": 3863456 + "name": "Gp0127639_Filtered Stats", + "description": "Filtered Stats for Gp0127639", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", + "md5_checksum": "b68178eebde030fad0850797adbb2624", + "id": "nmdc:b68178eebde030fad0850797adbb2624", + "file_size_bytes": 289 }, { 
"name": "Gp0127639_Gottcha2 TSV report", @@ -26690,39 +22772,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199a833bcf838a700d65" - }, - "has_input": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1" - ], - "part_of": [ - "nmdc:mga09wpw60" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "nmdc:82f072d1931154fbc722531d3d0dc41c", - "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "nmdc:81281fef2c0778516a84b3a672cc0230", - "nmdc:86ae054ba9def1126579c8f76db8a07a", - "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "nmdc:848fc10ed4365047cb139a4b40303808", - "nmdc:94e422e0bae86c608fba1c3815e08e92", - "nmdc:c6eb85143a2489921c53f8184d536129" - ], - "was_informed_by": "gold:Gp0127639", - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:27:12+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -26732,6 +22782,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2500707412, + "type": "nmdc:DataObject", + "id": "jgi:574fe0a67ded5e3df1ee148d", + "name": "10533.3.165334.CGCTTAA-GTTAAGC.fastq.gz" + }, { "name": "Gp0127642_Filtered Reads", "description": "Filtered Reads for Gp0127642", @@ -26829,85 +22886,6 @@ "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", "file_size_bytes": 4013188 }, - { - "name": "Gp0127642_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", - "md5_checksum": 
"bc7f6a9435c3a9aaca7ce9efe9d16e41", - "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", - "file_size_bytes": 5303 - }, - { - "name": "Gp0127642_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", - "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", - "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", - "file_size_bytes": 948120 - }, - { - "name": "Gp0127642_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127642", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", - "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", - "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", - "file_size_bytes": 241990 - }, - { - "name": "Gp0127642_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127642", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", - "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", - "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", - "file_size_bytes": 2023464022 - }, - { - "name": "Gp0127642_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127642", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", - "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", - "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", - "file_size_bytes": 257700 - }, - { - "name": "Gp0127642_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127642", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", - "md5_checksum": "bb92f0d18280f32aacf482a43a841372", - "id": "nmdc:bb92f0d18280f32aacf482a43a841372", - "file_size_bytes": 2339227 - }, - { - "name": "Gp0127642_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127642", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", - "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", - "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", - "file_size_bytes": 1630988221 - }, - { - "name": "Gp0127642_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127642", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", - "md5_checksum": "272e3daee292c6e284026ee95b72d290", - "id": "nmdc:272e3daee292c6e284026ee95b72d290", - "file_size_bytes": 659136 - }, - { - "name": "Gp0127642_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127642", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", - "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", - "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", - "file_size_bytes": 4013188 - }, { "name": "Gp0127642_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127642", @@ -27316,144 +23294,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199f833bcf838a700f38" - }, - "has_input": [ - "nmdc:603166d1e0da357d356a2029215d76ea" - ], - "part_of": [ - "nmdc:mga0cvxk30" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", - "nmdc:0a079e34648ce23b0837dff31e2be5df", - "nmdc:f19bf1723f0f0e9f2158b137d2618b08", - "nmdc:81fc62d01a53a7ab5037829a158f0b64", - "nmdc:05cc05eefdcb0d7bac19031619244a4b", - "nmdc:bb92f0d18280f32aacf482a43a841372", - "nmdc:2fddd33160498548fa73e95dfc304d1a", - "nmdc:272e3daee292c6e284026ee95b72d290", - "nmdc:bca8c2988929e7c176ec7b6609445db2" - ], - "was_informed_by": "gold:Gp0127642", - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cvxk30", - "started_at_time": "2021-12-01T21:30:33Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:50:24+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], "library_preparation_set": [], "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127646_Filtered Reads", - "description": "Filtered Reads for Gp0127646", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", - "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", - "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", - "file_size_bytes": 2209739723 - }, - { - "name": "Gp0127646_Filtered Stats", - "description": "Filtered Stats for Gp0127646", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", - "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", - "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", - "file_size_bytes": 291 - }, - { - "name": "Gp0127646_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", - "md5_checksum": 
"3e0598df41941463bac0fdec5df29f55", - "id": "nmdc:3e0598df41941463bac0fdec5df29f55", - "file_size_bytes": 4650 - }, - { - "name": "Gp0127646_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", - "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", - "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", - "file_size_bytes": 877659 - }, - { - "name": "Gp0127646_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127646", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", - "md5_checksum": "bc8e157195d042d7207d67b4982fea96", - "id": "nmdc:bc8e157195d042d7207d67b4982fea96", - "file_size_bytes": 236676 - }, - { - "name": "Gp0127646_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127646", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", - "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", - "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", - "file_size_bytes": 1901493736 - }, - { - "name": "Gp0127646_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127646", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", - "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", - "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", - "file_size_bytes": 256274 - }, - { - "name": "Gp0127646_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127646", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", - "md5_checksum": "cd10cca62774e66f60d60380ee18132e", - "id": "nmdc:cd10cca62774e66f60d60380ee18132e", - "file_size_bytes": 2333722 - }, + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127646_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127646", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", - "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", - "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", - "file_size_bytes": 1534616616 + "description": "Raw sequencer read data", + "file_size_bytes": 2463257736, + "type": "nmdc:DataObject", + "id": "jgi:574fde867ded5e3df1ee1420", + "name": "10533.2.165322.TTCGTAC-GGTACGA.fastq.gz" }, { - "name": "Gp0127646_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127646", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", - "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", - "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", - "file_size_bytes": 663507 + "name": "Gp0127646_Filtered Reads", + "description": "Filtered Reads for Gp0127646", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", + "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", + "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", + "file_size_bytes": 2209739723 }, { - "name": "Gp0127646_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127646", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", - "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", - "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", - "file_size_bytes": 4031909 + "name": "Gp0127646_Filtered Stats", + "description": "Filtered Stats for Gp0127646", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", + "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", + "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", + "file_size_bytes": 291 }, { "name": "Gp0127646_Gottcha2 TSV report", @@ -27982,39 +23856,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7197d833bcf838a7009e9" - }, - "has_input": [ - "nmdc:208a3777ef0b99408f0d5832dee576e0" - ], - "part_of": [ - "nmdc:mga0dm4q17" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3e0598df41941463bac0fdec5df29f55", - "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", - "nmdc:bc8e157195d042d7207d67b4982fea96", - "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", - "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", - "nmdc:cd10cca62774e66f60d60380ee18132e", - "nmdc:b13ee2ee52d15c3669aecd2e913f2658", - "nmdc:09a2d722810b3d90207bc4cfa626133b", - "nmdc:c3a8d9f48266a43ad74fc581132e2bba" - ], - "was_informed_by": "gold:Gp0127646", - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0dm4q17", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:12+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -28024,6 +23866,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + 
"file_size_bytes": 2446032142, + "type": "nmdc:DataObject", + "id": "jgi:574fe0ac7ded5e3df1ee1491", + "name": "10533.3.165334.GTAACGA-GTCGTTA.fastq.gz" + }, { "name": "Gp0127648_Filtered Reads", "description": "Filtered Reads for Gp0127648", @@ -28121,85 +23970,6 @@ "id": "nmdc:b549d169e5b0693152555373a6d8ee75", "file_size_bytes": 3952548 }, - { - "name": "Gp0127648_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", - "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", - "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "file_size_bytes": 3323 - }, - { - "name": "Gp0127648_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", - "md5_checksum": "1357df297d8d8a872b335e0c3222d102", - "id": "nmdc:1357df297d8d8a872b335e0c3222d102", - "file_size_bytes": 782039 - }, - { - "name": "Gp0127648_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127648", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", - "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", - "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", - "file_size_bytes": 236971 - }, - { - "name": "Gp0127648_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127648", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", - "md5_checksum": "33bf814280051c220e0c4a06f7935728", - "id": "nmdc:33bf814280051c220e0c4a06f7935728", - "file_size_bytes": 1945479328 - }, - { - "name": "Gp0127648_Centrifuge TSV report", - 
"description": "Centrifuge TSV report for Gp0127648", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", - "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", - "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "file_size_bytes": 255338 - }, - { - "name": "Gp0127648_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127648", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", - "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", - "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", - "file_size_bytes": 2333371 - }, - { - "name": "Gp0127648_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127648", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", - "md5_checksum": "222bac312efdd6c86d2475ad224b7907", - "id": "nmdc:222bac312efdd6c86d2475ad224b7907", - "file_size_bytes": 1562011343 - }, - { - "name": "Gp0127648_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127648", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", - "md5_checksum": "baaca868b1fed932b463e489708dd741", - "id": "nmdc:baaca868b1fed932b463e489708dd741", - "file_size_bytes": 647859 - }, - { - "name": "Gp0127648_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127648", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", - "md5_checksum": "b549d169e5b0693152555373a6d8ee75", - "id": 
"nmdc:b549d169e5b0693152555373a6d8ee75", - "file_size_bytes": 3952548 - }, { "name": "Gp0127648_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127648", @@ -28664,144 +24434,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199b833bcf838a700dd2" - }, - "has_input": [ - "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" - ], - "part_of": [ - "nmdc:mga0andh11" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "nmdc:1357df297d8d8a872b335e0c3222d102", - "nmdc:5b510e336e60b6120b43e9b6420a074e", - "nmdc:33bf814280051c220e0c4a06f7935728", - "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "nmdc:0efb0ad19234056d7e2e3726dead3622", - "nmdc:222bac312efdd6c86d2475ad224b7907", - "nmdc:baaca868b1fed932b463e489708dd741", - "nmdc:b549d169e5b0693152555373a6d8ee75" - ], - "was_informed_by": "gold:Gp0127648", - "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0andh11", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:13:04+00:00" - } - ] + "pooling_set": [] }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127647_Filtered Reads", - "description": "Filtered Reads for Gp0127647", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", - "md5_checksum": "c082eff434fe4863c0e29c79b759d100", - "id": "nmdc:c082eff434fe4863c0e29c79b759d100", - "file_size_bytes": 2052448806 - }, - { - "name": "Gp0127647_Filtered Stats", - "description": "Filtered 
Stats for Gp0127647", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", - "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", - "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", - "file_size_bytes": 282 - }, - { - "name": "Gp0127647_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", - "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", - "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", - "file_size_bytes": 4666 - }, - { - "name": "Gp0127647_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", - "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", - "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", - "file_size_bytes": 786018 - }, - { - "name": "Gp0127647_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127647", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", - "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", - "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", - "file_size_bytes": 237895 - }, - { - "name": "Gp0127647_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127647", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", - "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", - "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", - "file_size_bytes": 1767305277 - }, - { - "name": "Gp0127647_Centrifuge TSV report", - "description": "Centrifuge TSV report for 
Gp0127647", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", - "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", - "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", - "file_size_bytes": 254858 - }, - { - "name": "Gp0127647_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127647", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", - "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", - "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", - "file_size_bytes": 2332396 - }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127647_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127647", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", - "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", - "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", - "file_size_bytes": 1419938277 + "description": "Raw sequencer read data", + "file_size_bytes": 2236205196, + "type": "nmdc:DataObject", + "id": "jgi:574fde8a7ded5e3df1ee1422", + "name": "10533.2.165322.ACGGTCT-AAGACCG.fastq.gz" }, { - "name": "Gp0127647_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127647", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", - "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", - "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", - "file_size_bytes": 661837 + 
"name": "Gp0127647_Filtered Reads", + "description": "Filtered Reads for Gp0127647", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", + "md5_checksum": "c082eff434fe4863c0e29c79b759d100", + "id": "nmdc:c082eff434fe4863c0e29c79b759d100", + "file_size_bytes": 2052448806 }, { - "name": "Gp0127647_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127647", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", - "md5_checksum": "ab80fc324c9206a41a66d64227a97179", - "id": "nmdc:ab80fc324c9206a41a66d64227a97179", - "file_size_bytes": 4028822 + "name": "Gp0127647_Filtered Stats", + "description": "Filtered Stats for Gp0127647", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", + "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", + "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", + "file_size_bytes": 282 }, { "name": "Gp0127647_Gottcha2 TSV report", @@ -29330,39 +24996,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7197e833bcf838a700a51" - }, - "has_input": [ - "nmdc:c082eff434fe4863c0e29c79b759d100" - ], - "part_of": [ - "nmdc:mga0g0e588" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7e1438bf8076daf46f3d782d8f9656b4", - "nmdc:cfd63309cd38a293615ddce5e8ea6402", - "nmdc:7e353b7bfb1586773fa00b515dffe6ec", - "nmdc:6667be33e7867ca2aabfa5d663e2970a", - "nmdc:7ee0b0b21444ee06752e6b9c32f476af", - "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", - "nmdc:45617f93e5f072fbad25a0308ead6c3d", - "nmdc:460e7594fcd06678df1b9c5e5075cb4d", - "nmdc:ab80fc324c9206a41a66d64227a97179" 
- ], - "was_informed_by": "gold:Gp0127647", - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0g0e588", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:33+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -29372,6 +25006,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2092289780, + "type": "nmdc:DataObject", + "id": "jgi:574fde6e7ded5e3df1ee140d", + "name": "10533.1.165310.CCTCAGT-AACTGAG.fastq.gz" + }, { "name": "Gp0127645_Filtered Reads", "description": "Filtered Reads for Gp0127645", @@ -29469,85 +25110,6 @@ "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", "file_size_bytes": 3973557 }, - { - "name": "Gp0127645_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", - "md5_checksum": "694374188ba4372344536fa26a2282b8", - "id": "nmdc:694374188ba4372344536fa26a2282b8", - "file_size_bytes": 3780 - }, - { - "name": "Gp0127645_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", - "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", - "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", - "file_size_bytes": 822292 - }, - { - "name": "Gp0127645_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127645", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", - "md5_checksum": "46e203465faf61780fad8f626e9ab623", - "id": "nmdc:46e203465faf61780fad8f626e9ab623", - "file_size_bytes": 
236496 - }, - { - "name": "Gp0127645_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127645", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", - "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", - "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", - "file_size_bytes": 1699052782 - }, - { - "name": "Gp0127645_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127645", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", - "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", - "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", - "file_size_bytes": 256209 - }, - { - "name": "Gp0127645_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127645", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", - "md5_checksum": "4299b438a815becc8beed40fcb803e9f", - "id": "nmdc:4299b438a815becc8beed40fcb803e9f", - "file_size_bytes": 2336400 - }, - { - "name": "Gp0127645_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127645", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", - "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", - "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", - "file_size_bytes": 1359323947 - }, - { - "name": "Gp0127645_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127645", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", - "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", - "id": "nmdc:2be07eb38d408077a55ecb48e123f7f8", - "file_size_bytes": 651624 - }, - { - "name": "Gp0127645_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127645", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", - "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", - "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", - "file_size_bytes": 3973557 - }, { "name": "Gp0127645_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127645", @@ -29993,144 +25555,40 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7197f833bcf838a700ac3" - }, - "has_input": [ - "nmdc:034df323b47f010f27e7c032d445a891" - ], - "part_of": [ - "nmdc:mga0jbfx89" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:694374188ba4372344536fa26a2282b8", - "nmdc:e11dfa7178e8c426c7c930b57aa40377", - "nmdc:46e203465faf61780fad8f626e9ab623", - "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", - "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", - "nmdc:4299b438a815becc8beed40fcb803e9f", - "nmdc:4ae4dbd13c7338df5c00555bc6755947", - "nmdc:2be07eb38d408077a55ecb48e123f7f8", - "nmdc:f318581f0df6e04b7ae2384f9237da06" - ], - "was_informed_by": "gold:Gp0127645", - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0jbfx89", - "started_at_time": "2021-10-11T02:24:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:07:11+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], "library_preparation_set": [], 
"processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127649_Filtered Reads", - "description": "Filtered Reads for Gp0127649", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", - "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "file_size_bytes": 1967546513 - }, - { - "name": "Gp0127649_Filtered Stats", - "description": "Filtered Stats for Gp0127649", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", - "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", - "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", - "file_size_bytes": 283 - }, - { - "name": "Gp0127649_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", - "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", - "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "file_size_bytes": 2079 - }, - { - "name": "Gp0127649_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", - "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", - "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "file_size_bytes": 642861 - }, - { - "name": "Gp0127649_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127649", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", - "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", - "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", - "file_size_bytes": 
230792 - }, - { - "name": "Gp0127649_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127649", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", - "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", - "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "file_size_bytes": 1743695420 - }, - { - "name": "Gp0127649_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127649", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", - "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", - "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "file_size_bytes": 254036 - }, - { - "name": "Gp0127649_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127649", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", - "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", - "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "file_size_bytes": 2332943 - }, + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127649_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127649", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", - "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", - "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "file_size_bytes": 1387669799 + "description": "Raw sequencer read data", + "file_size_bytes": 2196954131, + "type": "nmdc:DataObject", + "id": 
"jgi:574fde8c7ded5e3df1ee1424", + "name": "10533.2.165322.GAACGCT-AAGCGTT.fastq.gz" }, { - "name": "Gp0127649_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127649", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", - "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", - "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "file_size_bytes": 637131 + "name": "Gp0127649_Filtered Reads", + "description": "Filtered Reads for Gp0127649", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", + "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "file_size_bytes": 1967546513 }, { - "name": "Gp0127649_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127649", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", - "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", - "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", - "file_size_bytes": 3976407 + "name": "Gp0127649_Filtered Stats", + "description": "Filtered Stats for Gp0127649", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", + "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", + "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", + "file_size_bytes": 283 }, { "name": "Gp0127649_Gottcha2 TSV report", @@ -30675,39 +26133,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719f8833bcf838a7018c7" - }, - "has_input": [ - "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" - ], - "part_of": [ - 
"nmdc:mga0j4fe07" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "nmdc:8c1683fa4041bd10711aa3beb4735811", - "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "nmdc:0d07551972f3230ec2ef4a0e04929b97" - ], - "was_informed_by": "gold:Gp0127649", - "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:32+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -30717,6 +26143,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2318220660, + "type": "nmdc:DataObject", + "id": "jgi:574fe0af7ded5e3df1ee1493", + "name": "10533.3.165334.CAATCGA-GTCGATT.fastq.gz" + }, { "name": "Gp0127652_Filtered Reads", "description": "Filtered Reads for Gp0127652", @@ -30814,85 +26247,6 @@ "id": "nmdc:1df4b479c887b43319d89cc80dc35239", "file_size_bytes": 3991377 }, - { - "name": "Gp0127652_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", - "md5_checksum": "70f29a321c925cfc0e2003515f708400", - "id": "nmdc:70f29a321c925cfc0e2003515f708400", - "file_size_bytes": 1524 - }, - { - "name": "Gp0127652_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", - "md5_checksum": 
"93d5419c0b31e0696ab8ffef477945fb", - "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "file_size_bytes": 670250 - }, - { - "name": "Gp0127652_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127652", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", - "md5_checksum": "9cd3b2939adabd809741ae6a84260266", - "id": "nmdc:9cd3b2939adabd809741ae6a84260266", - "file_size_bytes": 229949 - }, - { - "name": "Gp0127652_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127652", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", - "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", - "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "file_size_bytes": 1814515284 - }, - { - "name": "Gp0127652_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127652", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", - "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", - "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127652_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127652", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", - "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", - "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "file_size_bytes": 2330558 - }, - { - "name": "Gp0127652_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127652", - "data_object_type": "Kraken2 
Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", - "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", - "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "file_size_bytes": 1445957300 - }, - { - "name": "Gp0127652_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127652", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", - "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", - "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "file_size_bytes": 639677 - }, - { - "name": "Gp0127652_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127652", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", - "md5_checksum": "1df4b479c887b43319d89cc80dc35239", - "id": "nmdc:1df4b479c887b43319d89cc80dc35239", - "file_size_bytes": 3991377 - }, { "name": "Gp0127652_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127652", @@ -31395,39 +26749,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719de833bcf838a7015b2" - }, - "has_input": [ - "nmdc:60f03b815160b29125c2bd0776a330bf" - ], - "part_of": [ - "nmdc:mga0mfxf90" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:70f29a321c925cfc0e2003515f708400", - "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "nmdc:9cd3b2939adabd809741ae6a84260266", - "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - 
"nmdc:1df4b479c887b43319d89cc80dc35239" - ], - "was_informed_by": "gold:Gp0127652", - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:21+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -31437,102 +26759,30 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ - { - "name": "Gp0127654_Filtered Reads", - "description": "Filtered Reads for Gp0127654", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", - "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", - "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", - "file_size_bytes": 2479437709 - }, - { - "name": "Gp0127654_Filtered Stats", - "description": "Filtered Stats for Gp0127654", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", - "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", - "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", - "file_size_bytes": 284 - }, - { - "name": "Gp0127654_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", - "md5_checksum": "130ee7559789726a2cadccd3126dacad", - "id": "nmdc:130ee7559789726a2cadccd3126dacad", - "file_size_bytes": 3508 - }, - { - "name": "Gp0127654_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", - "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", - "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "file_size_bytes": 798264 - }, - { - 
"name": "Gp0127654_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127654", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", - "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", - "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "file_size_bytes": 234834 - }, - { - "name": "Gp0127654_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127654", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", - "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", - "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "file_size_bytes": 2231971137 - }, - { - "name": "Gp0127654_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127654", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", - "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", - "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "file_size_bytes": 257151 - }, - { - "name": "Gp0127654_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127654", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", - "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", - "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "file_size_bytes": 2341088 - }, - { - "name": "Gp0127654_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127654", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", - "md5_checksum": "dfc90170aa038c2425702be223cb2f23", - "id": "nmdc:dfc90170aa038c2425702be223cb2f23", - "file_size_bytes": 1782429285 + { + "description": "Raw sequencer read data", + "file_size_bytes": 2711112988, + "type": "nmdc:DataObject", + "id": "jgi:574fe0b17ded5e3df1ee1494", + "name": "10533.3.165334.TGACTGA-GTCAGTC.fastq.gz" }, { - "name": "Gp0127654_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127654", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", - "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", - "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "file_size_bytes": 661482 + "name": "Gp0127654_Filtered Reads", + "description": "Filtered Reads for Gp0127654", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", + "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", + "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", + "file_size_bytes": 2479437709 }, { - "name": "Gp0127654_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127654", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", - "md5_checksum": "1c8339d96884c4a408de7804e00490d1", - "id": "nmdc:1c8339d96884c4a408de7804e00490d1", - "file_size_bytes": 4020719 + "name": "Gp0127654_Filtered Stats", + "description": "Filtered Stats for Gp0127654", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", + "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", + "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", + "file_size_bytes": 284 }, { 
"name": "Gp0127654_Gottcha2 TSV report", @@ -32061,39 +27311,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719f6833bcf838a7017f0" - }, - "has_input": [ - "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" - ], - "part_of": [ - "nmdc:mga0h0s362" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:130ee7559789726a2cadccd3126dacad", - "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "nmdc:dfc90170aa038c2425702be223cb2f23", - "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "nmdc:1c8339d96884c4a408de7804e00490d1" - ], - "was_informed_by": "gold:Gp0127654", - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -32103,6 +27321,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2411560282, + "type": "nmdc:DataObject", + "id": "jgi:574fe0b47ded5e3df1ee1496", + "name": "10533.3.165334.ACGATGA-GTCATCG.fastq.gz" + }, { "name": "Gp0127656_Filtered Reads", "description": "Filtered Reads for Gp0127656", @@ -32200,85 +27425,6 @@ "id": "nmdc:ae369194e4b24e137fc23da0412277a6", "file_size_bytes": 3939982 }, - { - "name": "Gp0127656_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", - "md5_checksum": 
"ccbe419157d8286626330fd0eb0dd0e0", - "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", - "file_size_bytes": 2418 - }, - { - "name": "Gp0127656_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", - "md5_checksum": "92ab65cdaca3367552e03d895123e04f", - "id": "nmdc:92ab65cdaca3367552e03d895123e04f", - "file_size_bytes": 759212 - }, - { - "name": "Gp0127656_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127656", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", - "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", - "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", - "file_size_bytes": 231563 - }, - { - "name": "Gp0127656_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127656", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", - "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", - "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", - "file_size_bytes": 1950007455 - }, - { - "name": "Gp0127656_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127656", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", - "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", - "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", - "file_size_bytes": 255724 - }, - { - "name": "Gp0127656_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127656", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", - "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", - "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", - "file_size_bytes": 2337553 - }, - { - "name": "Gp0127656_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127656", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", - "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", - "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", - "file_size_bytes": 1555636513 - }, - { - "name": "Gp0127656_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127656", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", - "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", - "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", - "file_size_bytes": 647090 - }, - { - "name": "Gp0127656_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127656", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", - "md5_checksum": "ae369194e4b24e137fc23da0412277a6", - "id": "nmdc:ae369194e4b24e137fc23da0412277a6", - "file_size_bytes": 3939982 - }, { "name": "Gp0127656_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127656", @@ -32724,39 +27870,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719f0833bcf838a701752" - }, - "has_input": [ - "nmdc:cec95659bb04ae095f51821ddaa9fa59" - ], - "part_of": [ - "nmdc:mga00hh562" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ccbe419157d8286626330fd0eb0dd0e0", - "nmdc:92ab65cdaca3367552e03d895123e04f", - "nmdc:0b3ff6503723d6ea9b84552f68ed4270", - "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", - "nmdc:a3255df52cd6150f03bbf7cbd655ec76", - "nmdc:a25a5d7e399624e5e5735b65a9dd322a", - "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", - "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", - "nmdc:ae369194e4b24e137fc23da0412277a6" - ], - "was_informed_by": "gold:Gp0127656", - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga00hh562", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -32766,6 +27880,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2103957707, + "type": "nmdc:DataObject", + "id": "jgi:574fde907ded5e3df1ee1426", + "name": "10533.2.165322.GTGAGCT-AAGCTCA.fastq.gz" + }, { "name": "Gp0127651_Filtered Reads", "description": "Filtered Reads for Gp0127651", @@ -32776,92 +27897,13 @@ "file_size_bytes": 1856919615 }, { - "name": "Gp0127651_Filtered Stats", - "description": "Filtered Stats for Gp0127651", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", - "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", - "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", - "file_size_bytes": 283 - }, - { - "name": "Gp0127651_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", - "md5_checksum": "53ee263960c39126e039656a121deb96", - "id": "nmdc:53ee263960c39126e039656a121deb96", - "file_size_bytes": 1199 - 
}, - { - "name": "Gp0127651_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", - "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", - "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "file_size_bytes": 703299 - }, - { - "name": "Gp0127651_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127651", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", - "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", - "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "file_size_bytes": 229311 - }, - { - "name": "Gp0127651_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127651", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", - "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", - "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "file_size_bytes": 1642196063 - }, - { - "name": "Gp0127651_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127651", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", - "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", - "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", - "file_size_bytes": 254418 - }, - { - "name": "Gp0127651_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127651", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", - "md5_checksum": 
"e0c61a191258597984a05d86eaf4d71f", - "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", - "file_size_bytes": 2333132 - }, - { - "name": "Gp0127651_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127651", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", - "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", - "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "file_size_bytes": 1309125719 - }, - { - "name": "Gp0127651_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127651", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", - "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", - "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "file_size_bytes": 639737 - }, - { - "name": "Gp0127651_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127651", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", - "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", - "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", - "file_size_bytes": 3988966 + "name": "Gp0127651_Filtered Stats", + "description": "Filtered Stats for Gp0127651", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", + "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", + "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", + "file_size_bytes": 283 }, { "name": "Gp0127651_Gottcha2 TSV report", @@ -33387,39 +28429,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": 
"61e719ba833bcf838a7012b5" - }, - "has_input": [ - "nmdc:2791a196017767af3b5b21a3029799c0" - ], - "part_of": [ - "nmdc:mga08hnt47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:53ee263960c39126e039656a121deb96", - "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "nmdc:b4cbc81c986c67c1037c8b7280924683", - "nmdc:e0c61a191258597984a05d86eaf4d71f", - "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" - ], - "was_informed_by": "gold:Gp0127651", - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga08hnt47", - "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:57:48+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -33429,6 +28439,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2116898122, + "type": "nmdc:DataObject", + "id": "jgi:574fde947ded5e3df1ee1429", + "name": "10533.2.165322.GTCTCCT-AAGGAGA.fastq.gz" + }, { "name": "Gp0127655_Filtered Reads", "description": "Filtered Reads for Gp0127655", @@ -33526,85 +28543,6 @@ "id": "nmdc:157f7672690ba8207808cc4386ff10a4", "file_size_bytes": 3946317 }, - { - "name": "Gp0127655_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", - "md5_checksum": "46371c7bc8259e459f975f915aaac26f", - "id": "nmdc:46371c7bc8259e459f975f915aaac26f", - "file_size_bytes": 2178 - }, - { - "name": "Gp0127655_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127655", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", - "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", - "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", - "file_size_bytes": 697690 - }, - { - "name": "Gp0127655_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127655", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", - "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", - "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "file_size_bytes": 231103 - }, - { - "name": "Gp0127655_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127655", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", - "md5_checksum": "e3f410adc2347396abfdec2a848000d9", - "id": "nmdc:e3f410adc2347396abfdec2a848000d9", - "file_size_bytes": 1676897166 - }, - { - "name": "Gp0127655_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127655", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", - "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", - "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "file_size_bytes": 253692 - }, - { - "name": "Gp0127655_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127655", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", - "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", - "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "file_size_bytes": 2329422 - }, - { - "name": 
"Gp0127655_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127655", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", - "md5_checksum": "1d4f5a605d4549801fda16da567efe56", - "id": "nmdc:1d4f5a605d4549801fda16da567efe56", - "file_size_bytes": 1336793184 - }, - { - "name": "Gp0127655_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127655", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", - "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", - "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", - "file_size_bytes": 632192 - }, - { - "name": "Gp0127655_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127655", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", - "md5_checksum": "157f7672690ba8207808cc4386ff10a4", - "id": "nmdc:157f7672690ba8207808cc4386ff10a4", - "file_size_bytes": 3946317 - }, { "name": "Gp0127655_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127655", @@ -34050,39 +28988,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719df833bcf838a701627" - }, - "has_input": [ - "nmdc:04b9014981f7035c39bd7f870613ed93" - ], - "part_of": [ - "nmdc:mga0317978" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:46371c7bc8259e459f975f915aaac26f", - "nmdc:5dd9bc51105920f3f629e8106235af3b", - "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "nmdc:e3f410adc2347396abfdec2a848000d9", - 
"nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "nmdc:1d4f5a605d4549801fda16da567efe56", - "nmdc:8bb5c66575c7c953719ae9947600ad49", - "nmdc:157f7672690ba8207808cc4386ff10a4" - ], - "was_informed_by": "gold:Gp0127655", - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0317978", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:21:25+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -34092,6 +28998,13 @@ "activity_set": [], "biosample_set": [], "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1827996307, + "type": "nmdc:DataObject", + "id": "jgi:574fde937ded5e3df1ee1428", + "name": "10533.2.165322.CCTTCCT-AAGGAAG.fastq.gz" + }, { "name": "Gp0127653_Filtered Reads", "description": "Filtered Reads for Gp0127653", @@ -34189,85 +29102,6 @@ "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", "file_size_bytes": 3982485 }, - { - "name": "Gp0127653_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", - "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", - "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "file_size_bytes": 3812 - }, - { - "name": "Gp0127653_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", - "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", - "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", - "file_size_bytes": 857087 - }, - { - "name": "Gp0127653_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127653", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", - "md5_checksum": "284ce1b28b8964cb525025d678277dba", - "id": "nmdc:284ce1b28b8964cb525025d678277dba", - "file_size_bytes": 235621 - }, - { - "name": "Gp0127653_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127653", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", - "md5_checksum": "a379527f61806391e42b3512146013a8", - "id": "nmdc:a379527f61806391e42b3512146013a8", - "file_size_bytes": 1437707313 - }, - { - "name": "Gp0127653_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127653", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", - "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", - "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "file_size_bytes": 255105 - }, - { - "name": "Gp0127653_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127653", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", - "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", - "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", - "file_size_bytes": 2327985 - }, - { - "name": "Gp0127653_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127653", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", - "md5_checksum": "be29ebcd7358653afec7381f9ca43431", - "id": "nmdc:be29ebcd7358653afec7381f9ca43431", - "file_size_bytes": 
1164013677 - }, - { - "name": "Gp0127653_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127653", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", - "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", - "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "file_size_bytes": 638368 - }, - { - "name": "Gp0127653_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127653", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", - "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", - "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", - "file_size_bytes": 3982485 - }, { "name": "Gp0127653_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127653", @@ -34658,38 +29492,6 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719b9833bcf838a70124b" - }, - "has_input": [ - "nmdc:8eec0e9c14abb418b906504d1675ecc5" - ], - "part_of": [ - "nmdc:mga079y988" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "nmdc:dbf03e26f7e1529762830161fe1f1906", - "nmdc:284ce1b28b8964cb525025d678277dba", - "nmdc:a379527f61806391e42b3512146013a8", - "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "nmdc:3219058371bf2f8081b2dd2b434ec145", - "nmdc:be29ebcd7358653afec7381f9ca43431", - "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" - ], - "was_informed_by": "gold:Gp0127653", - "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga079y988", - "started_at_time": "2021-10-11T02:23:35Z", - 
"type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:52:13+00:00" - } - ] + "pooling_set": [] } ] \ No newline at end of file From f33b6cea7cf99d5c85ee0c6b89e8338906395484 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Tue, 7 Nov 2023 12:44:27 -0800 Subject: [PATCH 38/91] updated with reabased_taxonomy --- ...sty-11-aygzgv51_assocated_record_dump.json | 6552 ++--------------- 1 file changed, 501 insertions(+), 6051 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json index 37a912d9..a23b6caa 100644 --- a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json +++ b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json @@ -104,85 +104,6 @@ "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", "file_size_bytes": 4221977 }, - { - "name": "Gp0115663_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", - "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", - "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "file_size_bytes": 13174 - }, - { - "name": "Gp0115663_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", - "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", - "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "file_size_bytes": 1035818 - }, - { - "name": "Gp0115663_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115663", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", - "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", - 
"id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "file_size_bytes": 262669 - }, - { - "name": "Gp0115663_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115663", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", - "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", - "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "file_size_bytes": 2189843623 - }, - { - "name": "Gp0115663_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115663", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", - "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", - "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "file_size_bytes": 260134 - }, - { - "name": "Gp0115663_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115663", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", - "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", - "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "file_size_bytes": 2343980 - }, - { - "name": "Gp0115663_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115663", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", - "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", - "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", - "file_size_bytes": 1785563917 - }, - { - "name": "Gp0115663_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115663", - "data_object_type": "Kraken2 Classification Report", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", - "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", - "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "file_size_bytes": 699896 - }, - { - "name": "Gp0115663_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115663", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", - "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", - "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", - "file_size_bytes": 4221977 - }, { "name": "Gp0115663_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115663", @@ -707,39 +628,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a31833bcf838a701ec1" - }, - "has_input": [ - "nmdc:7bf778baef033d36f118f8591256d6ef" - ], - "part_of": [ - "nmdc:mga0h9dt75" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "nmdc:7ca01ea379f0baed96f87d1435925f95", - "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" - ], - "was_informed_by": "gold:Gp0115663", - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -846,85 +735,6 @@ "id": 
"nmdc:b5091cfeed4fbea8316e50fbceea89bc", "file_size_bytes": 3983935 }, - { - "name": "Gp0115666_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", - "md5_checksum": "17454627f873cc37e80700c4751c81d6", - "id": "nmdc:17454627f873cc37e80700c4751c81d6", - "file_size_bytes": 10721 - }, - { - "name": "Gp0115666_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", - "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", - "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "file_size_bytes": 920924 - }, - { - "name": "Gp0115666_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115666", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", - "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", - "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "file_size_bytes": 257441 - }, - { - "name": "Gp0115666_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115666", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", - "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", - "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "file_size_bytes": 1468295025 - }, - { - "name": "Gp0115666_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115666", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", - "md5_checksum": 
"1d46eebd0f194f57dd9e92c9bc992891", - "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "file_size_bytes": 257081 - }, - { - "name": "Gp0115666_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115666", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", - "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", - "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "file_size_bytes": 2331968 - }, - { - "name": "Gp0115666_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115666", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", - "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", - "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "file_size_bytes": 1204548180 - }, - { - "name": "Gp0115666_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115666", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", - "md5_checksum": "368cf81424348cdf46d17c13908280e7", - "id": "nmdc:368cf81424348cdf46d17c13908280e7", - "file_size_bytes": 653697 - }, - { - "name": "Gp0115666_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115666", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", - "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", - "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", - "file_size_bytes": 3983935 - }, { "name": "Gp0115666_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115666", @@ -1389,39 +1199,7 @@ "collecting_biosamples_from_site_set": [], "date_created": 
null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a15833bcf838a701c88" - }, - "has_input": [ - "nmdc:0b301d2dd917c2be31422dd0e986dd5e" - ], - "part_of": [ - "nmdc:mga0eehe16" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:17454627f873cc37e80700c4751c81d6", - "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "nmdc:368cf81424348cdf46d17c13908280e7", - "nmdc:b5091cfeed4fbea8316e50fbceea89bc" - ], - "was_informed_by": "gold:Gp0115666", - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", - "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:06:19+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -1529,109 +1307,30 @@ "file_size_bytes": 4276256 }, { - "name": "Gp0115668_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", - "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", - "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "file_size_bytes": 13875 + "name": "Gp0115668_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115668", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", + "md5_checksum": "b2b862aede4f333acec79aac3afc7254", + "id": "nmdc:b2b862aede4f333acec79aac3afc7254", + "file_size_bytes": 182488593 }, { - "name": "Gp0115668_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV 
report for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", - "md5_checksum": "2529ede10eb159148711d016ec022af3", - "id": "nmdc:2529ede10eb159148711d016ec022af3", - "file_size_bytes": 956974 + "name": "Gp0115668_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115668", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", + "md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", + "id": "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", + "file_size_bytes": 181514952 }, { - "name": "Gp0115668_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115668", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", - "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", - "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "file_size_bytes": 265076 - }, - { - "name": "Gp0115668_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115668", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", - "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", - "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "file_size_bytes": 2377445510 - }, - { - "name": "Gp0115668_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115668", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", - "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", - "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "file_size_bytes": 258291 - }, - { - "name": 
"Gp0115668_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115668", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", - "md5_checksum": "890f9f52d828e1ea8277b52566763069", - "id": "nmdc:890f9f52d828e1ea8277b52566763069", - "file_size_bytes": 2333775 - }, - { - "name": "Gp0115668_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115668", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", - "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", - "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "file_size_bytes": 1966520263 - }, - { - "name": "Gp0115668_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115668", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", - "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", - "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", - "file_size_bytes": 707661 - }, - { - "name": "Gp0115668_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115668", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", - "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", - "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", - "file_size_bytes": 4276256 - }, - { - "name": "Gp0115668_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115668", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", - "md5_checksum": "b2b862aede4f333acec79aac3afc7254", - "id": 
"nmdc:b2b862aede4f333acec79aac3afc7254", - "file_size_bytes": 182488593 - }, - { - "name": "Gp0115668_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115668", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", - "md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", - "id": "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", - "file_size_bytes": 181514952 - }, - { - "name": "Gp0115668_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_covstats.txt", - "md5_checksum": "6ccb798d615b67dfb9c64ff32d6586c4", - "id": "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", - "file_size_bytes": 25682298 + "name": "Gp0115668_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_covstats.txt", + "md5_checksum": "6ccb798d615b67dfb9c64ff32d6586c4", + "id": "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", + "file_size_bytes": 25682298 }, { "name": "Gp0115668_Assembled AGP file", @@ -2093,39 +1792,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a4d833bcf838a7021ce" - }, - "has_input": [ - "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" - ], - "part_of": [ - "nmdc:mga0n66h21" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "nmdc:2529ede10eb159148711d016ec022af3", - "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "nmdc:890f9f52d828e1ea8277b52566763069", - "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - 
"nmdc:8677985c5e8ad92dd6d051f85950a636", - "nmdc:9b2f355a4c2ff3651a3d1179212e2914" - ], - "was_informed_by": "gold:Gp0115668", - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:19:17+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -2232,85 +1899,6 @@ "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", "file_size_bytes": 4358324 }, - { - "name": "Gp0115679_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", - "md5_checksum": "e20f8c00473472fa073adde871860801", - "id": "nmdc:e20f8c00473472fa073adde871860801", - "file_size_bytes": 18551 - }, - { - "name": "Gp0115679_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", - "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", - "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "file_size_bytes": 1200541 - }, - { - "name": "Gp0115679_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115679", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", - "md5_checksum": "f721d9dd168b0dea080b191a4396167e", - "id": "nmdc:f721d9dd168b0dea080b191a4396167e", - "file_size_bytes": 278990 - }, - { - "name": "Gp0115679_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115679", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", - "md5_checksum": "ab77e396ec643b58b54da92848b88a96", - "id": "nmdc:ab77e396ec643b58b54da92848b88a96", - "file_size_bytes": 4742886512 - }, - { - "name": "Gp0115679_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115679", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", - "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", - "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", - "file_size_bytes": 266907 - }, - { - "name": "Gp0115679_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115679", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", - "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", - "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "file_size_bytes": 2359747 - }, - { - "name": "Gp0115679_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115679", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", - "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", - "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "file_size_bytes": 3859620862 - }, - { - "name": "Gp0115679_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115679", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", - "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", - "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "file_size_bytes": 729541 - }, - { - "name": "Gp0115679_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115679", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", - "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", - "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", - "file_size_bytes": 4358324 - }, { "name": "Gp0115679_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115679", @@ -2930,39 +2518,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719fa833bcf838a701935" - }, - "has_input": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" - ], - "part_of": [ - "nmdc:mga0gg1q48" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e20f8c00473472fa073adde871860801", - "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "nmdc:f721d9dd168b0dea080b191a4396167e", - "nmdc:ab77e396ec643b58b54da92848b88a96", - "nmdc:f2514844e47a9e3d268671f80f152bc1", - "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "nmdc:77860ee043ae9738e7702a3f665b15fa" - ], - "was_informed_by": "gold:Gp0115679", - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:30:42+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -3070,109 +2626,30 @@ "file_size_bytes": 3979807 }, { - "name": "Gp0115667_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", - "md5_checksum": 
"56edf81e5f5102edf7e416bc9430fbb6", - "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "file_size_bytes": 10576 + "name": "Gp0115667_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115667", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", + "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", + "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "file_size_bytes": 62926054 }, { - "name": "Gp0115667_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", - "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", - "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "file_size_bytes": 792905 + "name": "Gp0115667_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115667", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", + "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", + "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", + "file_size_bytes": 62577490 }, { - "name": "Gp0115667_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115667", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", - "md5_checksum": "2afff209a40ca4895307f3a47080c534", - "id": "nmdc:2afff209a40ca4895307f3a47080c534", - "file_size_bytes": 254763 - }, - { - "name": "Gp0115667_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115667", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", - 
"md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", - "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "file_size_bytes": 1336111813 - }, - { - "name": "Gp0115667_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115667", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", - "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", - "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "file_size_bytes": 254506 - }, - { - "name": "Gp0115667_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115667", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", - "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", - "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "file_size_bytes": 2323153 - }, - { - "name": "Gp0115667_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115667", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", - "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", - "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", - "file_size_bytes": 1097852664 - }, - { - "name": "Gp0115667_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115667", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", - "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", - "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "file_size_bytes": 639213 - }, - { - "name": "Gp0115667_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115667", - "data_object_type": "Kraken2 Krona Plot", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", - "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", - "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", - "file_size_bytes": 3979807 - }, - { - "name": "Gp0115667_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115667", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", - "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", - "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", - "file_size_bytes": 62926054 - }, - { - "name": "Gp0115667_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115667", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", - "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", - "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", - "file_size_bytes": 62577490 - }, - { - "name": "Gp0115667_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", - "md5_checksum": "2e4532cb03bb1e9201976b9d65893788", - "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", - "file_size_bytes": 9189143 + "name": "Gp0115667_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", + "md5_checksum": "2e4532cb03bb1e9201976b9d65893788", + "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", + "file_size_bytes": 9189143 }, { "name": "Gp0115667_Assembled AGP file", @@ -3653,39 +3130,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { 
- "$oid": "61e71a10833bcf838a701aaa" - }, - "has_input": [ - "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" - ], - "part_of": [ - "nmdc:mga0n0je44" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "nmdc:2afff209a40ca4895307f3a47080c534", - "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "nmdc:eb189cbf0543203d2521397b73d4d34b", - "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "nmdc:ac90bf3384ce44d097f7897ac5ff8134" - ], - "was_informed_by": "gold:Gp0115667", - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:24+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -3792,85 +3237,6 @@ "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", "file_size_bytes": 3995680 }, - { - "name": "Gp0115664_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", - "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", - "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "file_size_bytes": 9591 - }, - { - "name": "Gp0115664_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", - "md5_checksum": "7f93f97242aed036019f13492f5af35c", - "id": "nmdc:7f93f97242aed036019f13492f5af35c", - "file_size_bytes": 885985 - }, - { - "name": "Gp0115664_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115664", - "data_object_type": "GOTTCHA2 Krona Plot", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", - "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", - "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "file_size_bytes": 251303 - }, - { - "name": "Gp0115664_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115664", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", - "md5_checksum": "a4243f71a0288f489c566ae85d85891d", - "id": "nmdc:a4243f71a0288f489c566ae85d85891d", - "file_size_bytes": 1268144933 - }, - { - "name": "Gp0115664_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115664", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", - "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", - "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "file_size_bytes": 254575 - }, - { - "name": "Gp0115664_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115664", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", - "md5_checksum": "a80779b32415ef001d0403f0b618b612", - "id": "nmdc:a80779b32415ef001d0403f0b618b612", - "file_size_bytes": 2327293 - }, - { - "name": "Gp0115664_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115664", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", - "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", - "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "file_size_bytes": 
1037932028 - }, - { - "name": "Gp0115664_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115664", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", - "md5_checksum": "ce47d6686edb7b3472102d5883229c45", - "id": "nmdc:ce47d6686edb7b3472102d5883229c45", - "file_size_bytes": 641242 - }, - { - "name": "Gp0115664_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115664", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", - "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", - "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", - "file_size_bytes": 3995680 - }, { "name": "Gp0115664_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115664", @@ -4316,39 +3682,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719dd833bcf838a70154e" - }, - "has_input": [ - "nmdc:232e31505b6a0251df2303c0563d64c1" - ], - "part_of": [ - "nmdc:mga0dm3v04" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "nmdc:7f93f97242aed036019f13492f5af35c", - "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "nmdc:a4243f71a0288f489c566ae85d85891d", - "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "nmdc:a80779b32415ef001d0403f0b618b612", - "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "nmdc:ce47d6686edb7b3472102d5883229c45", - "nmdc:29b75e78b0b7fd8115614d8e9d341d46" - ], - "was_informed_by": "gold:Gp0115664", - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", - "started_at_time": "2021-10-11T02:28:16Z", - 
"type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:34+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -4456,109 +3790,30 @@ "file_size_bytes": 4410156 }, { - "name": "Gp0115678_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", - "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", - "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "file_size_bytes": 19085 + "name": "Gp0115678_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115678", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", + "md5_checksum": "d305e212cce8f84f14561d3957c968b1", + "id": "nmdc:d305e212cce8f84f14561d3957c968b1", + "file_size_bytes": 205441595 }, { - "name": "Gp0115678_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", - "md5_checksum": "12b2d6afc355bce76249d750a9fab534", - "id": "nmdc:12b2d6afc355bce76249d750a9fab534", - "file_size_bytes": 1243929 + "name": "Gp0115678_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115678", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", + "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", + "id": "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", + "file_size_bytes": 204286677 }, { - "name": "Gp0115678_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115678", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", - "md5_checksum": 
"18214017d56658a48723c9c998dcba7e", - "id": "nmdc:18214017d56658a48723c9c998dcba7e", - "file_size_bytes": 281148 - }, - { - "name": "Gp0115678_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115678", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", - "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", - "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "file_size_bytes": 3491726958 - }, - { - "name": "Gp0115678_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115678", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", - "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", - "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "file_size_bytes": 264123 - }, - { - "name": "Gp0115678_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115678", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", - "md5_checksum": "f9c01985f057825149d35de0650095a8", - "id": "nmdc:f9c01985f057825149d35de0650095a8", - "file_size_bytes": 2352347 - }, - { - "name": "Gp0115678_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115678", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", - "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", - "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "file_size_bytes": 2880889483 - }, - { - "name": "Gp0115678_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115678", - "data_object_type": 
"Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", - "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", - "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "file_size_bytes": 735519 - }, - { - "name": "Gp0115678_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115678", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", - "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", - "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", - "file_size_bytes": 4410156 - }, - { - "name": "Gp0115678_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115678", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", - "md5_checksum": "d305e212cce8f84f14561d3957c968b1", - "id": "nmdc:d305e212cce8f84f14561d3957c968b1", - "file_size_bytes": 205441595 - }, - { - "name": "Gp0115678_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115678", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", - "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", - "id": "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", - "file_size_bytes": 204286677 - }, - { - "name": "Gp0115678_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_covstats.txt", - "md5_checksum": "444562a4e7108077b7e541a5d9064086", - "id": "nmdc:444562a4e7108077b7e541a5d9064086", - "file_size_bytes": 30470067 + "name": "Gp0115678_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115678", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_covstats.txt", + "md5_checksum": "444562a4e7108077b7e541a5d9064086", + "id": "nmdc:444562a4e7108077b7e541a5d9064086", + "file_size_bytes": 30470067 }, { "name": "Gp0115678_Assembled AGP file", @@ -5077,39 +4332,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719f6833bcf838a701854" - }, - "has_input": [ - "nmdc:e0ce93b88419f87568ff206e0efe3a24" - ], - "part_of": [ - "nmdc:mga026tn70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "nmdc:12b2d6afc355bce76249d750a9fab534", - "nmdc:18214017d56658a48723c9c998dcba7e", - "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "nmdc:f9c01985f057825149d35de0650095a8", - "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "nmdc:38d41d4299141abe28bf0405af80cdfc" - ], - "was_informed_by": "gold:Gp0115678", - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga026tn70", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:18:17+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -5216,85 +4439,6 @@ "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", "file_size_bytes": 3949449 }, - { - "name": "Gp0127623_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", - "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", - "id": "nmdc:ac39e916e17e08a845bb40d97519d8be", - "file_size_bytes": 1553 - }, - { - "name": "Gp0127623_Gottcha2 full TSV report", - "description": 
"Gottcha2 full TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", - "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", - "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "file_size_bytes": 836575 - }, - { - "name": "Gp0127623_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127623", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", - "md5_checksum": "eda0c04d692ecf137585676c15924626", - "id": "nmdc:eda0c04d692ecf137585676c15924626", - "file_size_bytes": 231097 - }, - { - "name": "Gp0127623_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127623", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", - "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", - "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "file_size_bytes": 1669254765 - }, - { - "name": "Gp0127623_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127623", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", - "md5_checksum": "e1f164c534830cd628d67c564ace863b", - "id": "nmdc:e1f164c534830cd628d67c564ace863b", - "file_size_bytes": 255784 - }, - { - "name": "Gp0127623_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127623", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", - "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", - "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", - 
"file_size_bytes": 2333760 - }, - { - "name": "Gp0127623_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127623", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", - "md5_checksum": "040e6ca695283a12711c16344acd1e76", - "id": "nmdc:040e6ca695283a12711c16344acd1e76", - "file_size_bytes": 1335651191 - }, - { - "name": "Gp0127623_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127623", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", - "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", - "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "file_size_bytes": 647609 - }, - { - "name": "Gp0127623_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127623", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", - "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", - "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", - "file_size_bytes": 3949449 - }, { "name": "Gp0127623_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127623", @@ -5740,39 +4884,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e718fc833bcf838a6ff4c9" - }, - "has_input": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7" - ], - "part_of": [ - "nmdc:mga03eyz63" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ac39e916e17e08a845bb40d97519d8be", - "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "nmdc:eda0c04d692ecf137585676c15924626", - 
"nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "nmdc:e1f164c534830cd628d67c564ace863b", - "nmdc:a1062576d998b7b82e39b8d8520fa37e", - "nmdc:040e6ca695283a12711c16344acd1e76", - "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "nmdc:f2eed9669268f69dbc31f0c4f839fccf" - ], - "was_informed_by": "gold:Gp0127623", - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:42:25+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -5880,109 +4992,30 @@ "file_size_bytes": 3921941 }, { - "name": "Gp0127625_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", - "md5_checksum": "550b631e1de3e01392154e54493d47ef", - "id": "nmdc:550b631e1de3e01392154e54493d47ef", - "file_size_bytes": 754 + "name": "Gp0127625_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127625", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", + "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", + "id": "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "file_size_bytes": 171703232 }, { - "name": "Gp0127625_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", - "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", - "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "file_size_bytes": 641658 + "name": "Gp0127625_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127625", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", + "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", + "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", + "file_size_bytes": 170799869 }, { - "name": "Gp0127625_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127625", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", - "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", - "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "file_size_bytes": 228494 - }, - { - "name": "Gp0127625_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127625", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", - "md5_checksum": "b09795fc768257d881e8ce547be0ce68", - "id": "nmdc:b09795fc768257d881e8ce547be0ce68", - "file_size_bytes": 1849982678 - }, - { - "name": "Gp0127625_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127625", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", - "md5_checksum": "064ba18473eb80ff0b484311565d2894", - "id": "nmdc:064ba18473eb80ff0b484311565d2894", - "file_size_bytes": 253852 - }, - { - "name": "Gp0127625_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127625", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", - "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", - "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "file_size_bytes": 2331556 - }, - { - "name": "Gp0127625_Kraken 
classification TSV report", - "description": "Kraken classification TSV report for Gp0127625", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", - "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", - "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "file_size_bytes": 1471976767 - }, - { - "name": "Gp0127625_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127625", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", - "md5_checksum": "bc8acb862c8942616ef07302667c334f", - "id": "nmdc:bc8acb862c8942616ef07302667c334f", - "file_size_bytes": 627498 - }, - { - "name": "Gp0127625_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127625", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", - "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", - "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", - "file_size_bytes": 3921941 - }, - { - "name": "Gp0127625_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127625", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", - "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", - "id": "nmdc:5b6e7cbece9167002b12c3415afa9bb8", - "file_size_bytes": 171703232 - }, - { - "name": "Gp0127625_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127625", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", - "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", - "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", - 
"file_size_bytes": 170799869 - }, - { - "name": "Gp0127625_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", - "md5_checksum": "d231edb2040700184064615a28e65ee5", - "id": "nmdc:d231edb2040700184064615a28e65ee5", - "file_size_bytes": 23875845 + "name": "Gp0127625_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", + "md5_checksum": "d231edb2040700184064615a28e65ee5", + "id": "nmdc:d231edb2040700184064615a28e65ee5", + "file_size_bytes": 23875845 }, { "name": "Gp0127625_Assembled AGP file", @@ -6479,39 +5512,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e718e2833bcf838a6ff0ce" - }, - "has_input": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" - ], - "part_of": [ - "nmdc:mga0bfpq58" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:550b631e1de3e01392154e54493d47ef", - "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "nmdc:b09795fc768257d881e8ce547be0ce68", - "nmdc:064ba18473eb80ff0b484311565d2894", - "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "nmdc:bc8acb862c8942616ef07302667c334f", - "nmdc:b797ed6cb135c993b582cac368b2a93c" - ], - "was_informed_by": "gold:Gp0127625", - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:29:50+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": 
[], @@ -6618,85 +5619,6 @@ "id": "nmdc:bb3e6793c4f036b9756f075d41846964", "file_size_bytes": 3987411 }, - { - "name": "Gp0127626_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", - "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", - "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "file_size_bytes": 2399 - }, - { - "name": "Gp0127626_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", - "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", - "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "file_size_bytes": 743066 - }, - { - "name": "Gp0127626_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127626", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", - "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", - "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "file_size_bytes": 233970 - }, - { - "name": "Gp0127626_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127626", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", - "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", - "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", - "file_size_bytes": 1673697764 - }, - { - "name": "Gp0127626_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127626", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", - 
"md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", - "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127626_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127626", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", - "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", - "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "file_size_bytes": 2327521 - }, - { - "name": "Gp0127626_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127626", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", - "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", - "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "file_size_bytes": 1343921825 - }, - { - "name": "Gp0127626_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127626", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", - "md5_checksum": "806b27f1fa5a423100b113bb56edc708", - "id": "nmdc:806b27f1fa5a423100b113bb56edc708", - "file_size_bytes": 638478 - }, - { - "name": "Gp0127626_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127626", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", - "md5_checksum": "bb3e6793c4f036b9756f075d41846964", - "id": "nmdc:bb3e6793c4f036b9756f075d41846964", - "file_size_bytes": 3987411 - }, { "name": "Gp0127626_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127626", @@ -7105,39 +6027,7 @@ "collecting_biosamples_from_site_set": [], 
"date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7195e833bcf838a700602" - }, - "has_input": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90" - ], - "part_of": [ - "nmdc:mga04xnj45" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "nmdc:dd4023a1488bdfc73b12c422b62b274a", - "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "nmdc:806b27f1fa5a423100b113bb56edc708", - "nmdc:bb3e6793c4f036b9756f075d41846964" - ], - "was_informed_by": "gold:Gp0127626", - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:54:56+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -7245,109 +6135,30 @@ "file_size_bytes": 3921891 }, { - "name": "Gp0127624_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", - "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", - "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "file_size_bytes": 1500 + "name": "Gp0127624_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127624", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", + "md5_checksum": "464a9db7a94e7e0646b1ff8b501d82f3", + "id": "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "file_size_bytes": 95468011 }, { - "name": "Gp0127624_Gottcha2 full TSV report", - "description": 
"Gottcha2 full TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", - "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", - "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", - "file_size_bytes": 692993 + "name": "Gp0127624_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127624", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", + "md5_checksum": "0a50f88775f36e9238152f3319252853", + "id": "nmdc:0a50f88775f36e9238152f3319252853", + "file_size_bytes": 94893921 }, { - "name": "Gp0127624_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127624", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", - "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", - "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "file_size_bytes": 230779 - }, - { - "name": "Gp0127624_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127624", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", - "md5_checksum": "77db34862804280185d3b1ce961e5338", - "id": "nmdc:77db34862804280185d3b1ce961e5338", - "file_size_bytes": 1645928829 - }, - { - "name": "Gp0127624_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127624", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", - "md5_checksum": "84e3efb84d961d189ece310911ccf475", - "id": "nmdc:84e3efb84d961d189ece310911ccf475", - "file_size_bytes": 254646 - }, - { - "name": 
"Gp0127624_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127624", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", - "md5_checksum": "b8fd31679921f8b68c80917e14caa260", - "id": "nmdc:b8fd31679921f8b68c80917e14caa260", - "file_size_bytes": 2332082 - }, - { - "name": "Gp0127624_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127624", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", - "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", - "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", - "file_size_bytes": 1316771556 - }, - { - "name": "Gp0127624_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127624", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", - "md5_checksum": "0781e8042688219035efafe7d75858d0", - "id": "nmdc:0781e8042688219035efafe7d75858d0", - "file_size_bytes": 626940 - }, - { - "name": "Gp0127624_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127624", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", - "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", - "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", - "file_size_bytes": 3921891 - }, - { - "name": "Gp0127624_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127624", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", - "md5_checksum": "464a9db7a94e7e0646b1ff8b501d82f3", - "id": 
"nmdc:464a9db7a94e7e0646b1ff8b501d82f3", - "file_size_bytes": 95468011 - }, - { - "name": "Gp0127624_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127624", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", - "md5_checksum": "0a50f88775f36e9238152f3319252853", - "id": "nmdc:0a50f88775f36e9238152f3319252853", - "file_size_bytes": 94893921 - }, - { - "name": "Gp0127624_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", - "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", - "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", - "file_size_bytes": 15112642 + "name": "Gp0127624_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", + "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", + "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", + "file_size_bytes": 15112642 }, { "name": "Gp0127624_Assembled AGP file", @@ -7806,39 +6617,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7191b833bcf838a6ff905" - }, - "has_input": [ - "nmdc:8585f6896702bddf64b02191be5921f4" - ], - "part_of": [ - "nmdc:mga0e8jh10" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "nmdc:6c7fec765f2a225f168ebb1f69961013", - "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "nmdc:77db34862804280185d3b1ce961e5338", - "nmdc:84e3efb84d961d189ece310911ccf475", - "nmdc:b8fd31679921f8b68c80917e14caa260", - "nmdc:715c66c69b621478da7d48481f3cbd1d", - 
"nmdc:0781e8042688219035efafe7d75858d0", - "nmdc:85547ab860ef9d6877ba7abc8881740a" - ], - "was_informed_by": "gold:Gp0127624", - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:30:59+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -7945,85 +6724,6 @@ "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", "file_size_bytes": 3933712 }, - { - "name": "Gp0127629_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", - "md5_checksum": "f4f810491708ff25956cddd005cc9944", - "id": "nmdc:f4f810491708ff25956cddd005cc9944", - "file_size_bytes": 1206 - }, - { - "name": "Gp0127629_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", - "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", - "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", - "file_size_bytes": 662074 - }, - { - "name": "Gp0127629_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127629", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", - "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", - "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "file_size_bytes": 229307 - }, - { - "name": "Gp0127629_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127629", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", - "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", - "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "file_size_bytes": 1667543500 - }, - { - "name": "Gp0127629_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127629", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", - "md5_checksum": "6a216ec913587e26ddc036b703126d76", - "id": "nmdc:6a216ec913587e26ddc036b703126d76", - "file_size_bytes": 253079 - }, - { - "name": "Gp0127629_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127629", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", - "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", - "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", - "file_size_bytes": 2326900 - }, - { - "name": "Gp0127629_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127629", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", - "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", - "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "file_size_bytes": 1328025421 - }, - { - "name": "Gp0127629_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127629", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", - "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", - "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "file_size_bytes": 628969 - }, - { - "name": "Gp0127629_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127629", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", - "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", - "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", - "file_size_bytes": 3933712 - }, { "name": "Gp0127629_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127629", @@ -8488,39 +7188,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7195d833bcf838a70058b" - }, - "has_input": [ - "nmdc:0db98173ae3395106e24d250b2655f06" - ], - "part_of": [ - "nmdc:mga071r920" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f4f810491708ff25956cddd005cc9944", - "nmdc:67e3c200d3765733af33d1db1f4bf968", - "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "nmdc:6a216ec913587e26ddc036b703126d76", - "nmdc:ebed7286f886596764a66a0d1dac3e43", - "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "nmdc:81108175d5ef2ca158f516bfc75d3cd9" - ], - "was_informed_by": "gold:Gp0127629", - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga071r920", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:33+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -8628,109 +7296,30 @@ "file_size_bytes": 4035375 }, { - "name": "Gp0127628_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", - "md5_checksum": 
"a6ed9af48a9ad473ab66721829a5c226", - "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "file_size_bytes": 3472 + "name": "Gp0127628_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127628", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", + "md5_checksum": "9e550afb3bcd8d66807f861ecfed815b", + "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "file_size_bytes": 74277737 }, { - "name": "Gp0127628_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", - "md5_checksum": "335dbf6f1055de0950988a002f432c0b", - "id": "nmdc:335dbf6f1055de0950988a002f432c0b", - "file_size_bytes": 863867 + "name": "Gp0127628_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127628", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", + "md5_checksum": "5e79fce62ffa8c4479be5159143797e0", + "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", + "file_size_bytes": 73802989 }, { - "name": "Gp0127628_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127628", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", - "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", - "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "file_size_bytes": 234974 - }, - { - "name": "Gp0127628_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127628", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", - 
"md5_checksum": "224085164a389c6f207967ed03b3e6af", - "id": "nmdc:224085164a389c6f207967ed03b3e6af", - "file_size_bytes": 2220789142 - }, - { - "name": "Gp0127628_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127628", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", - "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", - "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", - "file_size_bytes": 257030 - }, - { - "name": "Gp0127628_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127628", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", - "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", - "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "file_size_bytes": 2337568 - }, - { - "name": "Gp0127628_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127628", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", - "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", - "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "file_size_bytes": 1776487262 - }, - { - "name": "Gp0127628_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127628", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", - "md5_checksum": "aae9e961d8ed716457616c8a8841037b", - "id": "nmdc:aae9e961d8ed716457616c8a8841037b", - "file_size_bytes": 664011 - }, - { - "name": "Gp0127628_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127628", - "data_object_type": "Kraken2 Krona Plot", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", - "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", - "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", - "file_size_bytes": 4035375 - }, - { - "name": "Gp0127628_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127628", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", - "md5_checksum": "9e550afb3bcd8d66807f861ecfed815b", - "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", - "file_size_bytes": 74277737 - }, - { - "name": "Gp0127628_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127628", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", - "md5_checksum": "5e79fce62ffa8c4479be5159143797e0", - "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", - "file_size_bytes": 73802989 - }, - { - "name": "Gp0127628_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_covstats.txt", - "md5_checksum": "682fd042d6adcd93f75c3eae2cf32241", - "id": "nmdc:682fd042d6adcd93f75c3eae2cf32241", - "file_size_bytes": 12462125 + "name": "Gp0127628_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_covstats.txt", + "md5_checksum": "682fd042d6adcd93f75c3eae2cf32241", + "id": "nmdc:682fd042d6adcd93f75c3eae2cf32241", + "file_size_bytes": 12462125 }, { "name": "Gp0127628_Assembled AGP file", @@ -9151,39 +7740,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": 
{ - "$oid": "61e7193b833bcf838a6fff9c" - }, - "has_input": [ - "nmdc:f6f1760721d73fc57919b2115a1d47ec" - ], - "part_of": [ - "nmdc:mga0x5c381" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "nmdc:335dbf6f1055de0950988a002f432c0b", - "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "nmdc:224085164a389c6f207967ed03b3e6af", - "nmdc:39ba17263c144761a8bdcc1645c034f5", - "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "nmdc:aae9e961d8ed716457616c8a8841037b", - "nmdc:ba83d6ab837403f4bcbc9400a0460457" - ], - "was_informed_by": "gold:Gp0127628", - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:59+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -9290,85 +7847,6 @@ "id": "nmdc:50093825ec73dcabe66aa353de766beb", "file_size_bytes": 3993246 }, - { - "name": "Gp0127631_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", - "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", - "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "file_size_bytes": 1227 - }, - { - "name": "Gp0127631_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", - "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", - "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "file_size_bytes": 647196 - }, - { - "name": "Gp0127631_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127631", - "data_object_type": "GOTTCHA2 Krona Plot", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", - "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", - "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "file_size_bytes": 229312 - }, - { - "name": "Gp0127631_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127631", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", - "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", - "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", - "file_size_bytes": 1861431092 - }, - { - "name": "Gp0127631_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127631", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", - "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", - "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "file_size_bytes": 254665 - }, - { - "name": "Gp0127631_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127631", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", - "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", - "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", - "file_size_bytes": 2334578 - }, - { - "name": "Gp0127631_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127631", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", - "md5_checksum": "6a46583da876b9d6287302308df0b9fd", - "id": "nmdc:6a46583da876b9d6287302308df0b9fd", - "file_size_bytes": 
1483354621 - }, - { - "name": "Gp0127631_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127631", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", - "md5_checksum": "af619dc5a0423509a4beaca26aa61000", - "id": "nmdc:af619dc5a0423509a4beaca26aa61000", - "file_size_bytes": 640329 - }, - { - "name": "Gp0127631_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127631", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", - "md5_checksum": "50093825ec73dcabe66aa353de766beb", - "id": "nmdc:50093825ec73dcabe66aa353de766beb", - "file_size_bytes": 3993246 - }, { "name": "Gp0127631_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127631", @@ -9852,39 +8330,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71938833bcf838a6ffe7a" - }, - "has_input": [ - "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" - ], - "part_of": [ - "nmdc:mga0jx8k09" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "nmdc:0d1729a83798b752f33eeb8d97afe972", - "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "nmdc:ea27c005b1788434c2198ad60939d4bc", - "nmdc:6a46583da876b9d6287302308df0b9fd", - "nmdc:af619dc5a0423509a4beaca26aa61000", - "nmdc:50093825ec73dcabe66aa353de766beb" - ], - "was_informed_by": "gold:Gp0127631", - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", - "started_at_time": "2021-10-11T02:26:22Z", - 
"type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:40:31+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -9992,109 +8438,30 @@ "file_size_bytes": 3962195 }, { - "name": "Gp0127630_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", - "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", - "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "file_size_bytes": 3373 + "name": "Gp0127630_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127630", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", + "md5_checksum": "7b35237c97a75f17ba74be0fe96416c9", + "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "file_size_bytes": 57511432 }, { - "name": "Gp0127630_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", - "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", - "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "file_size_bytes": 791488 + "name": "Gp0127630_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127630", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", + "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", + "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", + "file_size_bytes": 57128690 }, { - "name": "Gp0127630_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127630", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", - "md5_checksum": 
"db82b41936f37bbbeaa027ffc25b58cd", - "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "file_size_bytes": 235803 - }, - { - "name": "Gp0127630_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127630", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", - "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", - "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", - "file_size_bytes": 1974171566 - }, - { - "name": "Gp0127630_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127630", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", - "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", - "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "file_size_bytes": 255012 - }, - { - "name": "Gp0127630_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127630", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", - "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", - "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "file_size_bytes": 2330430 - }, - { - "name": "Gp0127630_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127630", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", - "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", - "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "file_size_bytes": 1584744477 - }, - { - "name": "Gp0127630_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127630", - "data_object_type": 
"Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", - "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", - "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "file_size_bytes": 650172 - }, - { - "name": "Gp0127630_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127630", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", - "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", - "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", - "file_size_bytes": 3962195 - }, - { - "name": "Gp0127630_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127630", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", - "md5_checksum": "7b35237c97a75f17ba74be0fe96416c9", - "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", - "file_size_bytes": 57511432 - }, - { - "name": "Gp0127630_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127630", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", - "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", - "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", - "file_size_bytes": 57128690 - }, - { - "name": "Gp0127630_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", - "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", - "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", - "file_size_bytes": 10020081 + "name": "Gp0127630_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127630", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", + "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", + "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", + "file_size_bytes": 10020081 }, { "name": "Gp0127630_Assembled AGP file", @@ -10534,39 +8901,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71939833bcf838a6fff09" - }, - "has_input": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" - ], - "part_of": [ - "nmdc:mga09n3g47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "nmdc:2f21fd19f055d1931ab82016ed781a12", - "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" - ], - "was_informed_by": "gold:Gp0127630", - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", - "started_at_time": "2021-10-11T02:26:53Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:54:22+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -10673,85 +9008,6 @@ "id": "nmdc:89e810af4915f0e117eaa60550587453", "file_size_bytes": 3891844 }, - { - "name": "Gp0127633_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", - "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", - "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", - "file_size_bytes": 875 - }, - { - "name": "Gp0127633_Gottcha2 full TSV report", - "description": 
"Gottcha2 full TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", - "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", - "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "file_size_bytes": 578856 - }, - { - "name": "Gp0127633_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127633", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", - "md5_checksum": "f445af1a7774572d156f55a898d26f09", - "id": "nmdc:f445af1a7774572d156f55a898d26f09", - "file_size_bytes": 228067 - }, - { - "name": "Gp0127633_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127633", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", - "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", - "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", - "file_size_bytes": 1646942155 - }, - { - "name": "Gp0127633_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127633", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", - "md5_checksum": "28beb8baabdaf346f2066b40f375a152", - "id": "nmdc:28beb8baabdaf346f2066b40f375a152", - "file_size_bytes": 252735 - }, - { - "name": "Gp0127633_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127633", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", - "md5_checksum": "1f74a43724c4afed5563499d05601e22", - "id": "nmdc:1f74a43724c4afed5563499d05601e22", - 
"file_size_bytes": 2329168 - }, - { - "name": "Gp0127633_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127633", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", - "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", - "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "file_size_bytes": 1310443491 - }, - { - "name": "Gp0127633_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127633", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", - "md5_checksum": "275268a6b5aca33c427d11877bcfa674", - "id": "nmdc:275268a6b5aca33c427d11877bcfa674", - "file_size_bytes": 621441 - }, - { - "name": "Gp0127633_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127633", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", - "md5_checksum": "89e810af4915f0e117eaa60550587453", - "id": "nmdc:89e810af4915f0e117eaa60550587453", - "file_size_bytes": 3891844 - }, { "name": "Gp0127633_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127633", @@ -11292,39 +9548,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7193c833bcf838a70001a" - }, - "has_input": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" - ], - "part_of": [ - "nmdc:mga05zvf81" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8bd9eb762acabbac5d079c379c28e381", - "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "nmdc:f445af1a7774572d156f55a898d26f09", - 
"nmdc:e11fcbf66318878c05984fa3d893e3b7", - "nmdc:28beb8baabdaf346f2066b40f375a152", - "nmdc:1f74a43724c4afed5563499d05601e22", - "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "nmdc:275268a6b5aca33c427d11877bcfa674", - "nmdc:89e810af4915f0e117eaa60550587453" - ], - "was_informed_by": "gold:Gp0127633", - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:40:06+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -11432,109 +9656,30 @@ "file_size_bytes": 3926756 }, { - "name": "Gp0127627_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", - "md5_checksum": "a5ac6665e5d66242b1c885a911236982", - "id": "nmdc:a5ac6665e5d66242b1c885a911236982", - "file_size_bytes": 5530 + "name": "Gp0127627_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127627", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", + "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", + "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "file_size_bytes": 19853676 }, { - "name": "Gp0127627_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", - "md5_checksum": "d19478a191693d643157a89c69cc02d1", - "id": "nmdc:d19478a191693d643157a89c69cc02d1", - "file_size_bytes": 825047 + "name": "Gp0127627_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127627", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", + "md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", + "id": "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", + "file_size_bytes": 19699986 }, { - "name": "Gp0127627_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127627", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", - "md5_checksum": "679a82699663e88a5e8828ee081fa967", - "id": "nmdc:679a82699663e88a5e8828ee081fa967", - "file_size_bytes": 241114 - }, - { - "name": "Gp0127627_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127627", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", - "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", - "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "file_size_bytes": 1463660267 - }, - { - "name": "Gp0127627_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127627", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", - "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", - "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", - "file_size_bytes": 254347 - }, - { - "name": "Gp0127627_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127627", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", - "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", - "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "file_size_bytes": 2330603 - }, - { - "name": "Gp0127627_Kraken classification 
TSV report", - "description": "Kraken classification TSV report for Gp0127627", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", - "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", - "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "file_size_bytes": 1177609473 - }, - { - "name": "Gp0127627_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127627", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", - "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", - "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", - "file_size_bytes": 643281 - }, - { - "name": "Gp0127627_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127627", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", - "md5_checksum": "bab24ab64ad432d115f182df7198d46e", - "id": "nmdc:bab24ab64ad432d115f182df7198d46e", - "file_size_bytes": 3926756 - }, - { - "name": "Gp0127627_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127627", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", - "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", - "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", - "file_size_bytes": 19853676 - }, - { - "name": "Gp0127627_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127627", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", - "md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", - "id": "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", - "file_size_bytes": 
19699986 - }, - { - "name": "Gp0127627_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", - "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", - "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", - "file_size_bytes": 3997845 + "name": "Gp0127627_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", + "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", + "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", + "file_size_bytes": 3997845 }, { "name": "Gp0127627_Assembled AGP file", @@ -11897,39 +10042,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7195c833bcf838a70049b" - }, - "has_input": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf" - ], - "part_of": [ - "nmdc:mga0daby71" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a5ac6665e5d66242b1c885a911236982", - "nmdc:d19478a191693d643157a89c69cc02d1", - "nmdc:679a82699663e88a5e8828ee081fa967", - "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "nmdc:0380e478962be82e0d97a6339f7f3b91", - "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "nmdc:a2a0029691c04851f4a98003a773fe3f", - "nmdc:bab24ab64ad432d115f182df7198d46e" - ], - "was_informed_by": "gold:Gp0127627", - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0daby71", - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T19:08:49+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -12036,85 
+10149,6 @@ "id": "nmdc:e350fda9bd0651755171d79b413b8da3", "file_size_bytes": 3959152 }, - { - "name": "Gp0127632_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", - "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", - "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "file_size_bytes": 2899 - }, - { - "name": "Gp0127632_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", - "md5_checksum": "6c54105711e818c4d8169ab595b05efe", - "id": "nmdc:6c54105711e818c4d8169ab595b05efe", - "file_size_bytes": 769416 - }, - { - "name": "Gp0127632_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127632", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", - "md5_checksum": "adb155cdb656648496484998a62fb96f", - "id": "nmdc:adb155cdb656648496484998a62fb96f", - "file_size_bytes": 235384 - }, - { - "name": "Gp0127632_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127632", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", - "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", - "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "file_size_bytes": 1917130445 - }, - { - "name": "Gp0127632_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127632", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", - "md5_checksum": 
"f345b3a57c37097a860e38d5e83835b8", - "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", - "file_size_bytes": 255290 - }, - { - "name": "Gp0127632_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127632", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", - "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", - "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "file_size_bytes": 2333225 - }, - { - "name": "Gp0127632_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127632", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", - "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", - "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "file_size_bytes": 1537863470 - }, - { - "name": "Gp0127632_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127632", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", - "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", - "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "file_size_bytes": 648597 - }, - { - "name": "Gp0127632_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127632", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", - "md5_checksum": "e350fda9bd0651755171d79b413b8da3", - "id": "nmdc:e350fda9bd0651755171d79b413b8da3", - "file_size_bytes": 3959152 - }, { "name": "Gp0127632_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127632", @@ -12579,39 +10613,7 @@ "collecting_biosamples_from_site_set": [], "date_created": 
null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7195d833bcf838a700521" - }, - "has_input": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" - ], - "part_of": [ - "nmdc:mga0b6cy30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "nmdc:6c54105711e818c4d8169ab595b05efe", - "nmdc:adb155cdb656648496484998a62fb96f", - "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "nmdc:f345b3a57c37097a860e38d5e83835b8", - "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "nmdc:e350fda9bd0651755171d79b413b8da3" - ], - "was_informed_by": "gold:Gp0127632", - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:08:32+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -12718,85 +10720,6 @@ "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", "file_size_bytes": 4020978 }, - { - "name": "Gp0127636_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", - "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", - "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "file_size_bytes": 5547 - }, - { - "name": "Gp0127636_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", - "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", - "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "file_size_bytes": 965042 - }, - { - "name": "Gp0127636_Gottcha2 Krona HTML 
report", - "description": "Gottcha2 Krona HTML report for Gp0127636", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", - "md5_checksum": "827ad863c875ea14473c9903d192fa73", - "id": "nmdc:827ad863c875ea14473c9903d192fa73", - "file_size_bytes": 242495 - }, - { - "name": "Gp0127636_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127636", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", - "md5_checksum": "957074ca49765b22348e27b0133d8ba0", - "id": "nmdc:957074ca49765b22348e27b0133d8ba0", - "file_size_bytes": 2151939041 - }, - { - "name": "Gp0127636_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127636", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", - "md5_checksum": "9253645582296696cb33b11754832574", - "id": "nmdc:9253645582296696cb33b11754832574", - "file_size_bytes": 257932 - }, - { - "name": "Gp0127636_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127636", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", - "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", - "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "file_size_bytes": 2335219 - }, - { - "name": "Gp0127636_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127636", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", - 
"md5_checksum": "75180fce38f38a6307231b47a8d2b23b", - "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", - "file_size_bytes": 1746049273 - }, - { - "name": "Gp0127636_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127636", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", - "md5_checksum": "b4524a34937893768dbd3752068dee0c", - "id": "nmdc:b4524a34937893768dbd3752068dee0c", - "file_size_bytes": 660975 - }, - { - "name": "Gp0127636_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127636", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", - "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", - "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", - "file_size_bytes": 4020978 - }, { "name": "Gp0127636_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127636", @@ -13182,146 +11105,35 @@ "study_set": [], "field_research_site_set": [], "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71959833bcf838a70040a" - }, - "has_input": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6" - ], - "part_of": [ - "nmdc:mga02tph39" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "nmdc:827ad863c875ea14473c9903d192fa73", - "nmdc:957074ca49765b22348e27b0133d8ba0", - "nmdc:9253645582296696cb33b11754832574", - "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "nmdc:75180fce38f38a6307231b47a8d2b23b", - "nmdc:b4524a34937893768dbd3752068dee0c", - "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" - ], - "was_informed_by": "gold:Gp0127636", - "id": 
"nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga02tph39", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:49:37+00:00" - } - ] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127634_Filtered Reads", - "description": "Filtered Reads for Gp0127634", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", - "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", - "id": "nmdc:ac889627d813c8e34cfbf79a4264c590", - "file_size_bytes": 2316462404 - }, - { - "name": "Gp0127634_Filtered Stats", - "description": "Filtered Stats for Gp0127634", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", - "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", - "id": "nmdc:0dfd55be1779ae7922d80aa22034c9a1", - "file_size_bytes": 291 - }, - { - "name": "Gp0127634_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", - "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", - "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "file_size_bytes": 4224 - }, - { - "name": "Gp0127634_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", - "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", - "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "file_size_bytes": 875501 - }, - { - "name": "Gp0127634_Gottcha2 Krona HTML 
report", - "description": "Gottcha2 Krona HTML report for Gp0127634", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", - "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", - "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", - "file_size_bytes": 238755 - }, - { - "name": "Gp0127634_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127634", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", - "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", - "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "file_size_bytes": 2051793471 - }, - { - "name": "Gp0127634_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127634", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", - "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", - "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", - "file_size_bytes": 256560 - }, - { - "name": "Gp0127634_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127634", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", - "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", - "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "file_size_bytes": 2334325 - }, - { - "name": "Gp0127634_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127634", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", - 
"md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", - "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "file_size_bytes": 1649071235 - }, + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127634_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127634", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", - "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "file_size_bytes": 654782 + "name": "Gp0127634_Filtered Reads", + "description": "Filtered Reads for Gp0127634", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", + "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", + "id": "nmdc:ac889627d813c8e34cfbf79a4264c590", + "file_size_bytes": 2316462404 }, { - "name": "Gp0127634_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127634", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", - "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", - "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", - "file_size_bytes": 3988988 + "name": "Gp0127634_Filtered Stats", + "description": "Filtered Stats for Gp0127634", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", + "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", + "id": "nmdc:0dfd55be1779ae7922d80aa22034c9a1", + "file_size_bytes": 291 }, { "name": "Gp0127634_Gottcha2 TSV report", @@ 
-13866,39 +11678,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71979833bcf838a700840" - }, - "has_input": [ - "nmdc:ac889627d813c8e34cfbf79a4264c590" - ], - "part_of": [ - "nmdc:mga0r0vf18" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "nmdc:366ab38bb6de9591f31a086d42ac23d6", - "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "nmdc:0ca043b630ba304cb80603e8332c78cf", - "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "nmdc:678e7c401a6971629f7d3ada83b307ab" - ], - "was_informed_by": "gold:Gp0127634", - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:49:55+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -14005,85 +11785,6 @@ "id": "nmdc:6748020214a3d68ad588e3548107208e", "file_size_bytes": 3996293 }, - { - "name": "Gp0127635_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", - "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", - "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "file_size_bytes": 3696 - }, - { - "name": "Gp0127635_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", - "md5_checksum": "ddec46781153da60da815c65871f5413", - "id": "nmdc:ddec46781153da60da815c65871f5413", - 
"file_size_bytes": 677459 - }, - { - "name": "Gp0127635_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127635", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", - "md5_checksum": "e626ec18dba4885613240927cbb99d8b", - "id": "nmdc:e626ec18dba4885613240927cbb99d8b", - "file_size_bytes": 236164 - }, - { - "name": "Gp0127635_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127635", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", - "md5_checksum": "f8486e4ee029038a452a3484db10cabc", - "id": "nmdc:f8486e4ee029038a452a3484db10cabc", - "file_size_bytes": 1796179546 - }, - { - "name": "Gp0127635_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127635", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", - "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", - "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "file_size_bytes": 254661 - }, - { - "name": "Gp0127635_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127635", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", - "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", - "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "file_size_bytes": 2333534 - }, - { - "name": "Gp0127635_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127635", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", - "md5_checksum": "59807dae5216b11c96df5593a26d9a88", - "id": "nmdc:59807dae5216b11c96df5593a26d9a88", - "file_size_bytes": 1432249556 - }, - { - "name": "Gp0127635_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127635", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", - "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", - "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "file_size_bytes": 639738 - }, - { - "name": "Gp0127635_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127635", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", - "md5_checksum": "6748020214a3d68ad588e3548107208e", - "id": "nmdc:6748020214a3d68ad588e3548107208e", - "file_size_bytes": 3996293 - }, { "name": "Gp0127635_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127635", @@ -14586,39 +12287,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71936833bcf838a6ffdfc" - }, - "has_input": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" - ], - "part_of": [ - "nmdc:mga0ak4p20" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "nmdc:ddec46781153da60da815c65871f5413", - "nmdc:e626ec18dba4885613240927cbb99d8b", - "nmdc:f8486e4ee029038a452a3484db10cabc", - "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "nmdc:59807dae5216b11c96df5593a26d9a88", - "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "nmdc:6748020214a3d68ad588e3548107208e" 
- ], - "was_informed_by": "gold:Gp0127635", - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:11:48+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -14627,103 +12296,24 @@ "extraction_set": [], "activity_set": [], "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127637_Filtered Reads", - "description": "Filtered Reads for Gp0127637", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", - "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", - "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", - "file_size_bytes": 1553219358 - }, - { - "name": "Gp0127637_Filtered Stats", - "description": "Filtered Stats for Gp0127637", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", - "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", - "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", - "file_size_bytes": 289 - }, - { - "name": "Gp0127637_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", - "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", - "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "file_size_bytes": 660 - }, - { - "name": "Gp0127637_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", - "md5_checksum": "37dd1d73ad47979ee5284830d27df535", - "id": "nmdc:37dd1d73ad47979ee5284830d27df535", - "file_size_bytes": 594054 - }, - { - "name": 
"Gp0127637_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127637", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", - "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", - "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "file_size_bytes": 227750 - }, - { - "name": "Gp0127637_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127637", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", - "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", - "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "file_size_bytes": 1457058272 - }, - { - "name": "Gp0127637_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127637", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", - "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", - "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "file_size_bytes": 251867 - }, - { - "name": "Gp0127637_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127637", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", - "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", - "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", - "file_size_bytes": 2325282 - }, - { - "name": "Gp0127637_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127637", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", - "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", - "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "file_size_bytes": 1160106364 - }, + "data_object_set": [ { - "name": "Gp0127637_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127637", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", - "md5_checksum": "9a1826f66ee45187d627076d11dc491f", - "id": "nmdc:9a1826f66ee45187d627076d11dc491f", - "file_size_bytes": 613810 + "name": "Gp0127637_Filtered Reads", + "description": "Filtered Reads for Gp0127637", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", + "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", + "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", + "file_size_bytes": 1553219358 }, { - "name": "Gp0127637_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127637", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", - "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", - "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", - "file_size_bytes": 3853908 + "name": "Gp0127637_Filtered Stats", + "description": "Filtered Stats for Gp0127637", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", + "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", + "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", + "file_size_bytes": 289 }, { "name": "Gp0127637_Gottcha2 TSV report", @@ -15287,39 +12877,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - 
"pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7191f833bcf838a6ffa50" - }, - "has_input": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" - ], - "part_of": [ - "nmdc:mga0sb9b30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "nmdc:37dd1d73ad47979ee5284830d27df535", - "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "nmdc:81a6efbd082e07bc2db174a88d64a272", - "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "nmdc:9a1826f66ee45187d627076d11dc491f", - "nmdc:67adb9cc2c75251f556a90b1a959ea72" - ], - "was_informed_by": "gold:Gp0127637", - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:11:56+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -15426,85 +12984,6 @@ "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", "file_size_bytes": 3896830 }, - { - "name": "Gp0127638_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", - "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", - "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "file_size_bytes": 2025 - }, - { - "name": "Gp0127638_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", - "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", - "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", - "file_size_bytes": 655633 - }, - { - "name": "Gp0127638_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona 
HTML report for Gp0127638", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", - "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", - "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", - "file_size_bytes": 232133 - }, - { - "name": "Gp0127638_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127638", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", - "md5_checksum": "e9946f36795474182b7759d3d7532b57", - "id": "nmdc:e9946f36795474182b7759d3d7532b57", - "file_size_bytes": 1448205544 - }, - { - "name": "Gp0127638_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127638", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", - "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", - "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "file_size_bytes": 253872 - }, - { - "name": "Gp0127638_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127638", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", - "md5_checksum": "997a66f49a232750bd7132639f3387e7", - "id": "nmdc:997a66f49a232750bd7132639f3387e7", - "file_size_bytes": 2331772 - }, - { - "name": "Gp0127638_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127638", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", - "md5_checksum": 
"d3f604a59babf001839d38a617b62931", - "id": "nmdc:d3f604a59babf001839d38a617b62931", - "file_size_bytes": 1157365410 - }, - { - "name": "Gp0127638_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127638", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", - "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", - "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", - "file_size_bytes": 621484 - }, - { - "name": "Gp0127638_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127638", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", - "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", - "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", - "file_size_bytes": 3896830 - }, { "name": "Gp0127638_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127638", @@ -15911,146 +13390,35 @@ "study_set": [], "field_research_site_set": [], "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719b5833bcf838a7010e1" - }, - "has_input": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752" - ], - "part_of": [ - "nmdc:mga0hjgc20" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "nmdc:b6de56746a284f8226dd86817c8ae04e", - "nmdc:d9572e708af9f0a06e98cfddfb298359", - "nmdc:e9946f36795474182b7759d3d7532b57", - "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "nmdc:997a66f49a232750bd7132639f3387e7", - "nmdc:d3f604a59babf001839d38a617b62931", - "nmdc:3abfaa434ee1449cbbb69985e48488b4", - "nmdc:70c2fc1a2c7c0032528ff91ad1576465" - ], - "was_informed_by": "gold:Gp0127638", - "id": 
"nmdc:668844f5ea6cefdcb893db0bb6afc92a", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:49:51+00:00" - } - ] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115670_Filtered Reads", - "description": "Filtered Reads for Gp0115670", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", - "md5_checksum": "7f6b353300583c60d2d668880b4134cd", - "id": "nmdc:7f6b353300583c60d2d668880b4134cd", - "file_size_bytes": 3012174785 - }, - { - "name": "Gp0115670_Filtered Stats", - "description": "Filtered Stats for Gp0115670", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", - "md5_checksum": "a4f65d101293fa4345cd865f86597464", - "id": "nmdc:a4f65d101293fa4345cd865f86597464", - "file_size_bytes": 291 - }, - { - "name": "Gp0115670_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", - "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", - "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "file_size_bytes": 13758 - }, - { - "name": "Gp0115670_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", - "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", - "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", - "file_size_bytes": 1116084 - }, - { - "name": "Gp0115670_Gottcha2 Krona HTML 
report", - "description": "Gottcha2 Krona HTML report for Gp0115670", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", - "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", - "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "file_size_bytes": 268542 - }, - { - "name": "Gp0115670_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115670", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", - "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", - "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "file_size_bytes": 2458475116 - }, - { - "name": "Gp0115670_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115670", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", - "md5_checksum": "c065784bed2b2495d512af93d05967de", - "id": "nmdc:c065784bed2b2495d512af93d05967de", - "file_size_bytes": 261692 - }, - { - "name": "Gp0115670_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115670", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", - "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", - "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "file_size_bytes": 2343355 - }, - { - "name": "Gp0115670_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115670", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", - 
"md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", - "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "file_size_bytes": 2019980511 - }, + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0115670_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115670", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", - "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", - "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", - "file_size_bytes": 694029 + "name": "Gp0115670_Filtered Reads", + "description": "Filtered Reads for Gp0115670", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", + "md5_checksum": "7f6b353300583c60d2d668880b4134cd", + "id": "nmdc:7f6b353300583c60d2d668880b4134cd", + "file_size_bytes": 3012174785 }, { - "name": "Gp0115670_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115670", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", - "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", - "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", - "file_size_bytes": 4190653 + "name": "Gp0115670_Filtered Stats", + "description": "Filtered Stats for Gp0115670", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", + "md5_checksum": "a4f65d101293fa4345cd865f86597464", + "id": "nmdc:a4f65d101293fa4345cd865f86597464", + "file_size_bytes": 291 }, { "name": "Gp0115670_Gottcha2 TSV report", @@ 
-16674,39 +14042,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a12833bcf838a701ba9" - }, - "has_input": [ - "nmdc:7f6b353300583c60d2d668880b4134cd" - ], - "part_of": [ - "nmdc:mga0d7pj22" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "nmdc:1ac2be77491e7d425da1d62f69f1508d", - "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "nmdc:c065784bed2b2495d512af93d05967de", - "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "nmdc:8a26d8496a70f4777be0e1237092e44c", - "nmdc:694b83f0b6f599948d4248dd48dd9ba9" - ], - "was_informed_by": "gold:Gp0115670", - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:55:52+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -16813,85 +14149,6 @@ "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", "file_size_bytes": 4070548 }, - { - "name": "Gp0115674_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", - "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", - "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "file_size_bytes": 13768 - }, - { - "name": "Gp0115674_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", - "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", - "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - 
"file_size_bytes": 1022858 - }, - { - "name": "Gp0115674_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115674", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", - "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", - "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "file_size_bytes": 269166 - }, - { - "name": "Gp0115674_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115674", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", - "md5_checksum": "f8740b1fadbc29aef50d32706c955199", - "id": "nmdc:f8740b1fadbc29aef50d32706c955199", - "file_size_bytes": 1904303690 - }, - { - "name": "Gp0115674_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115674", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", - "md5_checksum": "80abfcc9b09476af4083b2af1760834f", - "id": "nmdc:80abfcc9b09476af4083b2af1760834f", - "file_size_bytes": 258748 - }, - { - "name": "Gp0115674_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115674", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", - "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", - "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "file_size_bytes": 2335000 - }, - { - "name": "Gp0115674_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115674", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", - "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", - "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "file_size_bytes": 1574286150 - }, - { - "name": "Gp0115674_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115674", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", - "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", - "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "file_size_bytes": 671800 - }, - { - "name": "Gp0115674_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115674", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", - "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", - "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", - "file_size_bytes": 4070548 - }, { "name": "Gp0115674_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115674", @@ -17397,39 +14654,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a31833bcf838a701e57" - }, - "has_input": [ - "nmdc:538fd5695eb3decd48891e72acebb8ce" - ], - "part_of": [ - "nmdc:mga0cf0450" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "nmdc:f8740b1fadbc29aef50d32706c955199", - "nmdc:80abfcc9b09476af4083b2af1760834f", - "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" 
- ], - "was_informed_by": "gold:Gp0115674", - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:21:41+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -17438,103 +14663,24 @@ "extraction_set": [], "activity_set": [], "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115673_Filtered Reads", - "description": "Filtered Reads for Gp0115673", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", - "md5_checksum": "268918f610926421d2af43f175553680", - "id": "nmdc:268918f610926421d2af43f175553680", - "file_size_bytes": 1492820163 - }, - { - "name": "Gp0115673_Filtered Stats", - "description": "Filtered Stats for Gp0115673", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", - "md5_checksum": "4610980cf3558f5a9830797ead97362a", - "id": "nmdc:4610980cf3558f5a9830797ead97362a", - "file_size_bytes": 287 - }, - { - "name": "Gp0115673_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", - "md5_checksum": "c7b24571b61a33018cf118b5424b787f", - "id": "nmdc:c7b24571b61a33018cf118b5424b787f", - "file_size_bytes": 9782 - }, - { - "name": "Gp0115673_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", - "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", - "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", - "file_size_bytes": 856112 - }, - { - "name": 
"Gp0115673_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115673", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", - "md5_checksum": "7c6b0ef44450c747580826a2e218844b", - "id": "nmdc:7c6b0ef44450c747580826a2e218844b", - "file_size_bytes": 255142 - }, - { - "name": "Gp0115673_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115673", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", - "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", - "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", - "file_size_bytes": 1218364738 - }, - { - "name": "Gp0115673_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115673", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", - "md5_checksum": "b5f7a68a94b356001014d1be024231af", - "id": "nmdc:b5f7a68a94b356001014d1be024231af", - "file_size_bytes": 254923 - }, - { - "name": "Gp0115673_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115673", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", - "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", - "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "file_size_bytes": 2323219 - }, - { - "name": "Gp0115673_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115673", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", - "md5_checksum": "35bf579641b2ffb3614098d9811a4968", - "id": "nmdc:35bf579641b2ffb3614098d9811a4968", - "file_size_bytes": 1001134031 - }, + "data_object_set": [ { - "name": "Gp0115673_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115673", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", - "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", - "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "file_size_bytes": 640671 + "name": "Gp0115673_Filtered Reads", + "description": "Filtered Reads for Gp0115673", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", + "md5_checksum": "268918f610926421d2af43f175553680", + "id": "nmdc:268918f610926421d2af43f175553680", + "file_size_bytes": 1492820163 }, { - "name": "Gp0115673_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115673", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", - "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", - "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", - "file_size_bytes": 3995499 + "name": "Gp0115673_Filtered Stats", + "description": "Filtered Stats for Gp0115673", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", + "md5_checksum": "4610980cf3558f5a9830797ead97362a", + "id": "nmdc:4610980cf3558f5a9830797ead97362a", + "file_size_bytes": 287 }, { "name": "Gp0115673_Gottcha2 TSV report", @@ -18079,39 +15225,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - 
"pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719b7833bcf838a7011dc" - }, - "has_input": [ - "nmdc:268918f610926421d2af43f175553680" - ], - "part_of": [ - "nmdc:mga0kpja70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c7b24571b61a33018cf118b5424b787f", - "nmdc:e185734176505343bf4c83c16a0a9fe2", - "nmdc:7c6b0ef44450c747580826a2e218844b", - "nmdc:5b98c377f424d7609f1a09e350cfb837", - "nmdc:b5f7a68a94b356001014d1be024231af", - "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "nmdc:35bf579641b2ffb3614098d9811a4968", - "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "nmdc:a7030fa8e9622e3396c2b96448e90c3b" - ], - "was_informed_by": "gold:Gp0115673", - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", - "started_at_time": "2021-10-11T02:28:36Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:32:43+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -18218,85 +15332,6 @@ "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", "file_size_bytes": 4010701 }, - { - "name": "Gp0115671_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", - "md5_checksum": "358559c32b69eff51758db66ac01021b", - "id": "nmdc:358559c32b69eff51758db66ac01021b", - "file_size_bytes": 11833 - }, - { - "name": "Gp0115671_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", - "md5_checksum": "befbd648249c2871bd27999120e50bf7", - "id": "nmdc:befbd648249c2871bd27999120e50bf7", - "file_size_bytes": 888177 - }, - { - "name": "Gp0115671_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona 
HTML report for Gp0115671", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", - "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", - "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "file_size_bytes": 261703 - }, - { - "name": "Gp0115671_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115671", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", - "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", - "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "file_size_bytes": 1474970402 - }, - { - "name": "Gp0115671_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115671", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", - "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", - "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "file_size_bytes": 255777 - }, - { - "name": "Gp0115671_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115671", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", - "md5_checksum": "e0736ff520260ba2097c02b9e767362c", - "id": "nmdc:e0736ff520260ba2097c02b9e767362c", - "file_size_bytes": 2329875 - }, - { - "name": "Gp0115671_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115671", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", - "md5_checksum": 
"a00960655f9e80726fdb0fade1bec958", - "id": "nmdc:a00960655f9e80726fdb0fade1bec958", - "file_size_bytes": 1213240496 - }, - { - "name": "Gp0115671_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115671", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", - "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", - "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "file_size_bytes": 659715 - }, - { - "name": "Gp0115671_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115671", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", - "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", - "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", - "file_size_bytes": 4010701 - }, { "name": "Gp0115671_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115671", @@ -18759,146 +15794,35 @@ "study_set": [], "field_research_site_set": [], "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a4c833bcf838a702155" - }, - "has_input": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" - ], - "part_of": [ - "nmdc:mga0rw1351" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:358559c32b69eff51758db66ac01021b", - "nmdc:befbd648249c2871bd27999120e50bf7", - "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "nmdc:e0736ff520260ba2097c02b9e767362c", - "nmdc:a00960655f9e80726fdb0fade1bec958", - "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" - ], - "was_informed_by": "gold:Gp0115671", - "id": 
"nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:39:05+00:00" - } - ] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115676_Filtered Reads", - "description": "Filtered Reads for Gp0115676", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", - "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", - "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", - "file_size_bytes": 3113249122 - }, - { - "name": "Gp0115676_Filtered Stats", - "description": "Filtered Stats for Gp0115676", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", - "md5_checksum": "79815495339053b7935b55dbde02b2ff", - "id": "nmdc:79815495339053b7935b55dbde02b2ff", - "file_size_bytes": 292 - }, - { - "name": "Gp0115676_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", - "md5_checksum": "13343b2533892633bcc3655a1ebe788f", - "id": "nmdc:13343b2533892633bcc3655a1ebe788f", - "file_size_bytes": 13659 - }, - { - "name": "Gp0115676_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", - "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", - "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "file_size_bytes": 1168924 - }, - { - "name": "Gp0115676_Gottcha2 Krona HTML 
report", - "description": "Gottcha2 Krona HTML report for Gp0115676", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", - "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", - "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", - "file_size_bytes": 267660 - }, - { - "name": "Gp0115676_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115676", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", - "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", - "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "file_size_bytes": 2721808152 - }, - { - "name": "Gp0115676_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115676", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", - "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", - "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "file_size_bytes": 263207 - }, - { - "name": "Gp0115676_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115676", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", - "md5_checksum": "6e1e28773094884d35c04072309e285a", - "id": "nmdc:6e1e28773094884d35c04072309e285a", - "file_size_bytes": 2347912 - }, - { - "name": "Gp0115676_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115676", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", - 
"md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", - "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "file_size_bytes": 2224468607 - }, + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0115676_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115676", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", - "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", - "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "file_size_bytes": 701128 + "name": "Gp0115676_Filtered Reads", + "description": "Filtered Reads for Gp0115676", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", + "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", + "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", + "file_size_bytes": 3113249122 }, { - "name": "Gp0115676_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115676", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", - "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", - "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", - "file_size_bytes": 4217185 + "name": "Gp0115676_Filtered Stats", + "description": "Filtered Stats for Gp0115676", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", + "md5_checksum": "79815495339053b7935b55dbde02b2ff", + "id": "nmdc:79815495339053b7935b55dbde02b2ff", + "file_size_bytes": 292 }, { "name": "Gp0115676_Gottcha2 TSV report", @@ 
-19522,39 +16446,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a12833bcf838a701b3a" - }, - "has_input": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" - ], - "part_of": [ - "nmdc:mga0w3a067" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:13343b2533892633bcc3655a1ebe788f", - "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "nmdc:95a2de8be672fd50bf542215194dc4d4", - "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "nmdc:6e1e28773094884d35c04072309e285a", - "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" - ], - "was_informed_by": "gold:Gp0115676", - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:40:05+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -19661,85 +16553,6 @@ "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", "file_size_bytes": 4250180 }, - { - "name": "Gp0115677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", - "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", - "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "file_size_bytes": 17895 - }, - { - "name": "Gp0115677_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", - "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", - "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", - 
"file_size_bytes": 1182538 - }, - { - "name": "Gp0115677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", - "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", - "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "file_size_bytes": 276802 - }, - { - "name": "Gp0115677_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115677", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", - "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", - "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "file_size_bytes": 4716470614 - }, - { - "name": "Gp0115677_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115677", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", - "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", - "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "file_size_bytes": 267231 - }, - { - "name": "Gp0115677_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115677", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", - "md5_checksum": "229017cdb1832bb718d22dc27db44125", - "id": "nmdc:229017cdb1832bb718d22dc27db44125", - "file_size_bytes": 2356003 - }, - { - "name": "Gp0115677_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115677", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", - "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", - "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "file_size_bytes": 3857487871 - }, - { - "name": "Gp0115677_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115677", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", - "md5_checksum": "bdd701b44e67929ec8bbe279697da937", - "id": "nmdc:bdd701b44e67929ec8bbe279697da937", - "file_size_bytes": 708598 - }, - { - "name": "Gp0115677_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115677", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", - "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", - "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", - "file_size_bytes": 4250180 - }, { "name": "Gp0115677_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115677", @@ -20302,39 +17115,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719d5833bcf838a70143c" - }, - "has_input": [ - "nmdc:63c857b3011dec61a08044d518291f23" - ], - "part_of": [ - "nmdc:mga0zb0766" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "nmdc:c1730daf5e6017219fd9fc079e42c132", - "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "nmdc:229017cdb1832bb718d22dc27db44125", - "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "nmdc:bdd701b44e67929ec8bbe279697da937", - "nmdc:d35583a5ed45df5a58bf084fc67bf988" 
- ], - "was_informed_by": "gold:Gp0115677", - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", - "started_at_time": "2021-10-11T02:24:49Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:26:42+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -20343,103 +17124,24 @@ "extraction_set": [], "activity_set": [], "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115675_Filtered Reads", - "description": "Filtered Reads for Gp0115675", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", - "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", - "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", - "file_size_bytes": 1533239347 - }, - { - "name": "Gp0115675_Filtered Stats", - "description": "Filtered Stats for Gp0115675", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", - "md5_checksum": "2507e3f107100ce0c72c57191d450818", - "id": "nmdc:2507e3f107100ce0c72c57191d450818", - "file_size_bytes": 287 - }, - { - "name": "Gp0115675_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", - "md5_checksum": "60d673988c4f4447feb5985e8501e914", - "id": "nmdc:60d673988c4f4447feb5985e8501e914", - "file_size_bytes": 8921 - }, - { - "name": "Gp0115675_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", - "md5_checksum": "a8f93ed13033eb949109b4e83980a893", - "id": "nmdc:a8f93ed13033eb949109b4e83980a893", - "file_size_bytes": 871109 - }, - { - "name": 
"Gp0115675_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115675", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", - "md5_checksum": "31dd6eb616f1e9815778453ab1601195", - "id": "nmdc:31dd6eb616f1e9815778453ab1601195", - "file_size_bytes": 252578 - }, - { - "name": "Gp0115675_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115675", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", - "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", - "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "file_size_bytes": 1218767711 - }, - { - "name": "Gp0115675_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115675", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", - "md5_checksum": "0aaac507db0e29827e1c87df47324932", - "id": "nmdc:0aaac507db0e29827e1c87df47324932", - "file_size_bytes": 254260 - }, - { - "name": "Gp0115675_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115675", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", - "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", - "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "file_size_bytes": 2324387 - }, - { - "name": "Gp0115675_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115675", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", - "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", - "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "file_size_bytes": 1001846607 - }, + "data_object_set": [ { - "name": "Gp0115675_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115675", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", - "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", - "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", - "file_size_bytes": 635541 + "name": "Gp0115675_Filtered Reads", + "description": "Filtered Reads for Gp0115675", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", + "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", + "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", + "file_size_bytes": 1533239347 }, { - "name": "Gp0115675_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115675", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", - "md5_checksum": "242a1c60f6cb14ba8430375171fda436", - "id": "nmdc:242a1c60f6cb14ba8430375171fda436", - "file_size_bytes": 3968420 + "name": "Gp0115675_Filtered Stats", + "description": "Filtered Stats for Gp0115675", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", + "md5_checksum": "2507e3f107100ce0c72c57191d450818", + "id": "nmdc:2507e3f107100ce0c72c57191d450818", + "file_size_bytes": 287 }, { "name": "Gp0115675_Gottcha2 TSV report", @@ -21003,39 +17705,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - 
"pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719dc833bcf838a7014d6" - }, - "has_input": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" - ], - "part_of": [ - "nmdc:mga0vf2h47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:60d673988c4f4447feb5985e8501e914", - "nmdc:a8f93ed13033eb949109b4e83980a893", - "nmdc:31dd6eb616f1e9815778453ab1601195", - "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "nmdc:0aaac507db0e29827e1c87df47324932", - "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "nmdc:1ec0247d86889fcef13f39a58a92b066", - "nmdc:242a1c60f6cb14ba8430375171fda436" - ], - "was_informed_by": "gold:Gp0115675", - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:25:21+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -21142,85 +17812,6 @@ "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", "file_size_bytes": 4374689 }, - { - "name": "Gp0115665_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", - "md5_checksum": "432fedddcbacb4e69c0350354ab44080", - "id": "nmdc:432fedddcbacb4e69c0350354ab44080", - "file_size_bytes": 18015 - }, - { - "name": "Gp0115665_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", - "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", - "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "file_size_bytes": 1283220 - }, - { - "name": "Gp0115665_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona 
HTML report for Gp0115665", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", - "md5_checksum": "e3d7339ba5c7677be13854f391462474", - "id": "nmdc:e3d7339ba5c7677be13854f391462474", - "file_size_bytes": 281366 - }, - { - "name": "Gp0115665_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115665", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", - "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", - "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "file_size_bytes": 3481369185 - }, - { - "name": "Gp0115665_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115665", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", - "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", - "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "file_size_bytes": 263480 - }, - { - "name": "Gp0115665_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115665", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", - "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", - "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "file_size_bytes": 2347079 - }, - { - "name": "Gp0115665_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115665", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", - "md5_checksum": 
"8e21ac30de17de0d1051d7d223d0aa0f", - "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "file_size_bytes": 2866138771 - }, - { - "name": "Gp0115665_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115665", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", - "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", - "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "file_size_bytes": 728030 - }, - { - "name": "Gp0115665_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115665", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", - "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", - "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", - "file_size_bytes": 4374689 - }, { "name": "Gp0115665_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115665", @@ -21759,146 +18350,35 @@ "study_set": [], "field_research_site_set": [], "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a36833bcf838a702021" - }, - "has_input": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530" - ], - "part_of": [ - "nmdc:mga06n7k74" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:432fedddcbacb4e69c0350354ab44080", - "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "nmdc:e3d7339ba5c7677be13854f391462474", - "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" - ], - "was_informed_by": "gold:Gp0115665", - "id": 
"nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:19:29+00:00" - } - ] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0115669_Filtered Reads", - "description": "Filtered Reads for Gp0115669", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", - "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", - "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", - "file_size_bytes": 1806935637 - }, - { - "name": "Gp0115669_Filtered Stats", - "description": "Filtered Stats for Gp0115669", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", - "md5_checksum": "58fde3e96dbb28af9133bede850a2653", - "id": "nmdc:58fde3e96dbb28af9133bede850a2653", - "file_size_bytes": 286 - }, - { - "name": "Gp0115669_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", - "md5_checksum": "05933784d02331b60b2531e2025cd3b7", - "id": "nmdc:05933784d02331b60b2531e2025cd3b7", - "file_size_bytes": 11362 - }, - { - "name": "Gp0115669_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", - "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", - "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", - "file_size_bytes": 909325 - }, - { - "name": "Gp0115669_Gottcha2 Krona HTML 
report", - "description": "Gottcha2 Krona HTML report for Gp0115669", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", - "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", - "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "file_size_bytes": 261412 - }, - { - "name": "Gp0115669_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115669", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", - "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", - "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "file_size_bytes": 1481087410 - }, - { - "name": "Gp0115669_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115669", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", - "md5_checksum": "de45d70cc01749e9b5691dc24674545d", - "id": "nmdc:de45d70cc01749e9b5691dc24674545d", - "file_size_bytes": 256139 - }, - { - "name": "Gp0115669_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115669", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", - "md5_checksum": "534f97f3792b74385c4da305196a1b1d", - "id": "nmdc:534f97f3792b74385c4da305196a1b1d", - "file_size_bytes": 2323658 - }, - { - "name": "Gp0115669_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115669", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", - 
"md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", - "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "file_size_bytes": 1220980345 - }, + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0115669_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115669", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", - "md5_checksum": "07b6457a094fab96563168ed287dc59f", - "id": "nmdc:07b6457a094fab96563168ed287dc59f", - "file_size_bytes": 651795 + "name": "Gp0115669_Filtered Reads", + "description": "Filtered Reads for Gp0115669", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", + "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", + "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", + "file_size_bytes": 1806935637 }, { - "name": "Gp0115669_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115669", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", - "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", - "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", - "file_size_bytes": 3963303 + "name": "Gp0115669_Filtered Stats", + "description": "Filtered Stats for Gp0115669", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", + "md5_checksum": "58fde3e96dbb28af9133bede850a2653", + "id": "nmdc:58fde3e96dbb28af9133bede850a2653", + "file_size_bytes": 286 }, { "name": "Gp0115669_Gottcha2 TSV report", @@ 
-22462,39 +18942,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a34833bcf838a701fb0" - }, - "has_input": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9" - ], - "part_of": [ - "nmdc:mga0k85x37" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:05933784d02331b60b2531e2025cd3b7", - "nmdc:50fc279637cb7048aaaeec9b223d0286", - "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "nmdc:de45d70cc01749e9b5691dc24674545d", - "nmdc:534f97f3792b74385c4da305196a1b1d", - "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "nmdc:07b6457a094fab96563168ed287dc59f", - "nmdc:164a1bc50e8d6509446ae2877be8231c" - ], - "was_informed_by": "gold:Gp0115669", - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:20:07+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -22601,85 +19049,6 @@ "id": "nmdc:3266e79813577aae1d4377c62e73332c", "file_size_bytes": 4177114 }, - { - "name": "Gp0115672_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", - "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", - "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", - "file_size_bytes": 15806 - }, - { - "name": "Gp0115672_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", - "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", - "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - 
"file_size_bytes": 1142479 - }, - { - "name": "Gp0115672_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115672", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", - "md5_checksum": "39a46887587926c9b81e126bb1036005", - "id": "nmdc:39a46887587926c9b81e126bb1036005", - "file_size_bytes": 273611 - }, - { - "name": "Gp0115672_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115672", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", - "md5_checksum": "b8dde2c047141d9097317c86f723eded", - "id": "nmdc:b8dde2c047141d9097317c86f723eded", - "file_size_bytes": 2436637487 - }, - { - "name": "Gp0115672_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115672", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", - "md5_checksum": "d530342b37f0785f92650e9650f31d6a", - "id": "nmdc:d530342b37f0785f92650e9650f31d6a", - "file_size_bytes": 261520 - }, - { - "name": "Gp0115672_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115672", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", - "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", - "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "file_size_bytes": 2342832 - }, - { - "name": "Gp0115672_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115672", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", - "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", - "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "file_size_bytes": 1993150715 - }, - { - "name": "Gp0115672_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115672", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", - "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", - "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "file_size_bytes": 693572 - }, - { - "name": "Gp0115672_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115672", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", - "md5_checksum": "3266e79813577aae1d4377c62e73332c", - "id": "nmdc:3266e79813577aae1d4377c62e73332c", - "file_size_bytes": 4177114 - }, { "name": "Gp0115672_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0115672", @@ -23261,39 +19630,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e71a33833bcf838a701f34" - }, - "has_input": [ - "nmdc:eb516fb673793f5161fb634fc19de310" - ], - "part_of": [ - "nmdc:mga0cwhj53" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5a9326e2e450663a5ed8c97389136b25", - "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "nmdc:39a46887587926c9b81e126bb1036005", - "nmdc:b8dde2c047141d9097317c86f723eded", - "nmdc:d530342b37f0785f92650e9650f31d6a", - "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "nmdc:3266e79813577aae1d4377c62e73332c" 
- ], - "was_informed_by": "gold:Gp0115672", - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:56:20+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -23302,103 +19639,24 @@ "extraction_set": [], "activity_set": [], "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127640_Filtered Reads", - "description": "Filtered Reads for Gp0127640", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", - "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", - "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", - "file_size_bytes": 2416846292 - }, - { - "name": "Gp0127640_Filtered Stats", - "description": "Filtered Stats for Gp0127640", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", - "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", - "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", - "file_size_bytes": 285 - }, - { - "name": "Gp0127640_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", - "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", - "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "file_size_bytes": 3824 - }, - { - "name": "Gp0127640_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", - "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", - "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "file_size_bytes": 850491 - }, - { - "name": 
"Gp0127640_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127640", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", - "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", - "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "file_size_bytes": 236151 - }, - { - "name": "Gp0127640_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127640", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", - "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", - "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "file_size_bytes": 2057333090 - }, - { - "name": "Gp0127640_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127640", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", - "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", - "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", - "file_size_bytes": 256577 - }, - { - "name": "Gp0127640_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127640", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", - "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", - "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "file_size_bytes": 2334984 - }, - { - "name": "Gp0127640_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127640", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", - "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", - "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "file_size_bytes": 1658481192 - }, + "data_object_set": [ { - "name": "Gp0127640_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127640", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", - "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", - "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", - "file_size_bytes": 653129 + "name": "Gp0127640_Filtered Reads", + "description": "Filtered Reads for Gp0127640", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", + "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", + "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", + "file_size_bytes": 2416846292 }, { - "name": "Gp0127640_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127640", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", - "md5_checksum": "d47144fd7ec0608e7677550d9589c889", - "id": "nmdc:d47144fd7ec0608e7677550d9589c889", - "file_size_bytes": 3977820 + "name": "Gp0127640_Filtered Stats", + "description": "Filtered Stats for Gp0127640", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", + "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", + "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", + "file_size_bytes": 285 }, { "name": "Gp0127640_Gottcha2 TSV report", @@ -23924,39 +20182,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - 
"pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199d833bcf838a700ec0" - }, - "has_input": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2" - ], - "part_of": [ - "nmdc:mga06rnc11" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "nmdc:61f1f6d57fd4d445682e25ec34901721", - "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "nmdc:e2939606fc9ff1c0046b333e1740f258", - "nmdc:d47144fd7ec0608e7677550d9589c889" - ], - "was_informed_by": "gold:Gp0127640", - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:33:17+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -24063,85 +20289,6 @@ "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", "file_size_bytes": 3964515 }, - { - "name": "Gp0127641_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", - "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", - "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "file_size_bytes": 3331 - }, - { - "name": "Gp0127641_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", - "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", - "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", - "file_size_bytes": 761359 - }, - { - "name": "Gp0127641_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona 
HTML report for Gp0127641", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", - "md5_checksum": "f473f4a99336a49105d2722888ae0510", - "id": "nmdc:f473f4a99336a49105d2722888ae0510", - "file_size_bytes": 236161 - }, - { - "name": "Gp0127641_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127641", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", - "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", - "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", - "file_size_bytes": 1635953327 - }, - { - "name": "Gp0127641_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127641", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", - "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", - "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "file_size_bytes": 255166 - }, - { - "name": "Gp0127641_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127641", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", - "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", - "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "file_size_bytes": 2332521 - }, - { - "name": "Gp0127641_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127641", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", - "md5_checksum": 
"869730c4d81163e0c238dd4ae27ebd9e", - "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "file_size_bytes": 1307934195 - }, - { - "name": "Gp0127641_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127641", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", - "md5_checksum": "dc193d1a1693589003f992c820606bab", - "id": "nmdc:dc193d1a1693589003f992c820606bab", - "file_size_bytes": 635050 - }, - { - "name": "Gp0127641_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127641", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", - "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", - "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", - "file_size_bytes": 3964515 - }, { "name": "Gp0127641_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127641", @@ -24604,146 +20751,35 @@ "study_set": [], "field_research_site_set": [], "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199c833bcf838a700e42" - }, - "has_input": [ - "nmdc:a2700afe93abad6f004a3701348622a2" - ], - "part_of": [ - "nmdc:mga0822t33" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "nmdc:a42312841b816448d8bd5d3adfa65f58", - "nmdc:f473f4a99336a49105d2722888ae0510", - "nmdc:ae51ea50660f44fa3b317a45f3015556", - "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "nmdc:dc193d1a1693589003f992c820606bab", - "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" - ], - "was_informed_by": "gold:Gp0127641", - "id": 
"nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0822t33", - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:47+00:00" - } - ] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127643_Filtered Reads", - "description": "Filtered Reads for Gp0127643", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", - "md5_checksum": "2ef23543e3064ca73c3034713d87c026", - "id": "nmdc:2ef23543e3064ca73c3034713d87c026", - "file_size_bytes": 1891088172 - }, - { - "name": "Gp0127643_Filtered Stats", - "description": "Filtered Stats for Gp0127643", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", - "md5_checksum": "87b172ead58a37be8d199c0acfc96759", - "id": "nmdc:87b172ead58a37be8d199c0acfc96759", - "file_size_bytes": 289 - }, - { - "name": "Gp0127643_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", - "md5_checksum": "e8f825653e5736e29b73de55bd11a270", - "id": "nmdc:e8f825653e5736e29b73de55bd11a270", - "file_size_bytes": 1326 - }, - { - "name": "Gp0127643_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", - "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", - "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "file_size_bytes": 664131 - }, - { - "name": "Gp0127643_Gottcha2 Krona HTML 
report", - "description": "Gottcha2 Krona HTML report for Gp0127643", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", - "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", - "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "file_size_bytes": 229630 - }, - { - "name": "Gp0127643_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127643", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", - "md5_checksum": "c9074b2e05765afd68463dc301b87995", - "id": "nmdc:c9074b2e05765afd68463dc301b87995", - "file_size_bytes": 1726867547 - }, - { - "name": "Gp0127643_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127643", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", - "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", - "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "file_size_bytes": 254021 - }, - { - "name": "Gp0127643_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127643", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", - "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", - "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", - "file_size_bytes": 2331702 - }, - { - "name": "Gp0127643_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127643", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", - 
"md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", - "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", - "file_size_bytes": 1376409913 - }, + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127643_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127643", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", - "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", - "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "file_size_bytes": 640506 + "name": "Gp0127643_Filtered Reads", + "description": "Filtered Reads for Gp0127643", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", + "md5_checksum": "2ef23543e3064ca73c3034713d87c026", + "id": "nmdc:2ef23543e3064ca73c3034713d87c026", + "file_size_bytes": 1891088172 }, { - "name": "Gp0127643_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127643", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", - "md5_checksum": "f98bae155bced880c058ecde7d539c18", - "id": "nmdc:f98bae155bced880c058ecde7d539c18", - "file_size_bytes": 3998448 + "name": "Gp0127643_Filtered Stats", + "description": "Filtered Stats for Gp0127643", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", + "md5_checksum": "87b172ead58a37be8d199c0acfc96759", + "id": "nmdc:87b172ead58a37be8d199c0acfc96759", + "file_size_bytes": 289 }, { "name": "Gp0127643_Gottcha2 TSV report", @@ 
-25269,39 +21305,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719b6833bcf838a70116b" - }, - "has_input": [ - "nmdc:2ef23543e3064ca73c3034713d87c026" - ], - "part_of": [ - "nmdc:mga0evc178" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e8f825653e5736e29b73de55bd11a270", - "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "nmdc:c9074b2e05765afd68463dc301b87995", - "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "nmdc:6465fe59472b111ead1f0414ccf39f62", - "nmdc:9855ca52bce074c34dcebfd154fa94ff", - "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "nmdc:f98bae155bced880c058ecde7d539c18" - ], - "was_informed_by": "gold:Gp0127643", - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0evc178", - "started_at_time": "2021-10-11T02:27:00Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:04:16+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -25408,85 +21412,6 @@ "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", "file_size_bytes": 3567307 }, - { - "name": "gold:Gp0452677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for gold:Gp0452677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", - "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", - "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "file_size_bytes": 109 - }, - { - "name": "Gp0127644_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", - "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", - "id": 
"nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "file_size_bytes": 426075 - }, - { - "name": "gold:Gp0452677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for gold:Gp0452677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", - "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", - "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "file_size_bytes": 226638 - }, - { - "name": "Gp0127644_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127644", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", - "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", - "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "file_size_bytes": 610862986 - }, - { - "name": "Gp0127644_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127644", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", - "md5_checksum": "9baa708296f62334e099cf61711b5e16", - "id": "nmdc:9baa708296f62334e099cf61711b5e16", - "file_size_bytes": 243322 - }, - { - "name": "Gp0127644_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127644", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", - "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", - "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "file_size_bytes": 2294995 - }, - { - "name": "Gp0127644_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127644", - "data_object_type": "Kraken2 Taxonomic Classification", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", - "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", - "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "file_size_bytes": 487178087 - }, - { - "name": "Gp0127644_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127644", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", - "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", - "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "file_size_bytes": 557688 - }, - { - "name": "Gp0127644_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127644", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", - "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", - "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", - "file_size_bytes": 3567307 - }, { "name": "Gp0127644_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127644", @@ -25932,39 +21857,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7197c833bcf838a700966" - }, - "has_input": [ - "nmdc:98da35678c59689ce738b2a6bc708692" - ], - "part_of": [ - "nmdc:mga0bpf635" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "nmdc:9baa708296f62334e099cf61711b5e16", - "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - 
"nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" - ], - "was_informed_by": "gold:Gp0127644", - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:55:00+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -25973,103 +21866,24 @@ "extraction_set": [], "activity_set": [], "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127639_Filtered Reads", - "description": "Filtered Reads for Gp0127639", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", - "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", - "id": "nmdc:833077b40372c6daa20beaed04ed0ae1", - "file_size_bytes": 1585232805 - }, - { - "name": "Gp0127639_Filtered Stats", - "description": "Filtered Stats for Gp0127639", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", - "md5_checksum": "b68178eebde030fad0850797adbb2624", - "id": "nmdc:b68178eebde030fad0850797adbb2624", - "file_size_bytes": 289 - }, - { - "name": "Gp0127639_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", - "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", - "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "file_size_bytes": 648 - }, - { - "name": "Gp0127639_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", - "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", - "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", - "file_size_bytes": 
588644 - }, - { - "name": "Gp0127639_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127639", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", - "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", - "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "file_size_bytes": 228175 - }, - { - "name": "Gp0127639_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127639", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", - "md5_checksum": "81281fef2c0778516a84b3a672cc0230", - "id": "nmdc:81281fef2c0778516a84b3a672cc0230", - "file_size_bytes": 1468498728 - }, - { - "name": "Gp0127639_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127639", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", - "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", - "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", - "file_size_bytes": 251338 - }, - { - "name": "Gp0127639_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127639", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", - "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", - "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "file_size_bytes": 2322720 - }, - { - "name": "Gp0127639_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127639", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", - "md5_checksum": "848fc10ed4365047cb139a4b40303808", - "id": "nmdc:848fc10ed4365047cb139a4b40303808", - "file_size_bytes": 1168015909 - }, + "data_object_set": [ { - "name": "Gp0127639_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127639", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", - "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", - "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", - "file_size_bytes": 616202 + "name": "Gp0127639_Filtered Reads", + "description": "Filtered Reads for Gp0127639", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", + "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", + "id": "nmdc:833077b40372c6daa20beaed04ed0ae1", + "file_size_bytes": 1585232805 }, { - "name": "Gp0127639_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127639", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", - "md5_checksum": "c6eb85143a2489921c53f8184d536129", - "id": "nmdc:c6eb85143a2489921c53f8184d536129", - "file_size_bytes": 3863456 + "name": "Gp0127639_Filtered Stats", + "description": "Filtered Stats for Gp0127639", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", + "md5_checksum": "b68178eebde030fad0850797adbb2624", + "id": "nmdc:b68178eebde030fad0850797adbb2624", + "file_size_bytes": 289 }, { "name": "Gp0127639_Gottcha2 TSV report", @@ -26690,39 +22504,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - 
"pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199a833bcf838a700d65" - }, - "has_input": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1" - ], - "part_of": [ - "nmdc:mga09wpw60" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "nmdc:82f072d1931154fbc722531d3d0dc41c", - "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "nmdc:81281fef2c0778516a84b3a672cc0230", - "nmdc:86ae054ba9def1126579c8f76db8a07a", - "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "nmdc:848fc10ed4365047cb139a4b40303808", - "nmdc:94e422e0bae86c608fba1c3815e08e92", - "nmdc:c6eb85143a2489921c53f8184d536129" - ], - "was_informed_by": "gold:Gp0127639", - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:27:12+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -26829,85 +22611,6 @@ "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", "file_size_bytes": 4013188 }, - { - "name": "Gp0127642_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", - "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", - "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", - "file_size_bytes": 5303 - }, - { - "name": "Gp0127642_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", - "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", - "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", - "file_size_bytes": 948120 - }, - { - "name": "Gp0127642_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona 
HTML report for Gp0127642", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", - "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", - "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", - "file_size_bytes": 241990 - }, - { - "name": "Gp0127642_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127642", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", - "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", - "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", - "file_size_bytes": 2023464022 - }, - { - "name": "Gp0127642_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127642", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", - "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", - "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", - "file_size_bytes": 257700 - }, - { - "name": "Gp0127642_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127642", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", - "md5_checksum": "bb92f0d18280f32aacf482a43a841372", - "id": "nmdc:bb92f0d18280f32aacf482a43a841372", - "file_size_bytes": 2339227 - }, - { - "name": "Gp0127642_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127642", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", - "md5_checksum": 
"2fddd33160498548fa73e95dfc304d1a", - "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", - "file_size_bytes": 1630988221 - }, - { - "name": "Gp0127642_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127642", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", - "md5_checksum": "272e3daee292c6e284026ee95b72d290", - "id": "nmdc:272e3daee292c6e284026ee95b72d290", - "file_size_bytes": 659136 - }, - { - "name": "Gp0127642_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127642", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", - "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", - "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", - "file_size_bytes": 4013188 - }, { "name": "Gp0127642_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127642", @@ -27314,146 +23017,35 @@ "study_set": [], "field_research_site_set": [], "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199f833bcf838a700f38" - }, - "has_input": [ - "nmdc:603166d1e0da357d356a2029215d76ea" - ], - "part_of": [ - "nmdc:mga0cvxk30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", - "nmdc:0a079e34648ce23b0837dff31e2be5df", - "nmdc:f19bf1723f0f0e9f2158b137d2618b08", - "nmdc:81fc62d01a53a7ab5037829a158f0b64", - "nmdc:05cc05eefdcb0d7bac19031619244a4b", - "nmdc:bb92f0d18280f32aacf482a43a841372", - "nmdc:2fddd33160498548fa73e95dfc304d1a", - "nmdc:272e3daee292c6e284026ee95b72d290", - "nmdc:bca8c2988929e7c176ec7b6609445db2" - ], - "was_informed_by": "gold:Gp0127642", - "id": 
"nmdc:2de70a234a7c626b9f512b8aa3b73717", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cvxk30", - "started_at_time": "2021-12-01T21:30:33Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:50:24+00:00" - } - ] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127646_Filtered Reads", - "description": "Filtered Reads for Gp0127646", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", - "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", - "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", - "file_size_bytes": 2209739723 - }, - { - "name": "Gp0127646_Filtered Stats", - "description": "Filtered Stats for Gp0127646", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", - "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", - "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", - "file_size_bytes": 291 - }, - { - "name": "Gp0127646_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", - "md5_checksum": "3e0598df41941463bac0fdec5df29f55", - "id": "nmdc:3e0598df41941463bac0fdec5df29f55", - "file_size_bytes": 4650 - }, - { - "name": "Gp0127646_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", - "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", - "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", - "file_size_bytes": 877659 - }, - { - "name": "Gp0127646_Gottcha2 Krona HTML 
report", - "description": "Gottcha2 Krona HTML report for Gp0127646", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", - "md5_checksum": "bc8e157195d042d7207d67b4982fea96", - "id": "nmdc:bc8e157195d042d7207d67b4982fea96", - "file_size_bytes": 236676 - }, - { - "name": "Gp0127646_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127646", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", - "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", - "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", - "file_size_bytes": 1901493736 - }, - { - "name": "Gp0127646_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127646", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", - "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", - "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", - "file_size_bytes": 256274 - }, - { - "name": "Gp0127646_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127646", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", - "md5_checksum": "cd10cca62774e66f60d60380ee18132e", - "id": "nmdc:cd10cca62774e66f60d60380ee18132e", - "file_size_bytes": 2333722 - }, - { - "name": "Gp0127646_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127646", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", - 
"md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", - "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", - "file_size_bytes": 1534616616 - }, + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127646_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127646", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", - "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", - "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", - "file_size_bytes": 663507 + "name": "Gp0127646_Filtered Reads", + "description": "Filtered Reads for Gp0127646", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", + "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", + "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", + "file_size_bytes": 2209739723 }, { - "name": "Gp0127646_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127646", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", - "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", - "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", - "file_size_bytes": 4031909 + "name": "Gp0127646_Filtered Stats", + "description": "Filtered Stats for Gp0127646", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", + "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", + "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", + "file_size_bytes": 291 }, { "name": "Gp0127646_Gottcha2 TSV report", @@ 
-27982,39 +23574,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7197d833bcf838a7009e9" - }, - "has_input": [ - "nmdc:208a3777ef0b99408f0d5832dee576e0" - ], - "part_of": [ - "nmdc:mga0dm4q17" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3e0598df41941463bac0fdec5df29f55", - "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", - "nmdc:bc8e157195d042d7207d67b4982fea96", - "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", - "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", - "nmdc:cd10cca62774e66f60d60380ee18132e", - "nmdc:b13ee2ee52d15c3669aecd2e913f2658", - "nmdc:09a2d722810b3d90207bc4cfa626133b", - "nmdc:c3a8d9f48266a43ad74fc581132e2bba" - ], - "was_informed_by": "gold:Gp0127646", - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0dm4q17", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:12+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -28121,85 +23681,6 @@ "id": "nmdc:b549d169e5b0693152555373a6d8ee75", "file_size_bytes": 3952548 }, - { - "name": "Gp0127648_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", - "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", - "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "file_size_bytes": 3323 - }, - { - "name": "Gp0127648_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", - "md5_checksum": "1357df297d8d8a872b335e0c3222d102", - "id": "nmdc:1357df297d8d8a872b335e0c3222d102", - 
"file_size_bytes": 782039 - }, - { - "name": "Gp0127648_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127648", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", - "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", - "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", - "file_size_bytes": 236971 - }, - { - "name": "Gp0127648_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127648", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", - "md5_checksum": "33bf814280051c220e0c4a06f7935728", - "id": "nmdc:33bf814280051c220e0c4a06f7935728", - "file_size_bytes": 1945479328 - }, - { - "name": "Gp0127648_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127648", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", - "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", - "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "file_size_bytes": 255338 - }, - { - "name": "Gp0127648_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127648", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", - "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", - "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", - "file_size_bytes": 2333371 - }, - { - "name": "Gp0127648_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127648", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", - "md5_checksum": "222bac312efdd6c86d2475ad224b7907", - "id": "nmdc:222bac312efdd6c86d2475ad224b7907", - "file_size_bytes": 1562011343 - }, - { - "name": "Gp0127648_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127648", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", - "md5_checksum": "baaca868b1fed932b463e489708dd741", - "id": "nmdc:baaca868b1fed932b463e489708dd741", - "file_size_bytes": 647859 - }, - { - "name": "Gp0127648_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127648", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", - "md5_checksum": "b549d169e5b0693152555373a6d8ee75", - "id": "nmdc:b549d169e5b0693152555373a6d8ee75", - "file_size_bytes": 3952548 - }, { "name": "Gp0127648_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127648", @@ -28664,39 +24145,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7199b833bcf838a700dd2" - }, - "has_input": [ - "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" - ], - "part_of": [ - "nmdc:mga0andh11" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "nmdc:1357df297d8d8a872b335e0c3222d102", - "nmdc:5b510e336e60b6120b43e9b6420a074e", - "nmdc:33bf814280051c220e0c4a06f7935728", - "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "nmdc:0efb0ad19234056d7e2e3726dead3622", - "nmdc:222bac312efdd6c86d2475ad224b7907", - "nmdc:baaca868b1fed932b463e489708dd741", - "nmdc:b549d169e5b0693152555373a6d8ee75" 
- ], - "was_informed_by": "gold:Gp0127648", - "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0andh11", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:13:04+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -28705,103 +24154,24 @@ "extraction_set": [], "activity_set": [], "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127647_Filtered Reads", - "description": "Filtered Reads for Gp0127647", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", - "md5_checksum": "c082eff434fe4863c0e29c79b759d100", - "id": "nmdc:c082eff434fe4863c0e29c79b759d100", - "file_size_bytes": 2052448806 - }, - { - "name": "Gp0127647_Filtered Stats", - "description": "Filtered Stats for Gp0127647", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", - "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", - "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", - "file_size_bytes": 282 - }, - { - "name": "Gp0127647_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", - "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", - "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", - "file_size_bytes": 4666 - }, - { - "name": "Gp0127647_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", - "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", - "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", - "file_size_bytes": 786018 - }, - { - "name": 
"Gp0127647_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127647", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", - "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", - "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", - "file_size_bytes": 237895 - }, - { - "name": "Gp0127647_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127647", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", - "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", - "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", - "file_size_bytes": 1767305277 - }, - { - "name": "Gp0127647_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127647", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", - "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", - "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", - "file_size_bytes": 254858 - }, - { - "name": "Gp0127647_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127647", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", - "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", - "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", - "file_size_bytes": 2332396 - }, - { - "name": "Gp0127647_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127647", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", - "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", - "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", - "file_size_bytes": 1419938277 - }, + "data_object_set": [ { - "name": "Gp0127647_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127647", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", - "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", - "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", - "file_size_bytes": 661837 + "name": "Gp0127647_Filtered Reads", + "description": "Filtered Reads for Gp0127647", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", + "md5_checksum": "c082eff434fe4863c0e29c79b759d100", + "id": "nmdc:c082eff434fe4863c0e29c79b759d100", + "file_size_bytes": 2052448806 }, { - "name": "Gp0127647_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127647", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", - "md5_checksum": "ab80fc324c9206a41a66d64227a97179", - "id": "nmdc:ab80fc324c9206a41a66d64227a97179", - "file_size_bytes": 4028822 + "name": "Gp0127647_Filtered Stats", + "description": "Filtered Stats for Gp0127647", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", + "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", + "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", + "file_size_bytes": 282 }, { "name": "Gp0127647_Gottcha2 TSV report", @@ -29330,39 +24700,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - 
"pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7197e833bcf838a700a51" - }, - "has_input": [ - "nmdc:c082eff434fe4863c0e29c79b759d100" - ], - "part_of": [ - "nmdc:mga0g0e588" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7e1438bf8076daf46f3d782d8f9656b4", - "nmdc:cfd63309cd38a293615ddce5e8ea6402", - "nmdc:7e353b7bfb1586773fa00b515dffe6ec", - "nmdc:6667be33e7867ca2aabfa5d663e2970a", - "nmdc:7ee0b0b21444ee06752e6b9c32f476af", - "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", - "nmdc:45617f93e5f072fbad25a0308ead6c3d", - "nmdc:460e7594fcd06678df1b9c5e5075cb4d", - "nmdc:ab80fc324c9206a41a66d64227a97179" - ], - "was_informed_by": "gold:Gp0127647", - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0g0e588", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:33+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -29469,85 +24807,6 @@ "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", "file_size_bytes": 3973557 }, - { - "name": "Gp0127645_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", - "md5_checksum": "694374188ba4372344536fa26a2282b8", - "id": "nmdc:694374188ba4372344536fa26a2282b8", - "file_size_bytes": 3780 - }, - { - "name": "Gp0127645_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", - "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", - "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", - "file_size_bytes": 822292 - }, - { - "name": "Gp0127645_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona 
HTML report for Gp0127645", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", - "md5_checksum": "46e203465faf61780fad8f626e9ab623", - "id": "nmdc:46e203465faf61780fad8f626e9ab623", - "file_size_bytes": 236496 - }, - { - "name": "Gp0127645_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127645", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", - "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", - "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", - "file_size_bytes": 1699052782 - }, - { - "name": "Gp0127645_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127645", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", - "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", - "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", - "file_size_bytes": 256209 - }, - { - "name": "Gp0127645_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127645", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", - "md5_checksum": "4299b438a815becc8beed40fcb803e9f", - "id": "nmdc:4299b438a815becc8beed40fcb803e9f", - "file_size_bytes": 2336400 - }, - { - "name": "Gp0127645_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127645", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", - "md5_checksum": 
"4ae4dbd13c7338df5c00555bc6755947", - "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", - "file_size_bytes": 1359323947 - }, - { - "name": "Gp0127645_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127645", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", - "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", - "id": "nmdc:2be07eb38d408077a55ecb48e123f7f8", - "file_size_bytes": 651624 - }, - { - "name": "Gp0127645_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127645", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", - "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", - "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", - "file_size_bytes": 3973557 - }, { "name": "Gp0127645_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127645", @@ -29991,146 +25250,35 @@ "study_set": [], "field_research_site_set": [], "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e7197f833bcf838a700ac3" - }, - "has_input": [ - "nmdc:034df323b47f010f27e7c032d445a891" - ], - "part_of": [ - "nmdc:mga0jbfx89" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:694374188ba4372344536fa26a2282b8", - "nmdc:e11dfa7178e8c426c7c930b57aa40377", - "nmdc:46e203465faf61780fad8f626e9ab623", - "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", - "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", - "nmdc:4299b438a815becc8beed40fcb803e9f", - "nmdc:4ae4dbd13c7338df5c00555bc6755947", - "nmdc:2be07eb38d408077a55ecb48e123f7f8", - "nmdc:f318581f0df6e04b7ae2384f9237da06" - ], - "was_informed_by": "gold:Gp0127645", - "id": 
"nmdc:f18f00c9c36689307a6b4188b7b18e32", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0jbfx89", - "started_at_time": "2021-10-11T02:24:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:07:11+00:00" - } - ] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127649_Filtered Reads", - "description": "Filtered Reads for Gp0127649", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", - "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "file_size_bytes": 1967546513 - }, - { - "name": "Gp0127649_Filtered Stats", - "description": "Filtered Stats for Gp0127649", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", - "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", - "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", - "file_size_bytes": 283 - }, - { - "name": "Gp0127649_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", - "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", - "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "file_size_bytes": 2079 - }, - { - "name": "Gp0127649_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", - "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", - "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "file_size_bytes": 642861 - }, - { - "name": "Gp0127649_Gottcha2 Krona HTML 
report", - "description": "Gottcha2 Krona HTML report for Gp0127649", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", - "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", - "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", - "file_size_bytes": 230792 - }, - { - "name": "Gp0127649_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127649", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", - "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", - "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "file_size_bytes": 1743695420 - }, - { - "name": "Gp0127649_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127649", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", - "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", - "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "file_size_bytes": 254036 - }, - { - "name": "Gp0127649_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127649", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", - "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", - "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "file_size_bytes": 2332943 - }, - { - "name": "Gp0127649_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127649", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", - 
"md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", - "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "file_size_bytes": 1387669799 - }, + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + }, + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ { - "name": "Gp0127649_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127649", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", - "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", - "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "file_size_bytes": 637131 + "name": "Gp0127649_Filtered Reads", + "description": "Filtered Reads for Gp0127649", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", + "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "file_size_bytes": 1967546513 }, { - "name": "Gp0127649_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127649", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", - "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", - "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", - "file_size_bytes": 3976407 + "name": "Gp0127649_Filtered Stats", + "description": "Filtered Stats for Gp0127649", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", + "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", + "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", + "file_size_bytes": 283 }, { "name": "Gp0127649_Gottcha2 TSV report", @@ 
-30675,39 +25823,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719f8833bcf838a7018c7" - }, - "has_input": [ - "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" - ], - "part_of": [ - "nmdc:mga0j4fe07" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "nmdc:8c1683fa4041bd10711aa3beb4735811", - "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "nmdc:0d07551972f3230ec2ef4a0e04929b97" - ], - "was_informed_by": "gold:Gp0127649", - "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:32+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -30814,85 +25930,6 @@ "id": "nmdc:1df4b479c887b43319d89cc80dc35239", "file_size_bytes": 3991377 }, - { - "name": "Gp0127652_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", - "md5_checksum": "70f29a321c925cfc0e2003515f708400", - "id": "nmdc:70f29a321c925cfc0e2003515f708400", - "file_size_bytes": 1524 - }, - { - "name": "Gp0127652_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", - "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", - "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", - 
"file_size_bytes": 670250 - }, - { - "name": "Gp0127652_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127652", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", - "md5_checksum": "9cd3b2939adabd809741ae6a84260266", - "id": "nmdc:9cd3b2939adabd809741ae6a84260266", - "file_size_bytes": 229949 - }, - { - "name": "Gp0127652_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127652", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", - "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", - "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "file_size_bytes": 1814515284 - }, - { - "name": "Gp0127652_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127652", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", - "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", - "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127652_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127652", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", - "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", - "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "file_size_bytes": 2330558 - }, - { - "name": "Gp0127652_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127652", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", - "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", - "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "file_size_bytes": 1445957300 - }, - { - "name": "Gp0127652_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127652", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", - "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", - "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "file_size_bytes": 639677 - }, - { - "name": "Gp0127652_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127652", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", - "md5_checksum": "1df4b479c887b43319d89cc80dc35239", - "id": "nmdc:1df4b479c887b43319d89cc80dc35239", - "file_size_bytes": 3991377 - }, { "name": "Gp0127652_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127652", @@ -31395,39 +26432,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719de833bcf838a7015b2" - }, - "has_input": [ - "nmdc:60f03b815160b29125c2bd0776a330bf" - ], - "part_of": [ - "nmdc:mga0mfxf90" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:70f29a321c925cfc0e2003515f708400", - "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "nmdc:9cd3b2939adabd809741ae6a84260266", - "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "nmdc:1df4b479c887b43319d89cc80dc35239" 
- ], - "was_informed_by": "gold:Gp0127652", - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:21+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -31436,103 +26441,24 @@ "extraction_set": [], "activity_set": [], "biosample_set": [], - "data_object_set": [ - { - "name": "Gp0127654_Filtered Reads", - "description": "Filtered Reads for Gp0127654", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", - "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", - "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", - "file_size_bytes": 2479437709 - }, - { - "name": "Gp0127654_Filtered Stats", - "description": "Filtered Stats for Gp0127654", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", - "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", - "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", - "file_size_bytes": 284 - }, - { - "name": "Gp0127654_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", - "md5_checksum": "130ee7559789726a2cadccd3126dacad", - "id": "nmdc:130ee7559789726a2cadccd3126dacad", - "file_size_bytes": 3508 - }, - { - "name": "Gp0127654_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", - "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", - "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "file_size_bytes": 798264 - }, - { - "name": 
"Gp0127654_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127654", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", - "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", - "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "file_size_bytes": 234834 - }, - { - "name": "Gp0127654_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127654", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", - "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", - "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "file_size_bytes": 2231971137 - }, - { - "name": "Gp0127654_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127654", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", - "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", - "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "file_size_bytes": 257151 - }, - { - "name": "Gp0127654_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127654", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", - "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", - "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "file_size_bytes": 2341088 - }, - { - "name": "Gp0127654_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127654", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", - "md5_checksum": "dfc90170aa038c2425702be223cb2f23", - "id": "nmdc:dfc90170aa038c2425702be223cb2f23", - "file_size_bytes": 1782429285 - }, + "data_object_set": [ { - "name": "Gp0127654_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127654", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", - "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", - "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "file_size_bytes": 661482 + "name": "Gp0127654_Filtered Reads", + "description": "Filtered Reads for Gp0127654", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", + "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", + "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", + "file_size_bytes": 2479437709 }, { - "name": "Gp0127654_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127654", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", - "md5_checksum": "1c8339d96884c4a408de7804e00490d1", - "id": "nmdc:1c8339d96884c4a408de7804e00490d1", - "file_size_bytes": 4020719 + "name": "Gp0127654_Filtered Stats", + "description": "Filtered Stats for Gp0127654", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", + "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", + "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", + "file_size_bytes": 284 }, { "name": "Gp0127654_Gottcha2 TSV report", @@ -32061,39 +26987,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - 
"pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719f6833bcf838a7017f0" - }, - "has_input": [ - "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" - ], - "part_of": [ - "nmdc:mga0h0s362" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:130ee7559789726a2cadccd3126dacad", - "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "nmdc:dfc90170aa038c2425702be223cb2f23", - "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "nmdc:1c8339d96884c4a408de7804e00490d1" - ], - "was_informed_by": "gold:Gp0127654", - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -32200,85 +27094,6 @@ "id": "nmdc:ae369194e4b24e137fc23da0412277a6", "file_size_bytes": 3939982 }, - { - "name": "Gp0127656_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", - "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", - "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", - "file_size_bytes": 2418 - }, - { - "name": "Gp0127656_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", - "md5_checksum": "92ab65cdaca3367552e03d895123e04f", - "id": "nmdc:92ab65cdaca3367552e03d895123e04f", - "file_size_bytes": 759212 - }, - { - "name": "Gp0127656_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona 
HTML report for Gp0127656", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", - "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", - "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", - "file_size_bytes": 231563 - }, - { - "name": "Gp0127656_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127656", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", - "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", - "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", - "file_size_bytes": 1950007455 - }, - { - "name": "Gp0127656_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127656", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", - "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", - "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", - "file_size_bytes": 255724 - }, - { - "name": "Gp0127656_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127656", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", - "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", - "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", - "file_size_bytes": 2337553 - }, - { - "name": "Gp0127656_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127656", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", - "md5_checksum": 
"dd953aebfd5cf624a5ffa8c6d6b64b08", - "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", - "file_size_bytes": 1555636513 - }, - { - "name": "Gp0127656_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127656", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", - "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", - "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", - "file_size_bytes": 647090 - }, - { - "name": "Gp0127656_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127656", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", - "md5_checksum": "ae369194e4b24e137fc23da0412277a6", - "id": "nmdc:ae369194e4b24e137fc23da0412277a6", - "file_size_bytes": 3939982 - }, { "name": "Gp0127656_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127656", @@ -32724,39 +27539,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719f0833bcf838a701752" - }, - "has_input": [ - "nmdc:cec95659bb04ae095f51821ddaa9fa59" - ], - "part_of": [ - "nmdc:mga00hh562" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ccbe419157d8286626330fd0eb0dd0e0", - "nmdc:92ab65cdaca3367552e03d895123e04f", - "nmdc:0b3ff6503723d6ea9b84552f68ed4270", - "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", - "nmdc:a3255df52cd6150f03bbf7cbd655ec76", - "nmdc:a25a5d7e399624e5e5735b65a9dd322a", - "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", - "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", - "nmdc:ae369194e4b24e137fc23da0412277a6" - ], - "was_informed_by": "gold:Gp0127656", - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "execution_resource": "NERSC-Cori", - 
"name": "ReadBased Analysis Activity for nmdc:mga00hh562", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -32772,96 +27555,17 @@ "data_object_type": "Filtered Sequencing Reads", "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filtered.fastq.gz", "md5_checksum": "2791a196017767af3b5b21a3029799c0", - "id": "nmdc:2791a196017767af3b5b21a3029799c0", - "file_size_bytes": 1856919615 - }, - { - "name": "Gp0127651_Filtered Stats", - "description": "Filtered Stats for Gp0127651", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", - "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", - "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", - "file_size_bytes": 283 - }, - { - "name": "Gp0127651_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", - "md5_checksum": "53ee263960c39126e039656a121deb96", - "id": "nmdc:53ee263960c39126e039656a121deb96", - "file_size_bytes": 1199 - }, - { - "name": "Gp0127651_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", - "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", - "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "file_size_bytes": 703299 - }, - { - "name": "Gp0127651_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127651", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", - "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", - "id": 
"nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "file_size_bytes": 229311 - }, - { - "name": "Gp0127651_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127651", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", - "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", - "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "file_size_bytes": 1642196063 - }, - { - "name": "Gp0127651_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127651", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", - "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", - "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", - "file_size_bytes": 254418 - }, - { - "name": "Gp0127651_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127651", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", - "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", - "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", - "file_size_bytes": 2333132 - }, - { - "name": "Gp0127651_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127651", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", - "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", - "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "file_size_bytes": 1309125719 - }, - { - "name": "Gp0127651_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127651", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", - "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", - "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "file_size_bytes": 639737 - }, - { - "name": "Gp0127651_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127651", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", - "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", - "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", - "file_size_bytes": 3988966 + "id": "nmdc:2791a196017767af3b5b21a3029799c0", + "file_size_bytes": 1856919615 + }, + { + "name": "Gp0127651_Filtered Stats", + "description": "Filtered Stats for Gp0127651", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", + "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", + "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", + "file_size_bytes": 283 }, { "name": "Gp0127651_Gottcha2 TSV report", @@ -33387,39 +28091,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719ba833bcf838a7012b5" - }, - "has_input": [ - "nmdc:2791a196017767af3b5b21a3029799c0" - ], - "part_of": [ - "nmdc:mga08hnt47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:53ee263960c39126e039656a121deb96", - "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "nmdc:b4cbc81c986c67c1037c8b7280924683", - "nmdc:e0c61a191258597984a05d86eaf4d71f", - "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" - ], - "was_informed_by": 
"gold:Gp0127651", - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga08hnt47", - "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:57:48+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -33526,85 +28198,6 @@ "id": "nmdc:157f7672690ba8207808cc4386ff10a4", "file_size_bytes": 3946317 }, - { - "name": "Gp0127655_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", - "md5_checksum": "46371c7bc8259e459f975f915aaac26f", - "id": "nmdc:46371c7bc8259e459f975f915aaac26f", - "file_size_bytes": 2178 - }, - { - "name": "Gp0127655_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", - "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", - "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", - "file_size_bytes": 697690 - }, - { - "name": "Gp0127655_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127655", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", - "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", - "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "file_size_bytes": 231103 - }, - { - "name": "Gp0127655_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127655", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", - "md5_checksum": "e3f410adc2347396abfdec2a848000d9", - "id": 
"nmdc:e3f410adc2347396abfdec2a848000d9", - "file_size_bytes": 1676897166 - }, - { - "name": "Gp0127655_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127655", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", - "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", - "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "file_size_bytes": 253692 - }, - { - "name": "Gp0127655_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127655", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", - "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", - "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "file_size_bytes": 2329422 - }, - { - "name": "Gp0127655_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127655", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", - "md5_checksum": "1d4f5a605d4549801fda16da567efe56", - "id": "nmdc:1d4f5a605d4549801fda16da567efe56", - "file_size_bytes": 1336793184 - }, - { - "name": "Gp0127655_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127655", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", - "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", - "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", - "file_size_bytes": 632192 - }, - { - "name": "Gp0127655_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127655", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", - "md5_checksum": "157f7672690ba8207808cc4386ff10a4", - "id": "nmdc:157f7672690ba8207808cc4386ff10a4", - "file_size_bytes": 3946317 - }, { "name": "Gp0127655_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127655", @@ -34050,39 +28643,7 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719df833bcf838a701627" - }, - "has_input": [ - "nmdc:04b9014981f7035c39bd7f870613ed93" - ], - "part_of": [ - "nmdc:mga0317978" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:46371c7bc8259e459f975f915aaac26f", - "nmdc:5dd9bc51105920f3f629e8106235af3b", - "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "nmdc:e3f410adc2347396abfdec2a848000d9", - "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "nmdc:1d4f5a605d4549801fda16da567efe56", - "nmdc:8bb5c66575c7c953719ae9947600ad49", - "nmdc:157f7672690ba8207808cc4386ff10a4" - ], - "was_informed_by": "gold:Gp0127655", - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0317978", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:21:25+00:00" - } - ] + "pooling_set": [] }, { "functional_annotation_agg": [], @@ -34189,85 +28750,6 @@ "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", "file_size_bytes": 3982485 }, - { - "name": "Gp0127653_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", - "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", - "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "file_size_bytes": 
3812 - }, - { - "name": "Gp0127653_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", - "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", - "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", - "file_size_bytes": 857087 - }, - { - "name": "Gp0127653_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127653", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", - "md5_checksum": "284ce1b28b8964cb525025d678277dba", - "id": "nmdc:284ce1b28b8964cb525025d678277dba", - "file_size_bytes": 235621 - }, - { - "name": "Gp0127653_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127653", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", - "md5_checksum": "a379527f61806391e42b3512146013a8", - "id": "nmdc:a379527f61806391e42b3512146013a8", - "file_size_bytes": 1437707313 - }, - { - "name": "Gp0127653_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127653", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", - "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", - "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "file_size_bytes": 255105 - }, - { - "name": "Gp0127653_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127653", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", - "md5_checksum": 
"3219058371bf2f8081b2dd2b434ec145", - "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", - "file_size_bytes": 2327985 - }, - { - "name": "Gp0127653_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127653", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", - "md5_checksum": "be29ebcd7358653afec7381f9ca43431", - "id": "nmdc:be29ebcd7358653afec7381f9ca43431", - "file_size_bytes": 1164013677 - }, - { - "name": "Gp0127653_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127653", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", - "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", - "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "file_size_bytes": 638368 - }, - { - "name": "Gp0127653_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127653", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", - "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", - "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", - "file_size_bytes": 3982485 - }, { "name": "Gp0127653_Assembled contigs fasta", "description": "Assembled contigs fasta for Gp0127653", @@ -34658,38 +29140,6 @@ "collecting_biosamples_from_site_set": [], "date_created": null, "etl_software_version": null, - "pooling_set": [], - "read_based_analysis_activity_set": [ - { - "_id": { - "$oid": "61e719b9833bcf838a70124b" - }, - "has_input": [ - "nmdc:8eec0e9c14abb418b906504d1675ecc5" - ], - "part_of": [ - "nmdc:mga079y988" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - 
"nmdc:dbf03e26f7e1529762830161fe1f1906", - "nmdc:284ce1b28b8964cb525025d678277dba", - "nmdc:a379527f61806391e42b3512146013a8", - "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "nmdc:3219058371bf2f8081b2dd2b434ec145", - "nmdc:be29ebcd7358653afec7381f9ca43431", - "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" - ], - "was_informed_by": "gold:Gp0127653", - "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga079y988", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:52:13+00:00" - } - ] + "pooling_set": [] } ] \ No newline at end of file From 50aad19632c6b28b8871f84fb59ddaf178c85c75 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 7 Nov 2023 13:02:42 -0800 Subject: [PATCH 39/91] add single-record extract output for testing purposes --- .../scripts/test_metagenome_record.json | 640 ++++++++++++++++++ 1 file changed, 640 insertions(+) create mode 100644 nmdc_automation/re_iding/scripts/test_metagenome_record.json diff --git a/nmdc_automation/re_iding/scripts/test_metagenome_record.json b/nmdc_automation/re_iding/scripts/test_metagenome_record.json new file mode 100644 index 00000000..638c9f4d --- /dev/null +++ b/nmdc_automation/re_iding/scripts/test_metagenome_record.json @@ -0,0 +1,640 @@ +[ + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2861414297, + "type": "nmdc:DataObject", + "id": "jgi:55d740280d8785342fcf7e39", + "name": "9422.8.132674.GTTTCG.fastq.gz" + }, + { + "name": "Gp0115663_Filtered Reads", + "description": "Filtered Reads for Gp0115663", + "data_object_type": "Filtered Sequencing Reads", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "id": "nmdc:7bf778baef033d36f118f8591256d6ef", + "file_size_bytes": 2571324879 + }, + { + "name": "Gp0115663_Filtered Stats", + "description": "Filtered Stats for Gp0115663", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", + "file_size_bytes": 290 + }, + { + "name": "Gp0115663_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "file_size_bytes": 13174 + }, + { + "name": "Gp0115663_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "file_size_bytes": 1035818 + }, + { + "name": "Gp0115663_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115663", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "file_size_bytes": 262669 + }, + { + "name": "Gp0115663_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115663", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "file_size_bytes": 2189843623 + }, + { + "name": "Gp0115663_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115663", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "file_size_bytes": 260134 + }, + { + "name": "Gp0115663_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115663", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "file_size_bytes": 2343980 + }, + { + "name": "Gp0115663_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115663", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", + "file_size_bytes": 1785563917 + }, + { + "name": "Gp0115663_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115663", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "file_size_bytes": 699896 + }, + { + "name": "Gp0115663_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115663", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", + "file_size_bytes": 4221977 + }, + { + "name": "Gp0115663_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115663", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", + "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", + "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "file_size_bytes": 90115831 + }, + { + "name": "Gp0115663_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115663", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", + "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "id": "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "file_size_bytes": 89604715 + }, + { + "name": "Gp0115663_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", + "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "file_size_bytes": 13412363 + }, + { + "name": "Gp0115663_Assembled AGP file", + "description": "Assembled AGP file for Gp0115663", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", + "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "id": "nmdc:f450e3800e17691d5874c89fc46c186a", + "file_size_bytes": 12542171 + }, + { + "name": "Gp0115663_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0115663", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", + "md5_checksum": "31dc958d116d02122509e90b0883954f", + "id": "nmdc:31dc958d116d02122509e90b0883954f", + "file_size_bytes": 2773429299 + }, + { + "name": "Gp0115663_Protein FAA", + "description": "Protein FAA for Gp0115663", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", + "md5_checksum": "879988d212ecec46928b8598e2f8391f", + "id": "nmdc:879988d212ecec46928b8598e2f8391f", + "file_size_bytes": 50165060 + }, + { + "name": "Gp0115663_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115663", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", + "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", + "id": "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "file_size_bytes": 2767 + }, + { + "name": "Gp0115663_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115663", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", + "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", + "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "file_size_bytes": 55139586 + }, + { + "name": "Gp0115663_KO TSV file", + "description": "KO TSV file for Gp0115663", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", + "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", + "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "file_size_bytes": 6023696 + }, + { + "name": "Gp0115663_EC TSV file", + 
"description": "EC TSV file for Gp0115663", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", + "md5_checksum": "4f88c89459f36655eb7c1eceec19602a", + "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", + "file_size_bytes": 3982918 + }, + { + "name": "Gp0115663_COG GFF file", + "description": "COG GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", + "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", + "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "file_size_bytes": 27362917 + }, + { + "name": "Gp0115663_PFAM GFF file", + "description": "PFAM GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", + "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", + "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", + "file_size_bytes": 21572048 + }, + { + "name": "Gp0115663_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", + "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", + "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "file_size_bytes": 2900068 + }, + { + "name": "Gp0115663_SMART GFF file", + "description": "SMART GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", + "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", + "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "file_size_bytes": 6905519 + }, + { + "name": "Gp0115663_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", + "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", + "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "file_size_bytes": 38787856 + }, + { + 
"name": "Gp0115663_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", + "md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", + "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "file_size_bytes": 30134783 + }, + { + "name": "Gp0115663_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", + "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", + "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", + "file_size_bytes": 19194308 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", + "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", + "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "file_size_bytes": 70411007 + }, + { + "name": "Gp0115663_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", + "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", + "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "file_size_bytes": 15998690 + }, + { + "name": "Gp0115663_metabat2 bin checkm quality assessment result", + "description": "metabat2 
bin checkm quality assessment result for Gp0115663", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", + "md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", + "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "file_size_bytes": 1530 + }, + { + "name": "Gp0115663_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115663", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", + "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", + "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "file_size_bytes": 182 + }, + { + "name": "Gp0115663_metabat2 bins", + "description": "metabat2 bins for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", + "md5_checksum": "27c07072f175571200b5931550adb8aa", + "id": "nmdc:27c07072f175571200b5931550adb8aa", + "file_size_bytes": 1114314 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34734" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:31dc958d116d02122509e90b0883954f", + "nmdc:002e4ebc728f8b91cb5f298d340ab013" + ], + "too_short_contig_num": 159810, + "part_of": [ + "nmdc:mga0h9dt75" + ], + "binned_contig_num": 684, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "nmdc:27c07072f175571200b5931550adb8aa" + ], + "was_informed_by": "gold:Gp0115663", + "input_contig_num": 169782, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": 
"NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", + "mags_list": [ + { + "number_of_contig": 61, + "completeness": 13.82, + "bin_name": "bins.1", + "gene_count": 294, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.62, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 485, + "completeness": 66.03, + "bin_name": "bins.2", + "gene_count": 2871, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.87, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 32 + }, + { + "number_of_contig": 56, + "completeness": 34.23, + "bin_name": "bins.3", + "gene_count": 337, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 63, + "completeness": 6.9, + "bin_name": "bins.4", + "gene_count": 276, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 19, + "completeness": 4.45, + "bin_name": "bins.5", + "gene_count": 463, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 9288, 
+ "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d6" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:879988d212ecec46928b8598e2f8391f", + "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "nmdc:4f88c89459f36655eb7c1eceec19602a", + "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "nmdc:618b18fa8635c80cc0091371f451a6f0", + "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "nmdc:75f481e0d98793cfb4f9508cb3e31622" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb9" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "ctg_logsum": 214373, + "scaf_logsum": 215363, + "gap_pct": 0.00188, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "nmdc:f450e3800e17691d5874c89fc46c186a", + "nmdc:31dc958d116d02122509e90b0883954f" + ], + "asm_score": 6.577, + 
"was_informed_by": "gold:Gp0115663", + "ctg_powsum": 24284, + "scaf_max": 68135, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "scaf_powsum": 24422, + "execution_resource": "NERSC-Cori", + "contigs": 169784, + "name": "Assembly Activity for nmdc:mga0h9dt75", + "ctg_max": 68135, + "gc_std": 0.11726, + "contig_bp": 83494920, + "gc_avg": 0.46001, + "started_at_time": "2021-10-11T02:28:26Z", + "scaf_bp": 83496490, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169645, + "ended_at_time": "2021-10-11T04:56:04+00:00", + "ctg_l50": 470, + "ctg_l90": 290, + "ctg_n50": 45584, + "ctg_n90": 141996, + "scaf_l50": 470, + "scaf_l90": 290, + "scaf_n50": 45550, + "scaf_n90": 141870, + "scaf_l_gt50k": 68135, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.08160224 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b33" + }, + "id": "nmdc:omprc-11-bn8jcq58", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qq8s6x03" + ], + "has_output": [ + "jgi:55d740280d8785342fcf7e39" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115663" + ] + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": 
"649b009d6bdd4fd20273c88b" + }, + "has_input": [ + "nmdc:30a06664f29cffbbbc49abad86eae6fc" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7bf778baef033d36f118f8591256d6ef", + "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" + ], + "was_informed_by": "gold:Gp0115663", + "input_read_count": 32238374, + "output_read_bases": 4608772924, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4867994474, + "name": "Read QC Activity for nmdc:mga0h9dt75", + "output_read_count": 30774080, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf4f" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "nmdc:7ca01ea379f0baed96f87d1435925f95", + "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + } +] \ No newline at end of file From bfac07ba6af342594787cc45ef31dd21c569b68a 
Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Tue, 7 Nov 2023 13:40:31 -0800 Subject: [PATCH 40/91] added click argument for process_analysis_set --- nmdc_automation/re_iding/re_id_process.py | 36 +++++++++++++++-------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/nmdc_automation/re_iding/re_id_process.py b/nmdc_automation/re_iding/re_id_process.py index 62b8e6c9..baef81d4 100755 --- a/nmdc_automation/re_iding/re_id_process.py +++ b/nmdc_automation/re_iding/re_id_process.py @@ -3,8 +3,7 @@ import sys import re import yaml -import datetime -import pytz +import click from pymongo import MongoClient import json from nmdc_automation.api import NmdcRuntimeApi @@ -516,8 +515,25 @@ def readbased_update(omics_record, template_file, omic_id, workflow_inputs): ) -def process_analysis_sets(study_id,study_records, template_file, dry_run=False): +@click.command() +@click.argument('study_id') +@click.argument('study_records_file', type=click.Path(exists=True)) +@click.argument('template_file', type=click.Path(exists=True)) +@click.option('--dry-run', is_flag=True, help='Run the script in dry-run mode without making any changes.') +def process_analysis_sets(study_id,study_records_file, template_file, dry_run): + """ + Process analysis activity sets for a given STUDY_ID using STUDY_RECORDS and TEMPLATE_FILE. + + \b + STUDY_ID: The identifier for the study. + STUDY_RECORDS: The path to the JSON file containing study records. + TEMPLATE_FILE: The path to the template file containing . 
+ """ count = 0 + + #read in json to memory: + study_records = read_json_file(study_records_file) + for omic_record in study_records: omics_id = get_omics_id(omic_record) logging.info(f"Starting re-iding process for {omics_id}") @@ -545,14 +561,8 @@ def process_analysis_sets(study_id,study_records, template_file, dry_run=False): if count == 1: break - -def main(study_id,study_data, template_file, dry_run=False): - process_analysis_sets(study_id,study_data, template_file, dry_run) - - if __name__ == "__main__": - test_file = "scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json" - study_id = "nmdc:sty-11-aygzgv51" - template_file = "../../configs/re_iding_worklfows.yaml" - stegen_data = read_json_file(test_file) - main(study_id,stegen_data, template_file, dry_run=True) + # test_file = "scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json" + # study_id = "nmdc:sty-11-aygzgv51" + # template_file = "../../configs/re_iding_worklfows.yaml" + process_analysis_sets() From 5da652225266b2d92014c2f6828fc9953d55be00 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 7 Nov 2023 13:56:02 -0800 Subject: [PATCH 41/91] update to search for orphaned data records and include in output --- nmdc_automation/api/nmdcapi.py | 15 + nmdc_automation/re_iding/mapping.log | 1 + .../extract_metagenome_workflow_records.py | 14 + ...sty-11-aygzgv51_assocated_record_dump.json | 12546 +++++++++++++++- 4 files changed, 11891 insertions(+), 685 deletions(-) create mode 100644 nmdc_automation/re_iding/mapping.log diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 2c86bcd7..7a5988cf 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -403,6 +403,21 @@ def get_workflow_activity_informed_by(self, workflow_activity_set: str, workflow_activity_record = response.json()["cursor"]["firstBatch"] return workflow_activity_record + def get_data_objects_by_description(self, description: str): + """ + Retrieve data objects the given 
description in its description. + """ + response = self.request( + "POST", + "queries:run", + params_or_json_data={ + "find": "data_object_set", + "filter": {"description": {"$regex": description, "$options": "i"}}, + }, + ) + response.raise_for_status() + return response.json()["cursor"]["firstBatch"] + def get_data_object_by_id(self, data_object_id: str): """ Retrieve a data object record for the given data object ID. diff --git a/nmdc_automation/re_iding/mapping.log b/nmdc_automation/re_iding/mapping.log new file mode 100644 index 00000000..974bbbe8 --- /dev/null +++ b/nmdc_automation/re_iding/mapping.log @@ -0,0 +1 @@ +omics_id gold nmdc:omprc-11-bn8jcq58 diff --git a/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py index ebf25a64..079214e1 100644 --- a/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py @@ -124,12 +124,26 @@ def extract_workflow_records(study_id: str, site_config: bool): db.__setattr__(set_name, records) # Add the data objects referenced by the `has_output` property for record in records: + logging.info(f"record: {record['id']}, {record['name']}") for data_object_id in record["has_output"]: data_object_record = query_api_client.get_data_object_by_id( data_object_id ) + logging.info(f"data_object_record: " + f"{data_object_record['id']}, {data_object_record['description']}") db.data_object_set.append(data_object_record) + # Search for orphaned data objects with the legacy ID in the description + orphaned_data_objects = query_api_client.get_data_objects_by_description( + legacy_id + ) + # check that we don't already have the data object in the set + for data_object in orphaned_data_objects: + if data_object["id"] not in [d["id"] for d in db.data_object_set]: + db.data_object_set.append(data_object) + logging.info(f"Added orphaned data object: " + 
f"{data_object['id']}, {data_object['description']}") + retrieved_databases.append(db) with open(f"{study_id}_assocated_record_dump.json", 'w') as json_file: diff --git a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json index cc5b8c9b..d3156d14 100644 --- a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json +++ b/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json @@ -306,6 +306,224 @@ "md5_checksum": "27c07072f175571200b5931550adb8aa", "id": "nmdc:27c07072f175571200b5931550adb8aa", "file_size_bytes": 1114314 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5a" + }, + "description": "Assembled scaffold fasta for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly_scaffolds.fna", + "file_size_bytes": 88756490, + "type": "nmdc:DataObject", + "id": "nmdc:321a497bc1c3cf25affc8e659b746ba5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/mapping_stats.txt", + "file_size_bytes": 12563453, + "type": "nmdc:DataObject", + "id": "nmdc:ad47215b9b079c1d94a8fc56385dee36", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5f" + }, + "description": "Assembled contigs fasta for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly_contigs.fna", + "file_size_bytes": 89266921, + "type": "nmdc:DataObject", + "id": "nmdc:0a3d00715d01ad7b8f3aee59b674dfe9", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d61" + }, + "description": "Assembled AGP 
file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly.agp", + "file_size_bytes": 10842941, + "type": "nmdc:DataObject", + "id": "nmdc:bc01f0f507c9dac65d8a8e40e41a8c48", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d63" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2729039400, + "type": "nmdc:DataObject", + "id": "nmdc:668d207be5ea844f988fbfb2813564cc", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15979" + }, + "id": "nmdc:9509adb5a013006dfda9754429cfc968", + "name": "1781_86101.krona.html", + "description": "Gold:Gp0115663 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86101/ReadbasedAnalysis/centrifuge/1781_86101.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15980" + }, + "id": "nmdc:6dea4c58f402b5c3935e8f1a545bec47", + "name": "1781_86101.json", + "description": "Gold:Gp0115663 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86101/ReadbasedAnalysis/1781_86101.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16253" + }, + "id": "nmdc:0c4c875e5b10c6b742c14c22e2926751", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115663", + "file_size_bytes": 68423774, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16254" + }, + "id": "nmdc:c55d6b00aa5d4af8cd46d349e17d4127", + "name": "bins.unbinned.fa", + 
"description": "unbinned fasta file from metabat2 for gold:Gp0115663", + "file_size_bytes": 16857267, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16256" + }, + "id": "nmdc:1346fe25b6ff22180eb3a51204e0b1fc", + "name": "gold:Gp0115663.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115663", + "file_size_bytes": 224772, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16258" + }, + "id": "nmdc:818f5a47d1371295f9313909ea12eb50", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115663", + "file_size_bytes": 1141, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16259" + }, + "id": "nmdc:a755bb87aded36aefbd8022506a793c7", + "name": "gold:Gp0115663.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115663", + "file_size_bytes": 2225340, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625a" + }, + "id": "nmdc:e0b7421514f976cb7ad8c343cf3077a9", + "name": "gold:Gp0115663.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115663", + "file_size_bytes": 288873, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d91" + }, + "description": "KO TSV File for gold:Gp0115663", + "url": 
"https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_ko.tsv", + "md5_checksum": "8d250650c90956edff8bafccc56fd630", + "file_size_bytes": 3385, + "id": "nmdc:8d250650c90956edff8bafccc56fd630", + "name": "gold:Gp0115663_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d94" + }, + "description": "Functional annotation GFF file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_functional_annotation.gff", + "md5_checksum": "b7e9c8d0bffdd13ace6f862a61fa87d2", + "file_size_bytes": 3385, + "id": "nmdc:b7e9c8d0bffdd13ace6f862a61fa87d2", + "name": "gold:Gp0115663_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d95" + }, + "description": "Protein FAA for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_proteins.faa", + "md5_checksum": "754074d3bcade65aba2a6f8236619ab7", + "file_size_bytes": 3385, + "id": "nmdc:754074d3bcade65aba2a6f8236619ab7", + "name": "gold:Gp0115663_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d97" + }, + "description": "Structural annotation GFF file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_structural_annotation.gff", + "md5_checksum": "a4b4c623457aa10161d88a9ac4eef522", + "file_size_bytes": 3385, + "id": "nmdc:a4b4c623457aa10161d88a9ac4eef522", + "name": "gold:Gp0115663_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da8" + }, + "description": "EC TSV File for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_ec.tsv", + 
"md5_checksum": "27319f58c616a07159e1fac12635bd4b", + "file_size_bytes": 3385, + "id": "nmdc:27319f58c616a07159e1fac12635bd4b", + "name": "gold:Gp0115663_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -944,6 +1162,224 @@ "md5_checksum": "df08913532a84681996a29d1a1c127b3", "id": "nmdc:df08913532a84681996a29d1a1c127b3", "file_size_bytes": 1559491 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6c" + }, + "description": "Assembled contigs fasta for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly_contigs.fna", + "file_size_bytes": 62686167, + "type": "nmdc:DataObject", + "id": "nmdc:aa60f90793266081a0ba6d125fb06e55", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/mapping_stats.txt", + "file_size_bytes": 8596464, + "type": "nmdc:DataObject", + "id": "nmdc:0157a89bc2e56ad99bae7289b9df7fb6", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6e" + }, + "description": "Assembled scaffold fasta for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly_scaffolds.fna", + "file_size_bytes": 62335089, + "type": "nmdc:DataObject", + "id": "nmdc:2dd25f896d7b6100a24987d1496e2646", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6f" + }, + "description": "Assembled AGP file for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly.agp", + "file_size_bytes": 7382646, + "type": "nmdc:DataObject", + "id": "nmdc:c29f2a80b289985b57570884a5c92548", + "name": "assembly.agp", + 
"data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d73" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1914805023, + "type": "nmdc:DataObject", + "id": "nmdc:1abeac4b3490b9baf8206f8df963a646", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159a5" + }, + "id": "nmdc:2e76bec484e1a06b2f84b1c230cd97b4", + "name": "1781_86096.json", + "description": "Gold:Gp0115666 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86096/ReadbasedAnalysis/1781_86096.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159a9" + }, + "id": "nmdc:d5ab2504c9505d6cb96b348b71f2efc5", + "name": "1781_86096.krona.html", + "description": "Gold:Gp0115666 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86096/ReadbasedAnalysis/centrifuge/1781_86096.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16267" + }, + "id": "nmdc:5b80fde8feaaab94a1774e7d61863048", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115666", + "file_size_bytes": 45445156, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626b" + }, + "id": "nmdc:5c7b37ac5d11fd3ffcbcc63b2e15f627", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115666", + "file_size_bytes": 1190, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM 
Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626d" + }, + "id": "nmdc:a4fab93f1102baf069e09b65cb13e87a", + "name": "gold:Gp0115666.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115666", + "file_size_bytes": 3971570, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626e" + }, + "id": "nmdc:88ef6e640707d816e9df8d751c31e71b", + "name": "gold:Gp0115666.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115666", + "file_size_bytes": 559859, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16271" + }, + "id": "nmdc:ce2c968f1093b25da9ac4399291eede6", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115666", + "file_size_bytes": 11541386, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16281" + }, + "id": "nmdc:ffdbeb92d859d6b7e828f3d6f8219e0b", + "name": "gold:Gp0115666.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115666", + "file_size_bytes": 346195, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7d" + }, + "description": "Functional annotation GFF file for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_functional_annotation.gff", + "md5_checksum": "a1e8795537eca0522357d60045780ab3", + "file_size_bytes": 3385, + "id": "nmdc:a1e8795537eca0522357d60045780ab3", + "name": "gold:Gp0115666_Functional 
annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d81" + }, + "description": "Structural annotation GFF file for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_structural_annotation.gff", + "md5_checksum": "654201c4699079bdd923dcff52881c07", + "file_size_bytes": 3385, + "id": "nmdc:654201c4699079bdd923dcff52881c07", + "name": "gold:Gp0115666_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d84" + }, + "description": "EC TSV File for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_ec.tsv", + "md5_checksum": "4e3f389524497182aa3e8832aa7b373b", + "file_size_bytes": 3385, + "id": "nmdc:4e3f389524497182aa3e8832aa7b373b", + "name": "gold:Gp0115666_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8a" + }, + "description": "KO TSV File for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_ko.tsv", + "md5_checksum": "ab262feeaf856be190b60ea7c0a4c030", + "file_size_bytes": 3385, + "id": "nmdc:ab262feeaf856be190b60ea7c0a4c030", + "name": "gold:Gp0115666_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8d" + }, + "description": "Protein FAA for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_proteins.faa", + "md5_checksum": "70c8e0fc6e64b20e99a4c0f783014142", + "file_size_bytes": 3385, + "id": "nmdc:70c8e0fc6e64b20e99a4c0f783014142", + "name": "gold:Gp0115666_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": 
[], @@ -1522,90 +1958,332 @@ "md5_checksum": "3f4c7c98bb94687eb96382799c8626fe", "id": "nmdc:3f4c7c98bb94687eb96382799c8626fe", "file_size_bytes": 2145953 - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ + }, { "_id": { - "$oid": "649b0052ec087f6bbab34736" + "$oid": "649b003c1ae706d7b5b14d77" }, - "has_input": [ - "nmdc:b2b862aede4f333acec79aac3afc7254", - "nmdc:f7a4bb0be4599b544360617190b45681", - "nmdc:7a861805138d425525f298c1790b58ed" - ], - "too_short_contig_num": 297764, - "part_of": [ - "nmdc:mga0n66h21" - ], - "binned_contig_num": 1669, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:8c05fc754583d51714bc1aa81396e59d", - "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", - "nmdc:60db1474ee6a099c10e4fdc728420cf8", - "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", - "nmdc:3f4c7c98bb94687eb96382799c8626fe" - ], - "was_informed_by": "gold:Gp0115668", - "input_contig_num": 323261, - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0n66h21", - "mags_list": [ - { - "number_of_contig": 1013, - "completeness": 12.29, - "bin_name": "bins.1", - "gene_count": 4188, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 2.32, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 20 - }, - { - "number_of_contig": 599, - "completeness": 58.72, - "bin_name": "bins.2", - "gene_count": 2940, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 12.95, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 25 - }, - { - "number_of_contig": 
57, - "completeness": 4.0, - "bin_name": "bins.3", - "gene_count": 258, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", + "description": "Assembled contigs fasta for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly_contigs.fna", + "file_size_bytes": 180872288, + "type": "nmdc:DataObject", + "id": "nmdc:0ce94528dc5ad4d5b62293d4d95c1e9e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d79" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/mapping_stats.txt", + "file_size_bytes": 24065993, + "type": "nmdc:DataObject", + "id": "nmdc:3d9a9633303a795133a0afbbe7541354", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7b" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2912328623, + "type": "nmdc:DataObject", + "id": "nmdc:0b3a3146c8e3d01fe0cbda4de3a58ff1", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7d" + }, + "description": "Assembled scaffold fasta for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly_scaffolds.fna", + "file_size_bytes": 179900502, + "type": "nmdc:DataObject", + "id": "nmdc:aaab457bbc67e3a755340b9c94d15286", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d82" + }, + "description": "Assembled AGP file for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly.agp", + "file_size_bytes": 
20866681, + "type": "nmdc:DataObject", + "id": "nmdc:327d188b5936d3c95c61bc9f2131da76", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159a0" + }, + "id": "nmdc:b582f88ff691a520217093bc43cf2cbf", + "name": "1781_86100.krona.html", + "description": "Gold:Gp0115668 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86100/ReadbasedAnalysis/centrifuge/1781_86100.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159ab" + }, + "id": "nmdc:34e913d729110bd83d9e44e130550f83", + "name": "1781_86100.json", + "description": "Gold:Gp0115668 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86100/ReadbasedAnalysis/1781_86100.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16272" + }, + "id": "nmdc:328968741e1f9405e81f711e4e419c60", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115668", + "file_size_bytes": 39575271, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16273" + }, + "id": "nmdc:af5ce540e803059bb726b9d73a794dc2", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115668", + "file_size_bytes": 132519280, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16275" + }, + "id": "nmdc:f61532f16df6a916b9ecc308a8d555a2", + "name": "gold:Gp0115668.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 957388, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.5.fa", + 
"type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16276" + }, + "id": "nmdc:40b27b33a28b3b16da74479fb8516aff", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115668", + "file_size_bytes": 1404, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16277" + }, + "id": "nmdc:e0d56b325b27af3c1fff5d603e5c5db1", + "name": "gold:Gp0115668.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 246415, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16279" + }, + "id": "nmdc:5dc85b63c568dfee4fabacf43b5ec75c", + "name": "gold:Gp0115668.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 1076024, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627a" + }, + "id": "nmdc:39ac1fcf35046edc399b1b64faa56ca0", + "name": "gold:Gp0115668.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 1385677, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16280" + }, + "id": "nmdc:81a69601c9bf2a04762f30b38fd796ea", + "name": "gold:Gp0115668.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 2654069, + "url": 
"https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8e" + }, + "description": "Protein FAA for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_proteins.faa", + "md5_checksum": "f97c44951275f8b68fa94ded40fda756", + "file_size_bytes": 3385, + "id": "nmdc:f97c44951275f8b68fa94ded40fda756", + "name": "gold:Gp0115668_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8f" + }, + "description": "Structural annotation GFF file for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_structural_annotation.gff", + "md5_checksum": "b4764f173896dcb134d7c94c1ee13ca3", + "file_size_bytes": 3385, + "id": "nmdc:b4764f173896dcb134d7c94c1ee13ca3", + "name": "gold:Gp0115668_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d90" + }, + "description": "Functional annotation GFF file for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_functional_annotation.gff", + "md5_checksum": "c57d28f7dd791aab5c4caee00b247ef9", + "file_size_bytes": 3385, + "id": "nmdc:c57d28f7dd791aab5c4caee00b247ef9", + "name": "gold:Gp0115668_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da6" + }, + "description": "KO TSV File for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_ko.tsv", + "md5_checksum": "dbd78725415f5f8e80f590c3588a1c60", + "file_size_bytes": 3385, + "id": "nmdc:dbd78725415f5f8e80f590c3588a1c60", + "name": "gold:Gp0115668_KO TSV File", + 
"data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16daa" + }, + "description": "EC TSV File for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_ec.tsv", + "md5_checksum": "bcbae14f9733da2b512b5f5b6c8fcb98", + "file_size_bytes": 3385, + "id": "nmdc:bcbae14f9733da2b512b5f5b6c8fcb98", + "name": "gold:Gp0115668_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34736" + }, + "has_input": [ + "nmdc:b2b862aede4f333acec79aac3afc7254", + "nmdc:f7a4bb0be4599b544360617190b45681", + "nmdc:7a861805138d425525f298c1790b58ed" + ], + "too_short_contig_num": 297764, + "part_of": [ + "nmdc:mga0n66h21" + ], + "binned_contig_num": 1669, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8c05fc754583d51714bc1aa81396e59d", + "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", + "nmdc:60db1474ee6a099c10e4fdc728420cf8", + "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", + "nmdc:3f4c7c98bb94687eb96382799c8626fe" + ], + "was_informed_by": "gold:Gp0115668", + "input_contig_num": 323261, + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0n66h21", + "mags_list": [ + { + "number_of_contig": 1013, + "completeness": 12.29, + "bin_name": "bins.1", + "gene_count": 4188, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.32, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 + }, + { + "number_of_contig": 599, + "completeness": 58.72, + 
"bin_name": "bins.2", + "gene_count": 2940, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 12.95, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 25 + }, + { + "number_of_contig": 57, + "completeness": 4.0, + "bin_name": "bins.3", + "gene_count": 258, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", "contamination": 0.03, "gtdbtk_class": "", "gtdbtk_phylum": "", @@ -2122,6 +2800,391 @@ "md5_checksum": "9250ad41cb19e04a6002e62bda38bbfb", "id": "nmdc:9250ad41cb19e04a6002e62bda38bbfb", "file_size_bytes": 1649649 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da7" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/mapping_stats.txt", + "file_size_bytes": 39709915, + "type": "nmdc:DataObject", + "id": "nmdc:eb7b565580c8a81f8c674ce87a7c07c3", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da9" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 6165329815, + "type": "nmdc:DataObject", + "id": "nmdc:11956fa2a6c996aedac70f779222570f", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dab" + }, + "description": "Assembled scaffold fasta for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/assembly_scaffolds.fna", + "file_size_bytes": 270701949, + "type": "nmdc:DataObject", + "id": "nmdc:c8a6971a9982af6e8a054dee6d1cb78d", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" 
+ }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db3" + }, + "description": "Assembled contigs fasta for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/assembly_contigs.fna", + "file_size_bytes": 272371965, + "type": "nmdc:DataObject", + "id": "nmdc:4fb2f3e8ebd99cea1e797e248b2e5c1d", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dbd" + }, + "description": "Assembled AGP file for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/assembly.agp", + "file_size_bytes": 34847488, + "type": "nmdc:DataObject", + "id": "nmdc:a85f992644d46cb23475ac9850f4e864", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a0e" + }, + "id": "nmdc:ed54f8af3521aae47d2757852695a188", + "name": "1781_86105.json", + "description": "Gold:Gp0115679 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86105/ReadbasedAnalysis/1781_86105.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a10" + }, + "id": "nmdc:7ca0d8ecd80292bc9bea1862c755a2f8", + "name": "1781_86105.krona.html", + "description": "Gold:Gp0115679 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86105/ReadbasedAnalysis/centrifuge/1781_86105.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ed" + }, + "id": "nmdc:6ef4477214fc698cd494f3e516cdda23", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115679", + "file_size_bytes": 208557497, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ef" + }, + "id": 
"nmdc:f81a3c77048a277c28b110d4797f48d5", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115679", + "file_size_bytes": 3400, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f0" + }, + "id": "nmdc:e3396f34bc4bcc83c4b43bbd1f698450", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115679", + "file_size_bytes": 51045792, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f1" + }, + "id": "nmdc:84cac08cd46c06525b4001424027fd60", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115679", + "file_size_bytes": 830, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f3" + }, + "id": "nmdc:45cce58fd37ad46815381000dd21470d", + "name": "gold:Gp0115679.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 657176, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f4" + }, + "id": "nmdc:db508aa3c84853ff9e2c156d7afcbd7b", + "name": "gold:Gp0115679.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 358418, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f5" + }, + "id": 
"nmdc:219d9761a4dbe9e7374c659a03e8ecf0", + "name": "gold:Gp0115679.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 921488, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f6" + }, + "id": "nmdc:d0582754ac551686e46730419ec9d047", + "name": "gold:Gp0115679.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 374792, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f7" + }, + "id": "nmdc:40152478dc669bc63bbbd4bda0d0c5df", + "name": "gold:Gp0115679.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 332863, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f8" + }, + "id": "nmdc:4d9e7c6ffa3fa24c9b9fdeb0e722c57f", + "name": "gold:Gp0115679.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 658843, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f9" + }, + "id": "nmdc:df7fd9d79f734d8e02589a0bae44a810", + "name": "gold:Gp0115679.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 325713, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b162fa" + }, + "id": "nmdc:abe52bd40df93d12453b1f543a782b2a", + "name": "gold:Gp0115679.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 324504, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fb" + }, + "id": "nmdc:8e65f7abf4dfe56dce5ea4d42dc83b12", + "name": "gold:Gp0115679.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 430938, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fc" + }, + "id": "nmdc:aaac58966e74cbc7e8d0ba7048a8691f", + "name": "gold:Gp0115679.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 378596, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fd" + }, + "id": "nmdc:d4646cc3223b1d9d13c012a6f0dd98e3", + "name": "gold:Gp0115679.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 209262, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fe" + }, + "id": "nmdc:4a6a269ffe428009827fe87d5fc82555", + "name": "gold:Gp0115679.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 238341, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome 
Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ff" + }, + "id": "nmdc:5f136581b45cbac4ffcd9bd61c3b2c0c", + "name": "gold:Gp0115679.bins.16.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 645317, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16300" + }, + "id": "nmdc:c42cd75d1b3e947bb6d82fcca7d4d1a2", + "name": "gold:Gp0115679.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 2098951, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16301" + }, + "id": "nmdc:0922541390d01fa0e51bd93665be8913", + "name": "gold:Gp0115679.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 380540, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16306" + }, + "id": "nmdc:c5df3f066326803dbeafb15a36aa0d93", + "name": "gold:Gp0115679.bins.15.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 299202, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dad" + }, + "description": "Functional annotation GFF file for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_functional_annotation.gff", + "md5_checksum": "b8c895face8e8e77bbfc7163c7eb7850", + "file_size_bytes": 3385, + "id": 
"nmdc:b8c895face8e8e77bbfc7163c7eb7850", + "name": "gold:Gp0115679_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dae" + }, + "description": "KO TSV File for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_ko.tsv", + "md5_checksum": "ee75eaed19b9a259e0e70e20a53f7fba", + "file_size_bytes": 3385, + "id": "nmdc:ee75eaed19b9a259e0e70e20a53f7fba", + "name": "gold:Gp0115679_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16daf" + }, + "description": "EC TSV File for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_ec.tsv", + "md5_checksum": "aa5fa1b83592459bd3e742be4949d0b1", + "file_size_bytes": 3385, + "id": "nmdc:aa5fa1b83592459bd3e742be4949d0b1", + "name": "gold:Gp0115679_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16db1" + }, + "description": "Protein FAA for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_proteins.faa", + "md5_checksum": "21f3d777493f87403b60a4a1b3dd2f1b", + "file_size_bytes": 3385, + "id": "nmdc:21f3d777493f87403b60a4a1b3dd2f1b", + "name": "gold:Gp0115679_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16db6" + }, + "description": "Structural annotation GFF file for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_structural_annotation.gff", + "md5_checksum": "b63b42c7892b4a14e5661bca5bfa2419", + "file_size_bytes": 3385, + "id": "nmdc:b63b42c7892b4a14e5661bca5bfa2419", + "name": "gold:Gp0115679_Structural annotation GFF file", + "data_object_type": "Structural 
Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -2849,12 +3912,301 @@ "file_size_bytes": 1462611 }, { - "name": "Gp0115667_metabat2 bins", - "description": "metabat2 bins for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_metabat_bin.zip", - "md5_checksum": "733e798989606c802b3bbfc952a38841", - "id": "nmdc:733e798989606c802b3bbfc952a38841", - "file_size_bytes": 334014 + "name": "Gp0115667_metabat2 bins", + "description": "metabat2 bins for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_metabat_bin.zip", + "md5_checksum": "733e798989606c802b3bbfc952a38841", + "id": "nmdc:733e798989606c802b3bbfc952a38841", + "file_size_bytes": 334014 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d71" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/mapping_stats.txt", + "file_size_bytes": 8608508, + "type": "nmdc:DataObject", + "id": "nmdc:65454371fa1809684cc19c5c1cb49c4c", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d72" + }, + "description": "Assembled contigs fasta for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/assembly_contigs.fna", + "file_size_bytes": 62345419, + "type": "nmdc:DataObject", + "id": "nmdc:28a8512eff8b81cebce0614fe5ed18a0", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d74" + }, + "description": "Assembled AGP file for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/assembly.agp", + "file_size_bytes": 7346593, + "type": "nmdc:DataObject", + "id": "nmdc:7ce2c4d4d2cbf019fd43453b6fb54fac", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": 
"649b003c1ae706d7b5b14d75" + }, + "description": "Assembled scaffold fasta for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/assembly_scaffolds.fna", + "file_size_bytes": 61997325, + "type": "nmdc:DataObject", + "id": "nmdc:5c6200f0a56a1ec503ac0245b1d2cbdf", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d76" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1747730642, + "type": "nmdc:DataObject", + "id": "nmdc:d07858a6b7932797c1e4e8b019f82131", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15992" + }, + "id": "nmdc:257cca2e47a0917e48596800a3f9f161", + "name": "1781_86098.krona.html", + "description": "Gold:Gp0115667 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86098/ReadbasedAnalysis/centrifuge/1781_86098.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15999" + }, + "id": "nmdc:734cf235a0ede4b50b75488ee5fe893a", + "name": "1781_86098.json", + "description": "Gold:Gp0115667 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86098/ReadbasedAnalysis/1781_86098.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16270" + }, + "id": "nmdc:1e34d5f7bf6a095e74dc5b0ba743c6c4", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115667", + "file_size_bytes": 11386423, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16274" + }, + "id": 
"nmdc:12c891d4c33195700fbf605402639c77", + "name": "gold:Gp0115667.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 211939, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16278" + }, + "id": "nmdc:6ac802499984a5da4fc7aa2cd17af998", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115667", + "file_size_bytes": 2040, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628a" + }, + "id": "nmdc:3fc44ed589c47ca3a915e818dc9ef957", + "name": "gold:Gp0115667.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 229517, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628c" + }, + "id": "nmdc:ecf40d29af87b508f4128c7520dbddff", + "name": "gold:Gp0115667.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 288428, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628f" + }, + "id": "nmdc:3d69f4d7f7bdcabac2d974bf0436cba0", + "name": "gold:Gp0115667.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 332716, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16293" + }, + "id": "nmdc:5977ef8709f03d6b5dd25112cf45dd6a", + "name": "gold:Gp0115667.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 770132, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16294" + }, + "id": "nmdc:411197955ce463b3374262983e6e6c12", + "name": "gold:Gp0115667.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 1544238, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16299" + }, + "id": "nmdc:e4e449bf8b38e28b1c585494dd53f83a", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115667", + "file_size_bytes": 45010132, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629a" + }, + "id": "nmdc:9e21ea6dec1b46e65841a271b2bbe8fe", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115667", + "file_size_bytes": 846, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a0" + }, + "id": "nmdc:a2fecaa8e738191ae8a4934f235ad934", + "name": "gold:Gp0115667.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 305691, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": 
{ + "$oid": "649b003f1ae706d7b5b162a1" + }, + "id": "nmdc:843c9624d7bb8bdbcfe26fdde4117f0d", + "name": "gold:Gp0115667.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 1448474, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d85" + }, + "description": "EC TSV File for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_ec.tsv", + "md5_checksum": "babc9f95621eed35bc7975dee8b417b9", + "file_size_bytes": 3385, + "id": "nmdc:babc9f95621eed35bc7975dee8b417b9", + "name": "gold:Gp0115667_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d86" + }, + "description": "KO TSV File for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_ko.tsv", + "md5_checksum": "bc5043b689463c3651c15ad4ba1aa9a4", + "file_size_bytes": 3385, + "id": "nmdc:bc5043b689463c3651c15ad4ba1aa9a4", + "name": "gold:Gp0115667_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d88" + }, + "description": "Functional annotation GFF file for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_functional_annotation.gff", + "md5_checksum": "c47020ef7958f3a4c4458e0797fc2400", + "file_size_bytes": 3385, + "id": "nmdc:c47020ef7958f3a4c4458e0797fc2400", + "name": "gold:Gp0115667_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d89" + }, + "description": "Protein FAA for gold:Gp0115667", + "url": 
"https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_proteins.faa", + "md5_checksum": "acdedd1c48e28e4f4e0d0679cae417f9", + "file_size_bytes": 3385, + "id": "nmdc:acdedd1c48e28e4f4e0d0679cae417f9", + "name": "gold:Gp0115667_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8b" + }, + "description": "Structural annotation GFF file for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_structural_annotation.gff", + "md5_checksum": "6f236cc8b728333fcf85e4f27873a500", + "file_size_bytes": 3385, + "id": "nmdc:6f236cc8b728333fcf85e4f27873a500", + "name": "gold:Gp0115667_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -3474,6 +4826,224 @@ "md5_checksum": "bb5835f621252fca37967e00245517ac", "id": "nmdc:bb5835f621252fca37967e00245517ac", "file_size_bytes": 314358 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d62" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115664", + "url": "https://data.microbiomedata.org/data/1781_86089/assembly/mapping_stats.txt", + "file_size_bytes": 5751397, + "type": "nmdc:DataObject", + "id": "nmdc:115045c0b7102243d0b9f2d4ffaa20a0", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d64" + }, + "description": "Assembled scaffold fasta for gold:Gp0115664", + "url": "https://data.microbiomedata.org/data/1781_86089/assembly/assembly_scaffolds.fna", + "file_size_bytes": 35384873, + "type": "nmdc:DataObject", + "id": "nmdc:71b690c6d9ad021d8ea68b8fd9d31135", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d65" + }, + "description": "Assembled contigs fasta for gold:Gp0115664", + "url": 
"https://data.microbiomedata.org/data/1781_86089/assembly/assembly_contigs.fna", + "file_size_bytes": 35620717, + "type": "nmdc:DataObject", + "id": "nmdc:b78f599c21fb31b00d3f8a3c56daeb88", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d66" + }, + "description": "Assembled AGP file for gold:Gp0115664", + "url": "https://data.microbiomedata.org/data/1781_86089/assembly/assembly.agp", + "file_size_bytes": 4925804, + "type": "nmdc:DataObject", + "id": "nmdc:f592b315dbd5a060ddb075ad98bf4803", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d67" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115664", + "url": "https://data.microbiomedata.org/data/1781_86089/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1649453824, + "type": "nmdc:DataObject", + "id": "nmdc:662dc676b0b5a486248357f5b887c18b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15983" + }, + "id": "nmdc:bd586aef31587a585d6be2b9814a2551", + "name": "1781_86089.krona.html", + "description": "Gold:Gp0115664 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86089/ReadbasedAnalysis/centrifuge/1781_86089.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15988" + }, + "id": "nmdc:81b89289645757b1b3608d93bc563c73", + "name": "1781_86089.json", + "description": "Gold:Gp0115664 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86089/ReadbasedAnalysis/1781_86089.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16257" + }, + "id": "nmdc:e8ec230bfe68a272b34540e7f5ab5b2b", + "name": "gold:Gp0115664.bins.3.fa", + "description": "metabat2 
binned contig file for gold:Gp0115664", + "file_size_bytes": 287705, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625b" + }, + "id": "nmdc:7a652e7e0f8ded35496989fe90b40c40", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115664", + "file_size_bytes": 4643149, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625c" + }, + "id": "nmdc:ab198c4e10213c9e85c4506b269452ee", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115664", + "file_size_bytes": 29395917, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625d" + }, + "id": "nmdc:c24915651cfdfc91f3e6b5bac679c3af", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115664", + "file_size_bytes": 1176, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625f" + }, + "id": "nmdc:9800add41d26829494265ba81a100c53", + "name": "gold:Gp0115664.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115664", + "file_size_bytes": 351859, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16261" + }, + "id": "nmdc:474fa29bd39452fa80f5a32e9e6be6f4", + "name": "gold:Gp0115664.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115664", + 
"file_size_bytes": 326275, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d61" + }, + "description": "Functional annotation GFF file for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_functional_annotation.gff", + "md5_checksum": "bc034c7024043ea88b44d0897bb5bece", + "file_size_bytes": 3385, + "id": "nmdc:bc034c7024043ea88b44d0897bb5bece", + "name": "gold:Gp0115664_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d62" + }, + "description": "KO TSV File for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_ko.tsv", + "md5_checksum": "76537a4ab5012ba3b407471da373ef1c", + "file_size_bytes": 3385, + "id": "nmdc:76537a4ab5012ba3b407471da373ef1c", + "name": "gold:Gp0115664_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d64" + }, + "description": "Structural annotation GFF file for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_structural_annotation.gff", + "md5_checksum": "10117f9500d0dd54655a5d70195f7df5", + "file_size_bytes": 3385, + "id": "nmdc:10117f9500d0dd54655a5d70195f7df5", + "name": "gold:Gp0115664_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d65" + }, + "description": "EC TSV File for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_ec.tsv", + "md5_checksum": "8a812604db9b4e2bdbad6d0b3539f6ea", + "file_size_bytes": 3385, + "id": "nmdc:8a812604db9b4e2bdbad6d0b3539f6ea", + "name": 
"gold:Gp0115664_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d68" + }, + "description": "Protein FAA for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_proteins.faa", + "md5_checksum": "fc419491cce16671e828d76083252841", + "file_size_bytes": 3385, + "id": "nmdc:fc419491cce16671e828d76083252841", + "name": "gold:Gp0115664_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -3968,71 +5538,408 @@ "file_size_bytes": 80713018 }, { - "name": "Gp0115678_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cath_funfam.gff", - "md5_checksum": "4a3d00839e3067973b06771a31bbae93", - "id": "nmdc:4a3d00839e3067973b06771a31bbae93", - "file_size_bytes": 66327975 + "name": "Gp0115678_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cath_funfam.gff", + "md5_checksum": "4a3d00839e3067973b06771a31bbae93", + "id": "nmdc:4a3d00839e3067973b06771a31bbae93", + "file_size_bytes": 66327975 + }, + { + "name": "Gp0115678_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko_ec.gff", + "md5_checksum": "f01768e30cdd8f7650f631883d1c5d23", + "id": "nmdc:f01768e30cdd8f7650f631883d1c5d23", + "file_size_bytes": 40908900 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + 
"md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115678_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.tooShort.fa", + "md5_checksum": "cf2d0eb0281d2822373d4e7d25c8d1e6", + "id": "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", + "file_size_bytes": 160811096 + }, + { + "name": "Gp0115678_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.unbinned.fa", + "md5_checksum": "85defe7977c263b8fba3f31f89f101f9", + "id": "nmdc:85defe7977c263b8fba3f31f89f101f9", + "file_size_bytes": 31022166 + }, + { + "name": "Gp0115678_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115678", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_checkm_qa.out", + "md5_checksum": "19a6a8410cece1118a06763023cc1313", + "id": "nmdc:19a6a8410cece1118a06763023cc1313", + "file_size_bytes": 1690 + }, + { + "name": "Gp0115678_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115678", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_hqmq_bin.zip", + "md5_checksum": "54ed3f096ca7eacec9e5078ca45a6530", + "id": "nmdc:54ed3f096ca7eacec9e5078ca45a6530", + "file_size_bytes": 4026276 + }, + { + "name": "Gp0115678_metabat2 bins", + "description": "metabat2 bins for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_metabat_bin.zip", + "md5_checksum": 
"8493c05e428d90f8893e4c58755b2e95", + "id": "nmdc:8493c05e428d90f8893e4c58755b2e95", + "file_size_bytes": 72078 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da5" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4408046431, + "type": "nmdc:DataObject", + "id": "nmdc:5d038f63644b03794d5a931f380bfd04", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db7" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/mapping_stats.txt", + "file_size_bytes": 28551512, + "type": "nmdc:DataObject", + "id": "nmdc:56f166420a42acf12a021f3a66004127", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db9" + }, + "description": "Assembled scaffold fasta for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly_scaffolds.fna", + "file_size_bytes": 202369442, + "type": "nmdc:DataObject", + "id": "nmdc:eabfcbcc20b7c6b2732fab7d2ce8b44b", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dbc" + }, + "description": "Assembled contigs fasta for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly_contigs.fna", + "file_size_bytes": 203523040, + "type": "nmdc:DataObject", + "id": "nmdc:d1dee40a000226d9f2c8f4f05e0f85f1", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dbe" + }, + "description": "Assembled AGP file for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly.agp", + "file_size_bytes": 24779500, + "type": 
"nmdc:DataObject", + "id": "nmdc:ac56c44a98ebb58393634c4c2f83028d", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a00" + }, + "id": "nmdc:acb4672087a4cbe2f4e5a65dc291f70b", + "name": "1781_86093.krona.html", + "description": "Gold:Gp0115678 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86093/ReadbasedAnalysis/centrifuge/1781_86093.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a0a" + }, + "id": "nmdc:5334ea32a928a691d0be326a7a73ffe4", + "name": "1781_86093.json", + "description": "Gold:Gp0115678 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86093/ReadbasedAnalysis/1781_86093.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162de" + }, + "id": "nmdc:8576b99e74ec8da1f25e14a8c09c6815", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115678", + "file_size_bytes": 33583798, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162df" + }, + "id": "nmdc:6afbb385aa127eb27cefb63eb516c8bc", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115678", + "file_size_bytes": 156176336, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e1" + }, + "id": "nmdc:127bab89a08a1eed165a2afbde8fedd9", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115678", + "file_size_bytes": 1861, + "url": 
"https://data.microbiomedata.org/data/1781_86093/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e2" + }, + "id": "nmdc:8ead45679db7866273e97e259a27773f", + "name": "gold:Gp0115678.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 2830025, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e3" + }, + "id": "nmdc:7c9a51eb968568c463672c6e4d0cbb0b", + "name": "gold:Gp0115678.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 699704, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e4" + }, + "id": "nmdc:aedec76f4ab12cc4534fc2204677aa81", + "name": "gold:Gp0115678.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 1797224, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e5" + }, + "id": "nmdc:d272087deef90faf8499f958061f818d", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115678", + "file_size_bytes": 2720, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e6" + }, + "id": "nmdc:b0b8774fb7f948606a1f4fa015e7f05e", + "name": "gold:Gp0115678.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 
866150, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e7" + }, + "id": "nmdc:ea9e5c76a6942b053a3b50ac9d56db97", + "name": "gold:Gp0115678.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 672888, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e8" + }, + "id": "nmdc:33a8648962959be82a7140b07cb4eec5", + "name": "gold:Gp0115678.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 245890, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e9" + }, + "id": "nmdc:a72a3f4d459210af63005c1438af24ca", + "name": "gold:Gp0115678.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 232797, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ea" + }, + "id": "nmdc:1d650b2d0318de6359afd3393562f3a1", + "name": "gold:Gp0115678.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 1157673, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162eb" + }, + "id": "nmdc:3bb54fe9860b1a3c7ad831e2ba2d311e", + "name": "gold:Gp0115678.bins.1.fa", + "description": "metabat2 binned contig file for 
gold:Gp0115678", + "file_size_bytes": 307851, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ec" + }, + "id": "nmdc:5f7a9cc615e036a5f42b35abc88dda66", + "name": "gold:Gp0115678.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 366506, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115678_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko_ec.gff", - "md5_checksum": "f01768e30cdd8f7650f631883d1c5d23", - "id": "nmdc:f01768e30cdd8f7650f631883d1c5d23", - "file_size_bytes": 40908900 + "_id": { + "$oid": "649b003f1ae706d7b5b162ee" + }, + "id": "nmdc:e581ce6782a654cf7528153e52a8c80f", + "name": "gold:Gp0115678.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 439839, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 + "_id": { + "$oid": "649b003f1ae706d7b5b162f2" + }, + "id": "nmdc:de85ac00876e5ea0c61208d366b084b2", + "name": "gold:Gp0115678.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 
1192968, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115678_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.tooShort.fa", - "md5_checksum": "cf2d0eb0281d2822373d4e7d25c8d1e6", - "id": "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", - "file_size_bytes": 160811096 + "_id": { + "$oid": "649b00401ae706d7b5b16d6e" + }, + "description": "Protein FAA for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_proteins.faa", + "md5_checksum": "78a99f435ce2bdd6cd83ebb807dc0ef3", + "file_size_bytes": 3385, + "id": "nmdc:78a99f435ce2bdd6cd83ebb807dc0ef3", + "name": "gold:Gp0115678_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0115678_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.unbinned.fa", - "md5_checksum": "85defe7977c263b8fba3f31f89f101f9", - "id": "nmdc:85defe7977c263b8fba3f31f89f101f9", - "file_size_bytes": 31022166 + "_id": { + "$oid": "649b00401ae706d7b5b16d75" + }, + "description": "KO TSV File for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_ko.tsv", + "md5_checksum": "cfe4a8ce52735eedacc38bacdc8785e4", + "file_size_bytes": 3385, + "id": "nmdc:cfe4a8ce52735eedacc38bacdc8785e4", + "name": "gold:Gp0115678_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0115678_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115678", - 
"data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_checkm_qa.out", - "md5_checksum": "19a6a8410cece1118a06763023cc1313", - "id": "nmdc:19a6a8410cece1118a06763023cc1313", - "file_size_bytes": 1690 + "_id": { + "$oid": "649b00401ae706d7b5b16d7b" + }, + "description": "Functional annotation GFF file for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_functional_annotation.gff", + "md5_checksum": "6d1553b3e100a61f3b2b453fb7e71094", + "file_size_bytes": 3385, + "id": "nmdc:6d1553b3e100a61f3b2b453fb7e71094", + "name": "gold:Gp0115678_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0115678_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115678", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_hqmq_bin.zip", - "md5_checksum": "54ed3f096ca7eacec9e5078ca45a6530", - "id": "nmdc:54ed3f096ca7eacec9e5078ca45a6530", - "file_size_bytes": 4026276 + "_id": { + "$oid": "649b00401ae706d7b5b16d7c" + }, + "description": "EC TSV File for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_ec.tsv", + "md5_checksum": "240064338b65f944556e88ebd44fbd03", + "file_size_bytes": 3385, + "id": "nmdc:240064338b65f944556e88ebd44fbd03", + "name": "gold:Gp0115678_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0115678_metabat2 bins", - "description": "metabat2 bins for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_metabat_bin.zip", - "md5_checksum": "8493c05e428d90f8893e4c58755b2e95", - "id": "nmdc:8493c05e428d90f8893e4c58755b2e95", - "file_size_bytes": 72078 + "_id": { + "$oid": "649b00401ae706d7b5b16dc2" + }, + 
"description": "Structural annotation GFF file for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_structural_annotation.gff", + "md5_checksum": "ac989404b8a9e07880788cfb061015ba", + "file_size_bytes": 3385, + "id": "nmdc:ac989404b8a9e07880788cfb061015ba", + "name": "gold:Gp0115678_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -4690,6 +6597,176 @@ "md5_checksum": "c70853ef1a6ab162b85df5215a76666b", "id": "nmdc:c70853ef1a6ab162b85df5215a76666b", "file_size_bytes": 236177 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e44" + }, + "description": "Assembled AGP file for gold:Gp0127623", + "url": "https://data.microbiomedata.org/data/1781_100325/assembly/assembly.agp", + "file_size_bytes": 7722651, + "type": "nmdc:DataObject", + "id": "nmdc:157fde8313174776bf9fd98b41c53aae", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e45" + }, + "description": "Assembled scaffold fasta for gold:Gp0127623", + "url": "https://data.microbiomedata.org/data/1781_100325/assembly/assembly_scaffolds.fna", + "file_size_bytes": 54390822, + "type": "nmdc:DataObject", + "id": "nmdc:c654cffcafc3b8bed2acfdf8e2dc2f3b", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e46" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127623", + "url": "https://data.microbiomedata.org/data/1781_100325/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2033548061, + "type": "nmdc:DataObject", + "id": "nmdc:d41f2a097f2cb6f9d6c8378f203cc565", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e47" + }, + "description": "Assembled contigs fasta for gold:Gp0127623", + "url": 
"https://data.microbiomedata.org/data/1781_100325/assembly/assembly_contigs.fna", + "file_size_bytes": 54746466, + "type": "nmdc:DataObject", + "id": "nmdc:9339ba4d7b731220024b995f87ddc5e1", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e48" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127623", + "url": "https://data.microbiomedata.org/data/1781_100325/assembly/mapping_stats.txt", + "file_size_bytes": 8848183, + "type": "nmdc:DataObject", + "id": "nmdc:f283aad4ed4b528d3ca14bb8fbd8abcd", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ae0" + }, + "id": "nmdc:6dbd96624464bcccba0269cd46f59c1f", + "name": "1781_100325.krona.html", + "description": "Gold:Gp0127623 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100325/ReadbasedAnalysis/centrifuge/1781_100325.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15aea" + }, + "id": "nmdc:91dc3ab40d04608ca5f5a30baa2d48b5", + "name": "1781_100325.json", + "description": "Gold:Gp0127623 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100325/ReadbasedAnalysis/1781_100325.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b9" + }, + "id": "nmdc:295741c2b87623f465d21c6eaacec974", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127623", + "file_size_bytes": 6753212, + "url": "https://data.microbiomedata.org/data/1781_100325/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ba" + }, + "id": "nmdc:dc2d3cbd8386e59252f48f52900f76a4", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs 
fasta file by metaBat2 for gold:Gp0127623", + "file_size_bytes": 47038634, + "url": "https://data.microbiomedata.org/data/1781_100325/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cbd" + }, + "description": "EC TSV File for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_ec.tsv", + "md5_checksum": "d6afa54f891852b3a5befc294ce84489", + "file_size_bytes": 3385, + "id": "nmdc:d6afa54f891852b3a5befc294ce84489", + "name": "gold:Gp0127623_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cbf" + }, + "description": "KO TSV File for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_ko.tsv", + "md5_checksum": "e15c4db1e4e26208b302ecb9bc2c094c", + "file_size_bytes": 3385, + "id": "nmdc:e15c4db1e4e26208b302ecb9bc2c094c", + "name": "gold:Gp0127623_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc0" + }, + "description": "Functional annotation GFF file for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_functional_annotation.gff", + "md5_checksum": "3acc269b9e2b5e97ffcc3c1a0d85381c", + "file_size_bytes": 3385, + "id": "nmdc:3acc269b9e2b5e97ffcc3c1a0d85381c", + "name": "gold:Gp0127623_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc1" + }, + "description": "Structural annotation GFF file for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_structural_annotation.gff", + "md5_checksum": "feb21db71dc44afceeb88bb725315b42", + "file_size_bytes": 3385, + "id": "nmdc:feb21db71dc44afceeb88bb725315b42", + "name": 
"gold:Gp0127623_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc4" + }, + "description": "Protein FAA for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_proteins.faa", + "md5_checksum": "be62e3b68916c8077955d0b3d3aaf5aa", + "file_size_bytes": 3385, + "id": "nmdc:be62e3b68916c8077955d0b3d3aaf5aa", + "name": "gold:Gp0127623_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -5192,63 +7269,281 @@ "file_size_bytes": 60777542 }, { - "name": "Gp0127625_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko_ec.gff", - "md5_checksum": "5a230cb34060373c2e9a0af8b8040f46", - "id": "nmdc:5a230cb34060373c2e9a0af8b8040f46", - "file_size_bytes": 38117675 + "name": "Gp0127625_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko_ec.gff", + "md5_checksum": "5a230cb34060373c2e9a0af8b8040f46", + "id": "nmdc:5a230cb34060373c2e9a0af8b8040f46", + "file_size_bytes": 38117675 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127625_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127625", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.tooShort.fa", + "md5_checksum": "27f14f2f1af3ad7d17505a6ddc52d860", + "id": "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", + "file_size_bytes": 128750891 + }, + { + "name": "Gp0127625_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.unbinned.fa", + "md5_checksum": "b66d8fd47536ed5299c280aa873e2130", + "id": "nmdc:b66d8fd47536ed5299c280aa873e2130", + "file_size_bytes": 37223163 + }, + { + "name": "Gp0127625_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127625", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_checkm_qa.out", + "md5_checksum": "dac476e3a7a8cdb2f3be5946ae437906", + "id": "nmdc:dac476e3a7a8cdb2f3be5946ae437906", + "file_size_bytes": 1413 + }, + { + "name": "Gp0127625_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127625", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_hqmq_bin.zip", + "md5_checksum": "1ce4d3dcf2c9cbe245b437ca14a2772f", + "id": "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", + "file_size_bytes": 182 + }, + { + "name": "Gp0127625_metabat2 bins", + "description": "metabat2 bins for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_metabat_bin.zip", + "md5_checksum": "d312dfb56973b50497bab8faf7409db8", + "id": "nmdc:d312dfb56973b50497bab8faf7409db8", + "file_size_bytes": 1729165 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4b" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/mapping_stats.txt", + 
"file_size_bytes": 22675445, + "type": "nmdc:DataObject", + "id": "nmdc:d5f4718482fe0012f1e39efbd22c50b4", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4e" + }, + "description": "Assembled scaffold fasta for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly_scaffolds.fna", + "file_size_bytes": 169600309, + "type": "nmdc:DataObject", + "id": "nmdc:f1b48b2f19ff83ba6fd51df86ec966ec", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e50" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2276431089, + "type": "nmdc:DataObject", + "id": "nmdc:8ff0fbe4939b764e12158e783f049f23", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e52" + }, + "description": "Assembled contigs fasta for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly_contigs.fna", + "file_size_bytes": 170502832, + "type": "nmdc:DataObject", + "id": "nmdc:593237bf7f38f66d40eca1dbb23c7aef", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e55" + }, + "description": "Assembled AGP file for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly.agp", + "file_size_bytes": 19948625, + "type": "nmdc:DataObject", + "id": "nmdc:6c6032861ed3d9b16040e414aac58731", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15af9" + }, + "id": "nmdc:99433bf558d171ea575501775dabdb36", + "name": "1781_100327.krona.html", + "description": "Gold:Gp0127625 KRONA plot HTML file", + 
"url": "https://data.microbiomedata.org/1781_100327/ReadbasedAnalysis/centrifuge/1781_100327.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b3a" + }, + "id": "nmdc:17804fc0900e6fddc51600638be0e04c", + "name": "1781_100327.json", + "description": "Gold:Gp0127625 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100327/ReadbasedAnalysis/1781_100327.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c3" + }, + "id": "nmdc:f48e06ed804bffa9e3fbafe2548c0d23", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127625", + "file_size_bytes": 125246089, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c4" + }, + "id": "nmdc:6355ef8f8d9c6f954797ae92ca908c41", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127625", + "file_size_bytes": 39081008, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c7" + }, + "id": "nmdc:9c77e38f33eedf5f9c2eb4e672ce3951", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127625", + "file_size_bytes": 1099, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c8" + }, + "id": "nmdc:d22c092793a0476bfc8bba9c5e1a6d22", + "name": "gold:Gp0127625.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127625", + "file_size_bytes": 1411952, + "url": 
"https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ca" + }, + "id": "nmdc:80a9f95cbc30cf2ccd164b5b85b866b6", + "name": "gold:Gp0127625.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127625", + "file_size_bytes": 1533567, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 + "_id": { + "$oid": "649b003f1ae706d7b5b165cb" + }, + "id": "nmdc:31079c1f43cb55dd4d8d603e8413cc56", + "name": "gold:Gp0127625.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127625", + "file_size_bytes": 640735, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0127625_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.tooShort.fa", - "md5_checksum": "27f14f2f1af3ad7d17505a6ddc52d860", - "id": "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", - "file_size_bytes": 128750891 + "_id": { + "$oid": "649b003f1ae706d7b5b16cc8" + }, + "description": "EC TSV File for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_ec.tsv", + 
"md5_checksum": "16eb9d7ffc8dbf8872cbdb9b7f0a1c82", + "file_size_bytes": 3385, + "id": "nmdc:16eb9d7ffc8dbf8872cbdb9b7f0a1c82", + "name": "gold:Gp0127625_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0127625_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.unbinned.fa", - "md5_checksum": "b66d8fd47536ed5299c280aa873e2130", - "id": "nmdc:b66d8fd47536ed5299c280aa873e2130", - "file_size_bytes": 37223163 + "_id": { + "$oid": "649b003f1ae706d7b5b16cc9" + }, + "description": "Structural annotation GFF file for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_structural_annotation.gff", + "md5_checksum": "bf23db2dda841d77cf51b7c9120ba503", + "file_size_bytes": 3385, + "id": "nmdc:bf23db2dda841d77cf51b7c9120ba503", + "name": "gold:Gp0127625_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127625_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127625", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_checkm_qa.out", - "md5_checksum": "dac476e3a7a8cdb2f3be5946ae437906", - "id": "nmdc:dac476e3a7a8cdb2f3be5946ae437906", - "file_size_bytes": 1413 + "_id": { + "$oid": "649b003f1ae706d7b5b16cca" + }, + "description": "KO TSV File for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_ko.tsv", + "md5_checksum": "18dd16caf7af261c4d647da91a6f526a", + "file_size_bytes": 3385, + "id": "nmdc:18dd16caf7af261c4d647da91a6f526a", + "name": "gold:Gp0127625_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": 
"Gp0127625_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127625", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_hqmq_bin.zip", - "md5_checksum": "1ce4d3dcf2c9cbe245b437ca14a2772f", - "id": "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", - "file_size_bytes": 182 + "_id": { + "$oid": "649b003f1ae706d7b5b16ccd" + }, + "description": "Protein FAA for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_proteins.faa", + "md5_checksum": "c6fb34fc2da63a5cc46522279e768db9", + "file_size_bytes": 3385, + "id": "nmdc:c6fb34fc2da63a5cc46522279e768db9", + "name": "gold:Gp0127625_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0127625_metabat2 bins", - "description": "metabat2 bins for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_metabat_bin.zip", - "md5_checksum": "d312dfb56973b50497bab8faf7409db8", - "id": "nmdc:d312dfb56973b50497bab8faf7409db8", - "file_size_bytes": 1729165 + "_id": { + "$oid": "649b003f1ae706d7b5b16cce" + }, + "description": "Functional annotation GFF file for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_functional_annotation.gff", + "md5_checksum": "3b039b9d5a75b97a67edf5d50b34d9f0", + "file_size_bytes": 3385, + "id": "nmdc:3b039b9d5a75b97a67edf5d50b34d9f0", + "name": "gold:Gp0127625_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -5851,6 +8146,236 @@ "md5_checksum": "1b0f148dc6a3a6a007482d1b03fe7e6a", "id": "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a", "file_size_bytes": 520239 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e51" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127626", + "url": 
"https://data.microbiomedata.org/data/1781_100328/assembly/mapping_stats.txt", + "file_size_bytes": 7848645, + "type": "nmdc:DataObject", + "id": "nmdc:f12072d88720efdfb5cecb913d4a595f", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e53" + }, + "description": "Assembled contigs fasta for gold:Gp0127626", + "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly_contigs.fna", + "file_size_bytes": 46893748, + "type": "nmdc:DataObject", + "id": "nmdc:59e99f35194f3f98fa07d401dddd4959", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e54" + }, + "description": "Assembled scaffold fasta for gold:Gp0127626", + "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly_scaffolds.fna", + "file_size_bytes": 46577279, + "type": "nmdc:DataObject", + "id": "nmdc:8856365e5fa1681e630bca38b7376fd1", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e56" + }, + "description": "Assembled AGP file for gold:Gp0127626", + "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly.agp", + "file_size_bytes": 6846909, + "type": "nmdc:DataObject", + "id": "nmdc:f3ab16f91b806aff91f36167bc832f4a", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e57" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127626", + "url": "https://data.microbiomedata.org/data/1781_100328/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2056447451, + "type": "nmdc:DataObject", + "id": "nmdc:353e83f4603072d1fe5d15f4c193397f", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b00" + }, + "id": "nmdc:3441b097a56424b593c10323e71636f7", + "name": 
"1781_100328.json", + "description": "Gold:Gp0127626 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100328/ReadbasedAnalysis/1781_100328.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b08" + }, + "id": "nmdc:04f3b5daa5e47ce69c5c95dce5507f61", + "name": "1781_100328.krona.html", + "description": "Gold:Gp0127626 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100328/ReadbasedAnalysis/centrifuge/1781_100328.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c6" + }, + "id": "nmdc:4f708453d292f67572466be1e73f5e63", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127626", + "file_size_bytes": 4192133, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c9" + }, + "id": "nmdc:6ac7d20ce76667dbe7737db5074574c5", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127626", + "file_size_bytes": 40372419, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165cc" + }, + "id": "nmdc:a38cd7c53173358f551112e22bffa7b3", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127626", + "file_size_bytes": 1224, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165cd" + }, + "id": "nmdc:02a1905f0e4d6c3106d2d43932ad44d7", + "name": "gold:Gp0127626.bins.3.fa", + "description": "metabat2 binned contig file for 
gold:Gp0127626", + "file_size_bytes": 232694, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ce" + }, + "id": "nmdc:4473bba41dba6e11ed96dff91fd1b9e4", + "name": "gold:Gp0127626.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0127626", + "file_size_bytes": 305463, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165cf" + }, + "id": "nmdc:d0941c29cff73ecfa187a4e38108efab", + "name": "gold:Gp0127626.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127626", + "file_size_bytes": 437942, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d1" + }, + "id": "nmdc:98c71d88192e9665597c98b72266ae0f", + "name": "gold:Gp0127626.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127626", + "file_size_bytes": 515703, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ccb" + }, + "description": "EC TSV File for gold:Gp0127626", + "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_ec.tsv", + "md5_checksum": "dde9a2b70a0552a8d6f7cda7f4862aa9", + "file_size_bytes": 3385, + "id": "nmdc:dde9a2b70a0552a8d6f7cda7f4862aa9", + "name": "gold:Gp0127626_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ccc" + }, + "description": "KO TSV File for gold:Gp0127626", + 
"url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_ko.tsv", + "md5_checksum": "1f45d481e2882a15e7d060e47cbbfda3", + "file_size_bytes": 3385, + "id": "nmdc:1f45d481e2882a15e7d060e47cbbfda3", + "name": "gold:Gp0127626_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ccf" + }, + "description": "Functional annotation GFF file for gold:Gp0127626", + "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_functional_annotation.gff", + "md5_checksum": "8e19f17a8fd0747410b68d804b87139d", + "file_size_bytes": 3385, + "id": "nmdc:8e19f17a8fd0747410b68d804b87139d", + "name": "gold:Gp0127626_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd0" + }, + "description": "Structural annotation GFF file for gold:Gp0127626", + "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_structural_annotation.gff", + "md5_checksum": "f1b6a4b001b67ec72eb5b5411e1321c9", + "file_size_bytes": 3385, + "id": "nmdc:f1b6a4b001b67ec72eb5b5411e1321c9", + "name": "gold:Gp0127626_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd2" + }, + "description": "Protein FAA for gold:Gp0127626", + "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_proteins.faa", + "md5_checksum": "11741b35b589852f2b652d1f73afb663", + "file_size_bytes": 3385, + "id": "nmdc:11741b35b589852f2b652d1f73afb663", + "name": "gold:Gp0127626_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -6400,12 +8925,265 @@ "file_size_bytes": 507790 }, { - "name": "Gp0127624_metabat2 bins", - "description": "metabat2 bins for Gp0127624", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_metabat_bin.zip", - "md5_checksum": "2d174febedeca0ce515939dd53d6ccb9", - "id": "nmdc:2d174febedeca0ce515939dd53d6ccb9", - "file_size_bytes": 230699 + "name": "Gp0127624_metabat2 bins", + "description": "metabat2 bins for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_metabat_bin.zip", + "md5_checksum": "2d174febedeca0ce515939dd53d6ccb9", + "id": "nmdc:2d174febedeca0ce515939dd53d6ccb9", + "file_size_bytes": 230699 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e49" + }, + "description": "Assembled contigs fasta for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly_contigs.fna", + "file_size_bytes": 94703971, + "type": "nmdc:DataObject", + "id": "nmdc:70c6cfaac2821e95aad6732da590276e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4a" + }, + "description": "Assembled scaffold fasta for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly_scaffolds.fna", + "file_size_bytes": 94130161, + "type": "nmdc:DataObject", + "id": "nmdc:3b26db32c98a95990057fb0a38d243ca", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4c" + }, + "description": "Assembled AGP file for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly.agp", + "file_size_bytes": 12598209, + "type": "nmdc:DataObject", + "id": "nmdc:8f50a4da5f7f50b0271523331b484e18", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/mapping_stats.txt", + "file_size_bytes": 14348602, + "type": 
"nmdc:DataObject", + "id": "nmdc:b7e396b2ead7ab3abf0b39139af1ba09", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1949775740, + "type": "nmdc:DataObject", + "id": "nmdc:d27b41f3c6392653daeb9b6bbc0277be", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15af3" + }, + "id": "nmdc:d84b79e2655d147759dfe8b579b7e4b9", + "name": "1781_100326.json", + "description": "Gold:Gp0127624 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100326/ReadbasedAnalysis/1781_100326.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15af4" + }, + "id": "nmdc:9670430bbf29ecc709d0e98a383ce37e", + "name": "1781_100326.krona.html", + "description": "Gold:Gp0127624 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100326/ReadbasedAnalysis/centrifuge/1781_100326.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b8" + }, + "id": "nmdc:d84b1a0fea4e91826aa72971c4580662", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127624", + "file_size_bytes": 76923851, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165bb" + }, + "id": "nmdc:e47f7f56d18d80c1d06b96dac4fb1090", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0127624", + "file_size_bytes": 1004, + "url": 
"https://data.microbiomedata.org/data/1781_100326/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165bc" + }, + "id": "nmdc:72ae589425ade6d237d1fb6bb7f88dd8", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127624", + "file_size_bytes": 1256, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165bd" + }, + "id": "nmdc:d95dfd1f76e7e53f84b231e534b3aba7", + "name": "gold:Gp0127624.bins.3.fa", + "description": "hqmq binned contig file for gold:Gp0127624", + "file_size_bytes": 958082, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/hqmq-metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165be" + }, + "id": "nmdc:8eec32d3611abd677f9849ca12dba02e", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127624", + "file_size_bytes": 14174146, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165bf" + }, + "id": "nmdc:79950b448257c67890bd38c137557aa0", + "name": "gold:Gp0127624.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127624", + "file_size_bytes": 263657, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c0" + }, + "id": "nmdc:105f97b2c45693bd730d438123566b41", + "name": "gold:Gp0127624.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0127624", + "file_size_bytes": 517581, + "url": 
"https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c1" + }, + "id": "nmdc:0907e3098e3adb4ce9d8bc4ad240191a", + "name": "gold:Gp0127624.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127624", + "file_size_bytes": 978794, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c2" + }, + "id": "nmdc:44c236036e8d7c34d7066b96066415b5", + "name": "gold:Gp0127624.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127624", + "file_size_bytes": 238087, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc2" + }, + "description": "EC TSV File for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_ec.tsv", + "md5_checksum": "73ebb84a8744552c890ad2508e313972", + "file_size_bytes": 3385, + "id": "nmdc:73ebb84a8744552c890ad2508e313972", + "name": "gold:Gp0127624_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc3" + }, + "description": "KO TSV File for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_ko.tsv", + "md5_checksum": "89a4bb36ef225146a2ba0daaaea512fd", + "file_size_bytes": 3385, + "id": "nmdc:89a4bb36ef225146a2ba0daaaea512fd", + "name": "gold:Gp0127624_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc5" + }, + "description": "Protein FAA for gold:Gp0127624", + "url": 
"https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_proteins.faa", + "md5_checksum": "075262a23b12fd4da073a973a5b6cf15", + "file_size_bytes": 3385, + "id": "nmdc:075262a23b12fd4da073a973a5b6cf15", + "name": "gold:Gp0127624_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc6" + }, + "description": "Structural annotation GFF file for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_structural_annotation.gff", + "md5_checksum": "3fb3966095303ea8aa7f27bff3e9db50", + "file_size_bytes": 3385, + "id": "nmdc:3fb3966095303ea8aa7f27bff3e9db50", + "name": "gold:Gp0127624_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc7" + }, + "description": "Functional annotation GFF file for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_functional_annotation.gff", + "md5_checksum": "6bcdfc58ee6b4eb5ae022c71636a88b4", + "file_size_bytes": 3385, + "id": "nmdc:6bcdfc58ee6b4eb5ae022c71636a88b4", + "name": "gold:Gp0127624_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -7003,6 +9781,176 @@ "md5_checksum": "8f4f5294de942734837fba3d68ffc6b4", "id": "nmdc:8f4f5294de942734837fba3d68ffc6b4", "file_size_bytes": 377953 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e61" + }, + "description": "Assembled scaffold fasta for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly_scaffolds.fna", + "file_size_bytes": 107683315, + "type": "nmdc:DataObject", + "id": "nmdc:eb0c17effb4ea272e31318eecbe890da", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e62" 
+ }, + "description": "Assembled contigs fasta for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly_contigs.fna", + "file_size_bytes": 108309886, + "type": "nmdc:DataObject", + "id": "nmdc:9eed2da9f67c58f243329daf2289f40e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e63" + }, + "description": "Assembled AGP file for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly.agp", + "file_size_bytes": 13784613, + "type": "nmdc:DataObject", + "id": "nmdc:0680d9887963e661ca943b8b2779d954", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e64" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/mapping_stats.txt", + "file_size_bytes": 15702721, + "type": "nmdc:DataObject", + "id": "nmdc:047d86c83005c22cf581cb6c092a6362", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e67" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1973835303, + "type": "nmdc:DataObject", + "id": "nmdc:42c05d533d1a4ecaaa7367180a1b9b36", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b18" + }, + "id": "nmdc:7d974a6b38d5572992a870fb6dbcfb24", + "name": "1781_100331.json", + "description": "Gold:Gp0127629 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100331/ReadbasedAnalysis/1781_100331.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b1a" + }, + "id": 
"nmdc:728f96c592e66b7def86b6b6a227fd51", + "name": "1781_100331.krona.html", + "description": "Gold:Gp0127629 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100331/ReadbasedAnalysis/centrifuge/1781_100331.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e2" + }, + "id": "nmdc:26b0b714933a5d25157f88d0e491104b", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127629", + "file_size_bytes": 20329609, + "url": "https://data.microbiomedata.org/data/1781_100331/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fa" + }, + "id": "nmdc:3508c879a1c420c55e7325f8be819542", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127629", + "file_size_bytes": 86214629, + "url": "https://data.microbiomedata.org/data/1781_100331/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cda" + }, + "description": "KO TSV File for gold:Gp0127629", + "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_ko.tsv", + "md5_checksum": "37fb326b25c1ae3caebddf668feadd76", + "file_size_bytes": 3385, + "id": "nmdc:37fb326b25c1ae3caebddf668feadd76", + "name": "gold:Gp0127629_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cdb" + }, + "description": "Functional annotation GFF file for gold:Gp0127629", + "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_functional_annotation.gff", + "md5_checksum": "75e43708767f06de878e1c2115714e0b", + "file_size_bytes": 3385, + "id": "nmdc:75e43708767f06de878e1c2115714e0b", + "name": "gold:Gp0127629_Functional annotation GFF file", + "data_object_type": "Functional 
Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cdc" + }, + "description": "EC TSV File for gold:Gp0127629", + "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_ec.tsv", + "md5_checksum": "f9211f36dc6992c2dfecd160987434c7", + "file_size_bytes": 3385, + "id": "nmdc:f9211f36dc6992c2dfecd160987434c7", + "name": "gold:Gp0127629_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce1" + }, + "description": "Structural annotation GFF file for gold:Gp0127629", + "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_structural_annotation.gff", + "md5_checksum": "9b3fb3e409e3d3128a8a43cc58d32a95", + "file_size_bytes": 3385, + "id": "nmdc:9b3fb3e409e3d3128a8a43cc58d32a95", + "name": "gold:Gp0127629_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce2" + }, + "description": "Protein FAA for gold:Gp0127629", + "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_proteins.faa", + "md5_checksum": "9559ebd9a8921ff8ae9f89c2ffcef6f7", + "file_size_bytes": 3385, + "id": "nmdc:9559ebd9a8921ff8ae9f89c2ffcef6f7", + "name": "gold:Gp0127629_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -7581,6 +10529,200 @@ "md5_checksum": "5ef5ad24cfe3990c0256d420f51f9010", "id": "nmdc:5ef5ad24cfe3990c0256d420f51f9010", "file_size_bytes": 279359 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/mapping_stats.txt", + "file_size_bytes": 11830693, + "type": "nmdc:DataObject", + "id": "nmdc:29cc178c2efed5702e8d984729345761", + "name": 
"mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5d" + }, + "description": "Assembled contigs fasta for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly_contigs.fna", + "file_size_bytes": 73646305, + "type": "nmdc:DataObject", + "id": "nmdc:e54d5475a6bf7148d2312d0fcc349cdb", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5e" + }, + "description": "Assembled scaffold fasta for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly_scaffolds.fna", + "file_size_bytes": 73171893, + "type": "nmdc:DataObject", + "id": "nmdc:140f23c819c51594790d1209780f8f60", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e60" + }, + "description": "Assembled AGP file for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly.agp", + "file_size_bytes": 10372800, + "type": "nmdc:DataObject", + "id": "nmdc:64d6bc3e0883eb23926cd05e43c42d2c", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e65" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2706803208, + "type": "nmdc:DataObject", + "id": "nmdc:72951466c19bec33fea0232a2becf637", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b09" + }, + "id": "nmdc:c65969e7112a41f10ba56435077e1833", + "name": "1781_100330.krona.html", + "description": "Gold:Gp0127628 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100330/ReadbasedAnalysis/centrifuge/1781_100330.krona.html", + 
"file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b10" + }, + "id": "nmdc:94230c82a668902f15d13898305b06d0", + "name": "1781_100330.json", + "description": "Gold:Gp0127628 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100330/ReadbasedAnalysis/1781_100330.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d3" + }, + "id": "nmdc:58091a36e7ab3d84f65b6d4e08f1a528", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127628", + "file_size_bytes": 61790019, + "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d4" + }, + "id": "nmdc:756c3abe18401097b557a9d3f2788fa1", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127628", + "file_size_bytes": 10332738, + "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d5" + }, + "id": "nmdc:bd1a55c5a24a4ec234f5c22ce66ba8e2", + "name": "gold:Gp0127628.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127628", + "file_size_bytes": 214091, + "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d9" + }, + "id": "nmdc:4d6fbc978933ca7a2fb204c0230252d1", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127628", + "file_size_bytes": 770, + "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM 
Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd4" + }, + "description": "EC TSV File for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_ec.tsv", + "md5_checksum": "760a1e1bc5aac21dd0b96098c72133ff", + "file_size_bytes": 3385, + "id": "nmdc:760a1e1bc5aac21dd0b96098c72133ff", + "name": "gold:Gp0127628_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd7" + }, + "description": "Protein FAA for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_proteins.faa", + "md5_checksum": "ec55b61e1204cde7fe61841179b88b53", + "file_size_bytes": 3385, + "id": "nmdc:ec55b61e1204cde7fe61841179b88b53", + "name": "gold:Gp0127628_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd8" + }, + "description": "Functional annotation GFF file for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_functional_annotation.gff", + "md5_checksum": "b73bf45facd909d89bfab76dee85a2cc", + "file_size_bytes": 3385, + "id": "nmdc:b73bf45facd909d89bfab76dee85a2cc", + "name": "gold:Gp0127628_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd9" + }, + "description": "KO TSV File for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_ko.tsv", + "md5_checksum": "69da278e8966a688cafb7bb2c8f2e4d1", + "file_size_bytes": 3385, + "id": "nmdc:69da278e8966a688cafb7bb2c8f2e4d1", + "name": "gold:Gp0127628_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce0" + }, + "description": "Structural annotation GFF file for 
gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_structural_annotation.gff", + "md5_checksum": "bd55dfd59ed0aa7ea685734c5b7ecbab", + "file_size_bytes": 3385, + "id": "nmdc:bd55dfd59ed0aa7ea685734c5b7ecbab", + "name": "gold:Gp0127628_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -8091,55 +11233,261 @@ "file_size_bytes": 28510384 }, { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127631_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.tooShort.fa", + "md5_checksum": "53faea62cf1183292bc6fca374f75ed1", + "id": "nmdc:53faea62cf1183292bc6fca374f75ed1", + "file_size_bytes": 99316833 + }, + { + "name": "Gp0127631_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.unbinned.fa", + 
"md5_checksum": "7a6616d3262630c2aea2923e3c2683d0", + "id": "nmdc:7a6616d3262630c2aea2923e3c2683d0", + "file_size_bytes": 27381739 + }, + { + "name": "Gp0127631_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127631", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_checkm_qa.out", + "md5_checksum": "e16dde65e7229d69949c9e2dee7e2413", + "id": "nmdc:e16dde65e7229d69949c9e2dee7e2413", + "file_size_bytes": 1085 + }, + { + "name": "Gp0127631_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127631", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_hqmq_bin.zip", + "md5_checksum": "58acda197bd8136a80d5047342008cdf", + "id": "nmdc:58acda197bd8136a80d5047342008cdf", + "file_size_bytes": 182 + }, + { + "name": "Gp0127631_metabat2 bins", + "description": "metabat2 bins for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_metabat_bin.zip", + "md5_checksum": "8d5e2b8a8dede83c2f74182f506f9176", + "id": "nmdc:8d5e2b8a8dede83c2f74182f506f9176", + "file_size_bytes": 596616 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea8" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/mapping_stats.txt", + "file_size_bytes": 17881866, + "type": "nmdc:DataObject", + "id": "nmdc:b8891d5a1c93a83756f25450b1fe5e6e", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eaa" + }, + "description": "Assembled AGP file for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly.agp", + "file_size_bytes": 15733352, + "type": "nmdc:DataObject", + "id": 
"nmdc:00fd0de6e8c1ba5bdd6308a282f543cc", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec6" + }, + "description": "Assembled contigs fasta for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly_contigs.fna", + "file_size_bytes": 127764502, + "type": "nmdc:DataObject", + "id": "nmdc:ff68d07b09e5a9cdd866208394d66bd6", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ecc" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2213957632, + "type": "nmdc:DataObject", + "id": "nmdc:39703cdbfb1e7fbb52a08061a05d8f4d", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed5" + }, + "description": "Assembled scaffold fasta for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly_scaffolds.fna", + "file_size_bytes": 127049764, + "type": "nmdc:DataObject", + "id": "nmdc:7beaf6e386659d8d728720bb1ab2f2b9", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b26" + }, + "id": "nmdc:22bf0b09cd9b46acaa8436ac81aec2f3", + "name": "1781_100333.json", + "description": "Gold:Gp0127631 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100333/ReadbasedAnalysis/1781_100333.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b27" + }, + "id": "nmdc:d376ada84b52516325fc31f0f95fc1c4", + "name": "1781_100333.krona.html", + "description": "Gold:Gp0127631 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100333/ReadbasedAnalysis/centrifuge/1781_100333.krona.html", 
+ "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f1" + }, + "id": "nmdc:3d29d0956f968142b75f0ca9a03e3abb", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127631", + "file_size_bytes": 96541361, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f3" + }, + "id": "nmdc:0ee0d3a741d960268c288071b826ccb2", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127631", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f4" + }, + "id": "nmdc:80e41aba95e325a6687edf730910288b", + "name": "gold:Gp0127631.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127631", + "file_size_bytes": 299114, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ff" + }, + "id": "nmdc:be8af74d4f97b2443a2dac045fa8af6d", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127631", + "file_size_bytes": 28526080, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" }, { - "name": "Gp0127631_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.tooShort.fa", - "md5_checksum": "53faea62cf1183292bc6fca374f75ed1", - "id": 
"nmdc:53faea62cf1183292bc6fca374f75ed1", - "file_size_bytes": 99316833 + "_id": { + "$oid": "649b003f1ae706d7b5b16629" + }, + "id": "nmdc:16ad3e594738a8fb4ee46a931ce0444c", + "name": "gold:Gp0127631.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127631", + "file_size_bytes": 372823, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0127631_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.unbinned.fa", - "md5_checksum": "7a6616d3262630c2aea2923e3c2683d0", - "id": "nmdc:7a6616d3262630c2aea2923e3c2683d0", - "file_size_bytes": 27381739 + "_id": { + "$oid": "649b00401ae706d7b5b16ce5" + }, + "description": "Functional annotation GFF file for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_functional_annotation.gff", + "md5_checksum": "5723e7023b0e3994e92c7c5e72aa34ec", + "file_size_bytes": 3385, + "id": "nmdc:5723e7023b0e3994e92c7c5e72aa34ec", + "name": "gold:Gp0127631_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127631_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127631", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_checkm_qa.out", - "md5_checksum": "e16dde65e7229d69949c9e2dee7e2413", - "id": "nmdc:e16dde65e7229d69949c9e2dee7e2413", - "file_size_bytes": 1085 + "_id": { + "$oid": "649b00401ae706d7b5b16ce7" + }, + "description": "EC TSV File for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_ec.tsv", + "md5_checksum": 
"ee276fe3eb490475ad3d7280a8c67464", + "file_size_bytes": 3385, + "id": "nmdc:ee276fe3eb490475ad3d7280a8c67464", + "name": "gold:Gp0127631_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0127631_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127631", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_hqmq_bin.zip", - "md5_checksum": "58acda197bd8136a80d5047342008cdf", - "id": "nmdc:58acda197bd8136a80d5047342008cdf", - "file_size_bytes": 182 + "_id": { + "$oid": "649b00401ae706d7b5b16ce8" + }, + "description": "Structural annotation GFF file for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_structural_annotation.gff", + "md5_checksum": "d57f28027b2d6f82b96f5413bf8c9a59", + "file_size_bytes": 3385, + "id": "nmdc:d57f28027b2d6f82b96f5413bf8c9a59", + "name": "gold:Gp0127631_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127631_metabat2 bins", - "description": "metabat2 bins for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_metabat_bin.zip", - "md5_checksum": "8d5e2b8a8dede83c2f74182f506f9176", - "id": "nmdc:8d5e2b8a8dede83c2f74182f506f9176", - "file_size_bytes": 596616 + "_id": { + "$oid": "649b00401ae706d7b5b16cea" + }, + "description": "Protein FAA for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_proteins.faa", + "md5_checksum": "04c97ac7af06bf37da8f1ffe827e454d", + "file_size_bytes": 3385, + "id": "nmdc:04c97ac7af06bf37da8f1ffe827e454d", + "name": "gold:Gp0127631_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cec" + }, + "description": "KO TSV File 
for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_ko.tsv", + "md5_checksum": "e2ef79ef2b6669d93af5e90ba2c58fcf", + "file_size_bytes": 3385, + "id": "nmdc:e2ef79ef2b6669d93af5e90ba2c58fcf", + "name": "gold:Gp0127631_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -8737,6 +12085,212 @@ "md5_checksum": "4ad58f05545a75edc1b933a0b0286d16", "id": "nmdc:4ad58f05545a75edc1b933a0b0286d16", "file_size_bytes": 110526 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e66" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/mapping_stats.txt", + "file_size_bytes": 9510797, + "type": "nmdc:DataObject", + "id": "nmdc:e4e89517e39bd367af05e5dc5849b32b", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e68" + }, + "description": "Assembled contigs fasta for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly_contigs.fna", + "file_size_bytes": 57002148, + "type": "nmdc:DataObject", + "id": "nmdc:c3958f0be344c850d06ee61865c95ff6", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6b" + }, + "description": "Assembled scaffold fasta for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly_scaffolds.fna", + "file_size_bytes": 56619602, + "type": "nmdc:DataObject", + "id": "nmdc:354cac10ff205a59fffc795554aa3539", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebf" + }, + "description": "Assembled AGP file for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly.agp", + "file_size_bytes": 8318715, + 
"type": "nmdc:DataObject", + "id": "nmdc:a08b36a85343a2f3dc45d62000a34274", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec5" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2430916917, + "type": "nmdc:DataObject", + "id": "nmdc:1d1e719a8aa56730007392e34c0515a7", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b1c" + }, + "id": "nmdc:82aac6da49d2ea7174e5786d247ceb42", + "name": "1781_100332.krona.html", + "description": "Gold:Gp0127630 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100332/ReadbasedAnalysis/centrifuge/1781_100332.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b2b" + }, + "id": "nmdc:65bf42cc5b458fd298f30d1df2cdb6d6", + "name": "1781_100332.json", + "description": "Gold:Gp0127630 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100332/ReadbasedAnalysis/1781_100332.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d8" + }, + "id": "nmdc:1cc2fa15c0c8c54f427684eac47d9288", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127630", + "file_size_bytes": 48944272, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165dc" + }, + "id": "nmdc:f19e68b486a6cfb5a09a20d9c388f679", + "name": "gold:Gp0127630.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127630", + "file_size_bytes": 211455, + "url": 
"https://data.microbiomedata.org/data/1781_100332/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e0" + }, + "id": "nmdc:904adbe6f49936fd689e59f7e970b4ab", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127630", + "file_size_bytes": 6557371, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e7" + }, + "id": "nmdc:abf67b79d7b9f94c9454eab172da8823", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127630", + "file_size_bytes": 918, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ea" + }, + "id": "nmdc:77678d37b9822be709b6ed462de42e71", + "name": "gold:Gp0127630.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127630", + "file_size_bytes": 254378, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cde" + }, + "description": "EC TSV File for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_ec.tsv", + "md5_checksum": "81ab86211731bc0547d3e8f8786c3e8b", + "file_size_bytes": 3385, + "id": "nmdc:81ab86211731bc0547d3e8f8786c3e8b", + "name": "gold:Gp0127630_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cdf" + }, + "description": "Protein FAA for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_proteins.faa", + 
"md5_checksum": "6bca5ad106b3519416205a82d3a14b16", + "file_size_bytes": 3385, + "id": "nmdc:6bca5ad106b3519416205a82d3a14b16", + "name": "gold:Gp0127630_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce3" + }, + "description": "Functional annotation GFF file for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_functional_annotation.gff", + "md5_checksum": "070f0952308650d35ae05c4fed188677", + "file_size_bytes": 3385, + "id": "nmdc:070f0952308650d35ae05c4fed188677", + "name": "gold:Gp0127630_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce4" + }, + "description": "KO TSV File for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_ko.tsv", + "md5_checksum": "c6b5f388349af0214d65d1357026c7ee", + "file_size_bytes": 3385, + "id": "nmdc:c6b5f388349af0214d65d1357026c7ee", + "name": "gold:Gp0127630_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce6" + }, + "description": "Structural annotation GFF file for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_structural_annotation.gff", + "md5_checksum": "f921989651475b06052058126db54de9", + "file_size_bytes": 3385, + "id": "nmdc:f921989651475b06052058126db54de9", + "name": "gold:Gp0127630_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -9258,63 +12812,281 @@ "file_size_bytes": 53950895 }, { - "name": "Gp0127633_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko_ec.gff", - 
"md5_checksum": "907439e314b4f4623244e2cec8532098", - "id": "nmdc:907439e314b4f4623244e2cec8532098", - "file_size_bytes": 33781965 + "name": "Gp0127633_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko_ec.gff", + "md5_checksum": "907439e314b4f4623244e2cec8532098", + "id": "nmdc:907439e314b4f4623244e2cec8532098", + "file_size_bytes": 33781965 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127633_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.tooShort.fa", + "md5_checksum": "00415cf72f9a77f907e3467a08b123c5", + "id": "nmdc:00415cf72f9a77f907e3467a08b123c5", + "file_size_bytes": 116930318 + }, + { + "name": "Gp0127633_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.unbinned.fa", + "md5_checksum": "83064ec7bfc35a79a1ca76fdd8ad75fd", + "id": "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", + "file_size_bytes": 31883888 + }, + { + "name": "Gp0127633_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127633", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_checkm_qa.out", + "md5_checksum": 
"3f435d6da551400a4ba4400fa3608e7f", + "id": "nmdc:3f435d6da551400a4ba4400fa3608e7f", + "file_size_bytes": 1590 + }, + { + "name": "Gp0127633_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127633", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_hqmq_bin.zip", + "md5_checksum": "c66f93153962f8b80c8f3d6978b6d802", + "id": "nmdc:c66f93153962f8b80c8f3d6978b6d802", + "file_size_bytes": 460412 + }, + { + "name": "Gp0127633_metabat2 bins", + "description": "metabat2 bins for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_metabat_bin.zip", + "md5_checksum": "ce2a364ec51a1d6311a319509751266e", + "id": "nmdc:ce2a364ec51a1d6311a319509751266e", + "file_size_bytes": 753147 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6e" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/mapping_stats.txt", + "file_size_bytes": 20586724, + "type": "nmdc:DataObject", + "id": "nmdc:262d79d1a7606b75f88468b3b9f80b59", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6f" + }, + "description": "Assembled contigs fasta for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/assembly_contigs.fna", + "file_size_bytes": 151723098, + "type": "nmdc:DataObject", + "id": "nmdc:e04c866a9a015bec110f1235db7223dc", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e71" + }, + "description": "Assembled scaffold fasta for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/assembly_scaffolds.fna", + "file_size_bytes": 150902924, + "type": "nmdc:DataObject", + "id": "nmdc:bfdd3614128940d958264690470bce14", + "name": 
"assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e72" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1932857393, + "type": "nmdc:DataObject", + "id": "nmdc:49bbdbc432b3c36c0c9196c53f4b952d", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e73" + }, + "description": "Assembled AGP file for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/assembly.agp", + "file_size_bytes": 18119007, + "type": "nmdc:DataObject", + "id": "nmdc:8d617079209f2f0a15a4752fc68f5e81", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b32" + }, + "id": "nmdc:6496a165a51c3500ed2439270887c660", + "name": "1781_100335.krona.html", + "description": "Gold:Gp0127633 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100335/ReadbasedAnalysis/centrifuge/1781_100335.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b3c" + }, + "id": "nmdc:432510ad975787c5c15f94f45f1226c4", + "name": "1781_100335.json", + "description": "Gold:Gp0127633 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100335/ReadbasedAnalysis/1781_100335.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165df" + }, + "id": "nmdc:f4e28c4b7ce3ff07a6a824da9006df87", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127633", + "file_size_bytes": 34864009, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + 
{ + "_id": { + "$oid": "649b003f1ae706d7b5b165e1" + }, + "id": "nmdc:4f3514bc849b503f135d5652ae7d867d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127633", + "file_size_bytes": 113729207, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e3" + }, + "id": "nmdc:07a65c967ab1996f34d016aedd3b0451", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127633", + "file_size_bytes": 1092, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e4" + }, + "id": "nmdc:50b7d0804958ce8d87de9374cc46af89", + "name": "gold:Gp0127633.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127633", + "file_size_bytes": 286733, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e6" + }, + "id": "nmdc:ca5d7e1b38c31fa6fe1af6931632d74e", + "name": "gold:Gp0127633.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127633", + "file_size_bytes": 269570, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 + "_id": { + "$oid": "649b003f1ae706d7b5b165e8" + }, + "id": "nmdc:cd5403a50c10d21375a7449c9a81d214", + "name": "gold:Gp0127633.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127633", + "file_size_bytes": 229207, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0127633_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.tooShort.fa", - "md5_checksum": "00415cf72f9a77f907e3467a08b123c5", - "id": "nmdc:00415cf72f9a77f907e3467a08b123c5", - "file_size_bytes": 116930318 + "_id": { + "$oid": "649b00401ae706d7b5b16ced" + }, + "description": "KO TSV File for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_ko.tsv", + "md5_checksum": "9ef3a52b2d97cc4afb64e37d04e59865", + "file_size_bytes": 3385, + "id": "nmdc:9ef3a52b2d97cc4afb64e37d04e59865", + "name": "gold:Gp0127633_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0127633_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.unbinned.fa", - "md5_checksum": "83064ec7bfc35a79a1ca76fdd8ad75fd", - "id": "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", - "file_size_bytes": 31883888 + "_id": { + "$oid": "649b00401ae706d7b5b16cf3" + }, + "description": "EC TSV File for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_ec.tsv", + "md5_checksum": "31e2f5b7b055f2959d50a990ebda7ff6", + "file_size_bytes": 3385, + "id": 
"nmdc:31e2f5b7b055f2959d50a990ebda7ff6", + "name": "gold:Gp0127633_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0127633_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127633", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_checkm_qa.out", - "md5_checksum": "3f435d6da551400a4ba4400fa3608e7f", - "id": "nmdc:3f435d6da551400a4ba4400fa3608e7f", - "file_size_bytes": 1590 + "_id": { + "$oid": "649b00401ae706d7b5b16cf8" + }, + "description": "Structural annotation GFF file for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_structural_annotation.gff", + "md5_checksum": "b18381667b4e7401e1bb58e8aede5d4a", + "file_size_bytes": 3385, + "id": "nmdc:b18381667b4e7401e1bb58e8aede5d4a", + "name": "gold:Gp0127633_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127633_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127633", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_hqmq_bin.zip", - "md5_checksum": "c66f93153962f8b80c8f3d6978b6d802", - "id": "nmdc:c66f93153962f8b80c8f3d6978b6d802", - "file_size_bytes": 460412 + "_id": { + "$oid": "649b00401ae706d7b5b16cfa" + }, + "description": "Functional annotation GFF file for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_functional_annotation.gff", + "md5_checksum": "740240c975daffee3e63251fc86cfd33", + "file_size_bytes": 3385, + "id": "nmdc:740240c975daffee3e63251fc86cfd33", + "name": "gold:Gp0127633_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" }, { - 
"name": "Gp0127633_metabat2 bins", - "description": "metabat2 bins for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_metabat_bin.zip", - "md5_checksum": "ce2a364ec51a1d6311a319509751266e", - "id": "nmdc:ce2a364ec51a1d6311a319509751266e", - "file_size_bytes": 753147 + "_id": { + "$oid": "649b00401ae706d7b5b16cfd" + }, + "description": "Protein FAA for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_proteins.faa", + "md5_checksum": "79fd564d59bf9fe4cfb2c771daa84f29", + "file_size_bytes": 3385, + "id": "nmdc:79fd564d59bf9fe4cfb2c771daa84f29", + "name": "gold:Gp0127633_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -9936,6 +13708,176 @@ "md5_checksum": "ba468a2c4f4810d87ba95ad9e123483d", "id": "nmdc:ba468a2c4f4810d87ba95ad9e123483d", "file_size_bytes": 182 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e58" + }, + "description": "Assembled contigs fasta for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly_contigs.fna", + "file_size_bytes": 19648924, + "type": "nmdc:DataObject", + "id": "nmdc:245e4bf7ae2d630d26223054f851e31c", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e59" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/mapping_stats.txt", + "file_size_bytes": 3793093, + "type": "nmdc:DataObject", + "id": "nmdc:53931f648c95c33e09552eb092065622", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5a" + }, + "description": "Assembled AGP file for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly.agp", + "file_size_bytes": 3306333, + "type": 
"nmdc:DataObject", + "id": "nmdc:75f11f70792c4e6055068a31d0b8f64b", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5b" + }, + "description": "Assembled scaffold fasta for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly_scaffolds.fna", + "file_size_bytes": 19495266, + "type": "nmdc:DataObject", + "id": "nmdc:06427cb05246b5573ed4b85f93c0f155", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1829579569, + "type": "nmdc:DataObject", + "id": "nmdc:7d0f0b73c319579aac90fa171f8d77d2", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15afe" + }, + "id": "nmdc:84280bb9e2ed61950aca03e7a5248bf0", + "name": "1781_100329.krona.html", + "description": "Gold:Gp0127627 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100329/ReadbasedAnalysis/centrifuge/1781_100329.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b01" + }, + "id": "nmdc:1b842adedef085708050a71c63cbccb3", + "name": "1781_100329.json", + "description": "Gold:Gp0127627 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100329/ReadbasedAnalysis/1781_100329.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d0" + }, + "id": "nmdc:0ab7113a0f5362f23f64b4b7cd7abcb8", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127627", + "file_size_bytes": 18589862, + 
"url": "https://data.microbiomedata.org/data/1781_100329/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d2" + }, + "id": "nmdc:ec364473e457f915d5fe7fb700c210cd", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127627", + "file_size_bytes": 609489, + "url": "https://data.microbiomedata.org/data/1781_100329/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd1" + }, + "description": "EC TSV File for gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_ec.tsv", + "md5_checksum": "4c97ec34649fc995f167408bd39c9998", + "file_size_bytes": 3385, + "id": "nmdc:4c97ec34649fc995f167408bd39c9998", + "name": "gold:Gp0127627_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd3" + }, + "description": "Functional annotation GFF file for gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_functional_annotation.gff", + "md5_checksum": "6c96999ab72498624aae8bb9b0bfbc66", + "file_size_bytes": 3385, + "id": "nmdc:6c96999ab72498624aae8bb9b0bfbc66", + "name": "gold:Gp0127627_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd5" + }, + "description": "KO TSV File for gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_ko.tsv", + "md5_checksum": "874ae45fc2a007a7d5f9ff964fa8117a", + "file_size_bytes": 3385, + "id": "nmdc:874ae45fc2a007a7d5f9ff964fa8117a", + "name": "gold:Gp0127627_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd6" + }, + "description": "Structural annotation GFF file for 
gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_structural_annotation.gff", + "md5_checksum": "48ab9737528d088ffde37b733e3f728f", + "file_size_bytes": 3385, + "id": "nmdc:48ab9737528d088ffde37b733e3f728f", + "name": "gold:Gp0127627_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cdd" + }, + "description": "Protein FAA for gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_proteins.faa", + "md5_checksum": "fec0b3842897bbce9166a628c4c2d7a0", + "file_size_bytes": 3385, + "id": "nmdc:fec0b3842897bbce9166a628c4c2d7a0", + "name": "gold:Gp0127627_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -10397,79 +14339,321 @@ "file_size_bytes": 5383998 }, { - "name": "Gp0127632_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_supfam.gff", - "md5_checksum": "18cdb0f987a2d417d0a39a685e435729", - "id": "nmdc:18cdb0f987a2d417d0a39a685e435729", - "file_size_bytes": 30162479 + "name": "Gp0127632_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_supfam.gff", + "md5_checksum": "18cdb0f987a2d417d0a39a685e435729", + "id": "nmdc:18cdb0f987a2d417d0a39a685e435729", + "file_size_bytes": 30162479 + }, + { + "name": "Gp0127632_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cath_funfam.gff", + "md5_checksum": "b34e4d1823bd5cd88aa42832d10b3431", + "id": "nmdc:b34e4d1823bd5cd88aa42832d10b3431", + "file_size_bytes": 22459777 + }, + { + "name": "Gp0127632_KO_EC GFF file", 
+ "description": "KO_EC GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko_ec.gff", + "md5_checksum": "dc544f4796d49c520372e1872c5aea49", + "id": "nmdc:dc544f4796d49c520372e1872c5aea49", + "file_size_bytes": 15047897 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127632_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.tooShort.fa", + "md5_checksum": "2941988fcfb708d20ad1e44682c78e22", + "id": "nmdc:2941988fcfb708d20ad1e44682c78e22", + "file_size_bytes": 52475207 + }, + { + "name": "Gp0127632_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.unbinned.fa", + "md5_checksum": "a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "id": "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "file_size_bytes": 5473493 + }, + { + "name": "Gp0127632_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127632", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_checkm_qa.out", + "md5_checksum": "2914266e7ac7a8668c6f8d8722466c69", + "id": "nmdc:2914266e7ac7a8668c6f8d8722466c69", + "file_size_bytes": 948 + }, + { + "name": "Gp0127632_high-quality and medium-quality 
bins", + "description": "high-quality and medium-quality bins for Gp0127632", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_hqmq_bin.zip", + "md5_checksum": "0fd97ca0ce01d42361ce817d3753a65e", + "id": "nmdc:0fd97ca0ce01d42361ce817d3753a65e", + "file_size_bytes": 497493 + }, + { + "name": "Gp0127632_metabat2 bins", + "description": "metabat2 bins for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_metabat_bin.zip", + "md5_checksum": "8e7832cac0ae99e2b63dfdfa34c24927", + "id": "nmdc:8e7832cac0ae99e2b63dfdfa34c24927", + "file_size_bytes": 108323 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e69" + }, + "description": "Assembled contigs fasta for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly_contigs.fna", + "file_size_bytes": 59400374, + "type": "nmdc:DataObject", + "id": "nmdc:8f8931e086f72961675aa936b1356f86", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6a" + }, + "description": "Assembled scaffold fasta for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly_scaffolds.fna", + "file_size_bytes": 59002431, + "type": "nmdc:DataObject", + "id": "nmdc:e780311c63e956d852cd3c1bbd957f86", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6c" + }, + "description": "Assembled AGP file for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly.agp", + "file_size_bytes": 8665587, + "type": "nmdc:DataObject", + "id": "nmdc:9c08a645e240b0861d3b8c912c7eaed0", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127632", + "url": 
"https://data.microbiomedata.org/data/1781_100334/assembly/mapping_stats.txt", + "file_size_bytes": 9898680, + "type": "nmdc:DataObject", + "id": "nmdc:0ca761b2a51f8db8f46b694f06c0809d", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e70" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2332493253, + "type": "nmdc:DataObject", + "id": "nmdc:4a26faa9b34a5c9f3bb65815cf2ad5c8", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b29" + }, + "id": "nmdc:a95788f887b5af704f7e2cfd9868e8a4", + "name": "1781_100334.krona.html", + "description": "Gold:Gp0127632 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100334/ReadbasedAnalysis/centrifuge/1781_100334.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b30" + }, + "id": "nmdc:634b959933536776d62c9c66d43ec8ed", + "name": "1781_100334.json", + "description": "Gold:Gp0127632 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100334/ReadbasedAnalysis/1781_100334.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d6" + }, + "id": "nmdc:e45be81dca6d2cb4a1d7d17ee6d166a4", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127632", + "file_size_bytes": 1413, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d7" + }, + "id": "nmdc:0aa200afbb24fe1e4c26c79c54c070dd", + "name": 
"gold:Gp0127632.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 273113, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165da" + }, + "id": "nmdc:695d01e510529d34d501f4fa62d5c9b8", + "name": "gold:Gp0127632.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 210794, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165db" + }, + "id": "nmdc:86c8e7c9c103a298357f6a2102bd8772", + "name": "gold:Gp0127632.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 311763, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0127632_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cath_funfam.gff", - "md5_checksum": "b34e4d1823bd5cd88aa42832d10b3431", - "id": "nmdc:b34e4d1823bd5cd88aa42832d10b3431", - "file_size_bytes": 22459777 + "_id": { + "$oid": "649b003f1ae706d7b5b165dd" + }, + "id": "nmdc:97771c73e3e872ea1da72cd758a03453", + "name": "gold:Gp0127632.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 236159, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0127632_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127632", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko_ec.gff", - "md5_checksum": "dc544f4796d49c520372e1872c5aea49", - "id": "nmdc:dc544f4796d49c520372e1872c5aea49", - "file_size_bytes": 15047897 + "_id": { + "$oid": "649b003f1ae706d7b5b165de" + }, + "id": "nmdc:e7e4a297417b55b9714702401da79d96", + "name": "gold:Gp0127632.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 346669, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 + "_id": { + "$oid": "649b003f1ae706d7b5b165eb" + }, + "id": "nmdc:fa90657e9d2a9b11f5cea076316d0a50", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127632", + "file_size_bytes": 6037283, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" }, { - "name": "Gp0127632_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.tooShort.fa", - "md5_checksum": "2941988fcfb708d20ad1e44682c78e22", - "id": "nmdc:2941988fcfb708d20ad1e44682c78e22", - "file_size_bytes": 52475207 + "_id": { + "$oid": "649b003f1ae706d7b5b1662f" + }, + "id": "nmdc:800b596b9573c1ddf6a9e357c1eb8d86", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered 
contigs fasta file by metaBat2 for gold:Gp0127632", + "file_size_bytes": 50903302, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" }, { - "name": "Gp0127632_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.unbinned.fa", - "md5_checksum": "a6bc8d9d5ba5fe9713829aa7aef3c4cd", - "id": "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", - "file_size_bytes": 5473493 + "_id": { + "$oid": "649b00401ae706d7b5b16ce9" + }, + "description": "KO TSV File for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_ko.tsv", + "md5_checksum": "aeafeb18adb193b1a3c5c3c2ff9a912e", + "file_size_bytes": 3385, + "id": "nmdc:aeafeb18adb193b1a3c5c3c2ff9a912e", + "name": "gold:Gp0127632_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0127632_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127632", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_checkm_qa.out", - "md5_checksum": "2914266e7ac7a8668c6f8d8722466c69", - "id": "nmdc:2914266e7ac7a8668c6f8d8722466c69", - "file_size_bytes": 948 + "_id": { + "$oid": "649b00401ae706d7b5b16ceb" + }, + "description": "Protein FAA for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_proteins.faa", + "md5_checksum": "a89e8af0fc6daf895e7a87f1ff7087f2", + "file_size_bytes": 3385, + "id": "nmdc:a89e8af0fc6daf895e7a87f1ff7087f2", + "name": "gold:Gp0127632_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0127632_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins 
for Gp0127632", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_hqmq_bin.zip", - "md5_checksum": "0fd97ca0ce01d42361ce817d3753a65e", - "id": "nmdc:0fd97ca0ce01d42361ce817d3753a65e", - "file_size_bytes": 497493 + "_id": { + "$oid": "649b00401ae706d7b5b16cee" + }, + "description": "EC TSV File for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_ec.tsv", + "md5_checksum": "b8d886e71031cbe4fb1284f479348740", + "file_size_bytes": 3385, + "id": "nmdc:b8d886e71031cbe4fb1284f479348740", + "name": "gold:Gp0127632_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0127632_metabat2 bins", - "description": "metabat2 bins for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_metabat_bin.zip", - "md5_checksum": "8e7832cac0ae99e2b63dfdfa34c24927", - "id": "nmdc:8e7832cac0ae99e2b63dfdfa34c24927", - "file_size_bytes": 108323 + "_id": { + "$oid": "649b00401ae706d7b5b16cf2" + }, + "description": "Structural annotation GFF file for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_structural_annotation.gff", + "md5_checksum": "89b895fbf3c13801ddba22ff59bb385a", + "file_size_bytes": 3385, + "id": "nmdc:89b895fbf3c13801ddba22ff59bb385a", + "name": "gold:Gp0127632_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf5" + }, + "description": "Functional annotation GFF file for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_functional_annotation.gff", + "md5_checksum": "2395040203b3351554a9e3ffb48b0b88", + "file_size_bytes": 3385, + "id": "nmdc:2395040203b3351554a9e3ffb48b0b88", + "name": "gold:Gp0127632_Functional annotation GFF file", + 
"data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -11015,6 +15199,176 @@ "md5_checksum": "2d1e318b8b815a8a5487f23315d0fe02", "id": "nmdc:2d1e318b8b815a8a5487f23315d0fe02", "file_size_bytes": 182 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7c" + }, + "description": "Assembled contigs fasta for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly_contigs.fna", + "file_size_bytes": 38679584, + "type": "nmdc:DataObject", + "id": "nmdc:555541de209f6b5bc8b4e36f9c5a96c1", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7d" + }, + "description": "Assembled scaffold fasta for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly_scaffolds.fna", + "file_size_bytes": 38392508, + "type": "nmdc:DataObject", + "id": "nmdc:28a962bc24ab2ba5b7d2e486c36cf6b5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e81" + }, + "description": "Assembled AGP file for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly.agp", + "file_size_bytes": 6197503, + "type": "nmdc:DataObject", + "id": "nmdc:e67c00b23e1c74597d0c07f129d35890", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e82" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/mapping_stats.txt", + "file_size_bytes": 7113525, + "type": "nmdc:DataObject", + "id": "nmdc:fd1201530245f5e4ef4c5d263b34c0a3", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e85" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127636", + "url": 
"https://data.microbiomedata.org/data/1781_100338/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2590059661, + "type": "nmdc:DataObject", + "id": "nmdc:937d790bc414e5aa80c09a419b25dfe1", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b57" + }, + "id": "nmdc:ad763856d102ea442c511ce8c2ac7641", + "name": "1781_100338.krona.html", + "description": "Gold:Gp0127636 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100338/ReadbasedAnalysis/centrifuge/1781_100338.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b5f" + }, + "id": "nmdc:6ea65689d5fe9ac44c9e0e415304f4e3", + "name": "1781_100338.json", + "description": "Gold:Gp0127636 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100338/ReadbasedAnalysis/1781_100338.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f8" + }, + "id": "nmdc:09f9ddea688b24a3ffa3f858851f6011", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127636", + "file_size_bytes": 3427718, + "url": "https://data.microbiomedata.org/data/1781_100338/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16622" + }, + "id": "nmdc:7c210c1bd10b2654864e4d571336a8b8", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127636", + "file_size_bytes": 34402553, + "url": "https://data.microbiomedata.org/data/1781_100338/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cff" + }, + "description": "EC TSV File for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_ec.tsv", + 
"md5_checksum": "80ec7d76d2509e6eeab61d092808908b", + "file_size_bytes": 3385, + "id": "nmdc:80ec7d76d2509e6eeab61d092808908b", + "name": "gold:Gp0127636_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d00" + }, + "description": "KO TSV File for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_ko.tsv", + "md5_checksum": "d68e6d4245c33a73666148570aac9c10", + "file_size_bytes": 3385, + "id": "nmdc:d68e6d4245c33a73666148570aac9c10", + "name": "gold:Gp0127636_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d01" + }, + "description": "Protein FAA for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_proteins.faa", + "md5_checksum": "66d9d6751efad0b8019a565488f950a5", + "file_size_bytes": 3385, + "id": "nmdc:66d9d6751efad0b8019a565488f950a5", + "name": "gold:Gp0127636_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d03" + }, + "description": "Structural annotation GFF file for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_structural_annotation.gff", + "md5_checksum": "e3b57dff7ca37c0da6b7d4bfb4450d9c", + "file_size_bytes": 3385, + "id": "nmdc:e3b57dff7ca37c0da6b7d4bfb4450d9c", + "name": "gold:Gp0127636_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d04" + }, + "description": "Functional annotation GFF file for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_functional_annotation.gff", + "md5_checksum": "31f8346eeca4b929a6c28686bb8b2043", + "file_size_bytes": 3385, + "id": 
"nmdc:31f8346eeca4b929a6c28686bb8b2043", + "name": "gold:Gp0127636_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -11500,55 +15854,273 @@ "file_size_bytes": 20999001 }, { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127634_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.tooShort.fa", + "md5_checksum": "3c8eadbcf4f583090d8f378ea6758799", + "id": "nmdc:3c8eadbcf4f583090d8f378ea6758799", + "file_size_bytes": 71683990 + }, + { + "name": "Gp0127634_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.unbinned.fa", + "md5_checksum": "1be647dc835ee8fe666fe9893266bd21", + "id": "nmdc:1be647dc835ee8fe666fe9893266bd21", + "file_size_bytes": 11353478 + }, + { + "name": "Gp0127634_metabat2 bin checkm quality assessment result", + 
"description": "metabat2 bin checkm quality assessment result for Gp0127634", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_checkm_qa.out", + "md5_checksum": "6cc278c455cafc691333c0a74fe6c540", + "id": "nmdc:6cc278c455cafc691333c0a74fe6c540", + "file_size_bytes": 936 + }, + { + "name": "Gp0127634_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127634", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_hqmq_bin.zip", + "md5_checksum": "de4d0180489bdaa5526977508a489b99", + "id": "nmdc:de4d0180489bdaa5526977508a489b99", + "file_size_bytes": 518340 + }, + { + "name": "Gp0127634_metabat2 bins", + "description": "metabat2 bins for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_metabat_bin.zip", + "md5_checksum": "16a08c4a3a6e9c70a5d47209177d0e60", + "id": "nmdc:16a08c4a3a6e9c70a5d47209177d0e60", + "file_size_bytes": 63768 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e74" + }, + "description": "Assembled AGP file for gold:Gp0127634", + "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly.agp", + "file_size_bytes": 11578650, + "type": "nmdc:DataObject", + "id": "nmdc:38fc6fb4189df6c3ba567ce6e9eb0492", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e75" + }, + "description": "Assembled scaffold fasta for gold:Gp0127634", + "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly_scaffolds.fna", + "file_size_bytes": 83708608, + "type": "nmdc:DataObject", + "id": "nmdc:3c3519053f5bf24ac5faa2db3b9c258b", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e76" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127634", 
+ "url": "https://data.microbiomedata.org/data/1781_100336/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2483793009, + "type": "nmdc:DataObject", + "id": "nmdc:ac6c2b405bcedaa830f122f55b389245", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e77" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127634", + "url": "https://data.microbiomedata.org/data/1781_100336/assembly/mapping_stats.txt", + "file_size_bytes": 13192221, + "type": "nmdc:DataObject", + "id": "nmdc:6783fd1b5292dc59fa04a4f20725b721", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e78" + }, + "description": "Assembled contigs fasta for gold:Gp0127634", + "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly_contigs.fna", + "file_size_bytes": 84236599, + "type": "nmdc:DataObject", + "id": "nmdc:b502e282cb52690232ce6ec6e1cfd4bc", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b40" + }, + "id": "nmdc:613dbb558d3cf5ece974268c1b0b1243", + "name": "1781_100336.krona.html", + "description": "Gold:Gp0127634 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100336/ReadbasedAnalysis/centrifuge/1781_100336.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b4b" + }, + "id": "nmdc:009de2a4d412df442a83e43028aed210", + "name": "1781_100336.json", + "description": "Gold:Gp0127634 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100336/ReadbasedAnalysis/1781_100336.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e5" + }, + "id": "nmdc:941f2a63752dd68925387a6dde7bd88a", + "name": 
"bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127634", + "file_size_bytes": 69587941, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e9" + }, + "id": "nmdc:77e41ad1f4836947b39a43f7ea971076", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127634", + "file_size_bytes": 12216952, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ec" + }, + "id": "nmdc:aba84545a4941088387ffe076be49a4d", + "name": "gold:Gp0127634.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127634", + "file_size_bytes": 367898, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ed" + }, + "id": "nmdc:3713d982c02d72fba230a0d408598a1f", + "name": "gold:Gp0127634.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127634", + "file_size_bytes": 272314, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ef" + }, + "id": "nmdc:29c82fc14dfa2a0f5dc057d234ac6c5a", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127634", + "file_size_bytes": 1071, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" }, { - "name": "Gp0127634_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by 
metaBat2 for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.tooShort.fa", - "md5_checksum": "3c8eadbcf4f583090d8f378ea6758799", - "id": "nmdc:3c8eadbcf4f583090d8f378ea6758799", - "file_size_bytes": 71683990 + "_id": { + "$oid": "649b003f1ae706d7b5b165f0" + }, + "id": "nmdc:0e28d59da33c112a00b6793a19b71189", + "name": "gold:Gp0127634.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127634", + "file_size_bytes": 320360, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0127634_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.unbinned.fa", - "md5_checksum": "1be647dc835ee8fe666fe9893266bd21", - "id": "nmdc:1be647dc835ee8fe666fe9893266bd21", - "file_size_bytes": 11353478 + "_id": { + "$oid": "649b00401ae706d7b5b16cef" + }, + "description": "Structural annotation GFF file for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_structural_annotation.gff", + "md5_checksum": "1e286398d6b164538bbdefb9cc8a41e9", + "file_size_bytes": 3385, + "id": "nmdc:1e286398d6b164538bbdefb9cc8a41e9", + "name": "gold:Gp0127634_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127634_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127634", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_checkm_qa.out", - "md5_checksum": "6cc278c455cafc691333c0a74fe6c540", - "id": "nmdc:6cc278c455cafc691333c0a74fe6c540", - "file_size_bytes": 936 + "_id": { + "$oid": "649b00401ae706d7b5b16cf1" + 
}, + "description": "EC TSV File for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_ec.tsv", + "md5_checksum": "01b078b5b9dde5699e9b9ab02af272df", + "file_size_bytes": 3385, + "id": "nmdc:01b078b5b9dde5699e9b9ab02af272df", + "name": "gold:Gp0127634_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0127634_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127634", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_hqmq_bin.zip", - "md5_checksum": "de4d0180489bdaa5526977508a489b99", - "id": "nmdc:de4d0180489bdaa5526977508a489b99", - "file_size_bytes": 518340 + "_id": { + "$oid": "649b00401ae706d7b5b16cf4" + }, + "description": "Protein FAA for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_proteins.faa", + "md5_checksum": "3374b8708ae6b77b16cd01ce4f33ee72", + "file_size_bytes": 3385, + "id": "nmdc:3374b8708ae6b77b16cd01ce4f33ee72", + "name": "gold:Gp0127634_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0127634_metabat2 bins", - "description": "metabat2 bins for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_metabat_bin.zip", - "md5_checksum": "16a08c4a3a6e9c70a5d47209177d0e60", - "id": "nmdc:16a08c4a3a6e9c70a5d47209177d0e60", - "file_size_bytes": 63768 + "_id": { + "$oid": "649b00401ae706d7b5b16cf9" + }, + "description": "KO TSV File for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_ko.tsv", + "md5_checksum": "5b2ff10d97d2b516716a67dafb137937", + "file_size_bytes": 3385, + "id": "nmdc:5b2ff10d97d2b516716a67dafb137937", + "name": "gold:Gp0127634_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": 
"nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cfb" + }, + "description": "Functional annotation GFF file for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_functional_annotation.gff", + "md5_checksum": "803451414e1935d4de9f9911963efe8d", + "file_size_bytes": 3385, + "id": "nmdc:803451414e1935d4de9f9911963efe8d", + "name": "gold:Gp0127634_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -12127,6 +16699,224 @@ "md5_checksum": "fba0bfa144e9ef179edb10b5a941c259", "id": "nmdc:fba0bfa144e9ef179edb10b5a941c259", "file_size_bytes": 795127 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e79" + }, + "description": "Assembled contigs fasta for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly_contigs.fna", + "file_size_bytes": 111137612, + "type": "nmdc:DataObject", + "id": "nmdc:178298f959546299f78fb2bff07cd460", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7a" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/mapping_stats.txt", + "file_size_bytes": 15570972, + "type": "nmdc:DataObject", + "id": "nmdc:4fc895272dffb49edac9e03d08684d05", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7b" + }, + "description": "Assembled AGP file for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly.agp", + "file_size_bytes": 13670021, + "type": "nmdc:DataObject", + "id": "nmdc:b148c0b9078ed2c9dc0ef9d47d6c4273", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7e" + }, + "description": "Assembled scaffold 
fasta for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly_scaffolds.fna", + "file_size_bytes": 110516271, + "type": "nmdc:DataObject", + "id": "nmdc:c2ae4e2ecc5f68caf6fb04e4c0da29a8", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2128896439, + "type": "nmdc:DataObject", + "id": "nmdc:2e8b9eb5d9a8cbc0e2289bd29ab58bd5", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b49" + }, + "id": "nmdc:1ef5e7e08bb9692d1ce21b338888f92b", + "name": "1781_100337.krona.html", + "description": "Gold:Gp0127635 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100337/ReadbasedAnalysis/centrifuge/1781_100337.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b51" + }, + "id": "nmdc:c9595cd833ad6e651762d7ee3a8e9e5b", + "name": "1781_100337.json", + "description": "Gold:Gp0127635 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100337/ReadbasedAnalysis/1781_100337.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ee" + }, + "id": "nmdc:5eb1b1b53aab751c8ad74e9547ff8a70", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127635", + "file_size_bytes": 84061517, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f2" + }, + "id": "nmdc:1c62a2e67e8af295a7f57e2b4492dc22", + 
"name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127635", + "file_size_bytes": 24394459, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f5" + }, + "id": "nmdc:093a82c4b7951c837e8a281cfae9f128", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127635", + "file_size_bytes": 1071, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f6" + }, + "id": "nmdc:0f5cac2a54bebd617c1c00bcd7e4ba50", + "name": "gold:Gp0127635.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127635", + "file_size_bytes": 365625, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f7" + }, + "id": "nmdc:db607d74248055a9962eda6db70c280e", + "name": "gold:Gp0127635.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127635", + "file_size_bytes": 217636, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fd" + }, + "id": "nmdc:90a132bcef3ba4ebdddc7d4b1297f157", + "name": "gold:Gp0127635.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127635", + "file_size_bytes": 348955, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf0" + }, + "description": "Structural annotation GFF file for 
gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_structural_annotation.gff", + "md5_checksum": "e1cd02b3a92223d8e30e8d7c90837d9a", + "file_size_bytes": 3385, + "id": "nmdc:e1cd02b3a92223d8e30e8d7c90837d9a", + "name": "gold:Gp0127635_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf6" + }, + "description": "Protein FAA for gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_proteins.faa", + "md5_checksum": "b2ee2639269e6d665f772fc8c4e31d07", + "file_size_bytes": 3385, + "id": "nmdc:b2ee2639269e6d665f772fc8c4e31d07", + "name": "gold:Gp0127635_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf7" + }, + "description": "Functional annotation GFF file for gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_functional_annotation.gff", + "md5_checksum": "4768d5de701a1ac55ed0c2d57a270dd2", + "file_size_bytes": 3385, + "id": "nmdc:4768d5de701a1ac55ed0c2d57a270dd2", + "name": "gold:Gp0127635_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cfc" + }, + "description": "KO TSV File for gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_ko.tsv", + "md5_checksum": "4cddc89fb8b405210d66b836825c37ee", + "file_size_bytes": 3385, + "id": "nmdc:4cddc89fb8b405210d66b836825c37ee", + "name": "gold:Gp0127635_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cfe" + }, + "description": "EC TSV File for gold:Gp0127635", + "url": 
"https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_ec.tsv", + "md5_checksum": "9e0a73962f7014df93613b04fae9f8be", + "file_size_bytes": 3385, + "id": "nmdc:9e0a73962f7014df93613b04fae9f8be", + "name": "gold:Gp0127635_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -12703,46 +17493,287 @@ "file_size_bytes": 0 }, { - "name": "Gp0127637_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.tooShort.fa", - "md5_checksum": "7968c6b88e49f066bd24982b4d54965b", - "id": "nmdc:7968c6b88e49f066bd24982b4d54965b", - "file_size_bytes": 91577123 + "name": "Gp0127637_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.tooShort.fa", + "md5_checksum": "7968c6b88e49f066bd24982b4d54965b", + "id": "nmdc:7968c6b88e49f066bd24982b4d54965b", + "file_size_bytes": 91577123 + }, + { + "name": "Gp0127637_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.unbinned.fa", + "md5_checksum": "120fbaa7439eb628d9a982de573446a8", + "id": "nmdc:120fbaa7439eb628d9a982de573446a8", + "file_size_bytes": 22556841 + }, + { + "name": "Gp0127637_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127637", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_checkm_qa.out", + "md5_checksum": "347a7ee18b37674e031cca9046e92623", + "id": 
"nmdc:347a7ee18b37674e031cca9046e92623", + "file_size_bytes": 1092 + }, + { + "name": "Gp0127637_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127637", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_hqmq_bin.zip", + "md5_checksum": "de1da5ea4bfdf3131a6c510b79b145c2", + "id": "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", + "file_size_bytes": 504932 + }, + { + "name": "Gp0127637_metabat2 bins", + "description": "metabat2 bins for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_metabat_bin.zip", + "md5_checksum": "382d00338a5e4829285e58a203de153e", + "id": "nmdc:382d00338a5e4829285e58a203de153e", + "file_size_bytes": 432910 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e80" + }, + "description": "Assembled contigs fasta for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly_contigs.fna", + "file_size_bytes": 116341325, + "type": "nmdc:DataObject", + "id": "nmdc:d66bd2d4b3ad1abef6787addfb5aa8b6", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e83" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/mapping_stats.txt", + "file_size_bytes": 16178302, + "type": "nmdc:DataObject", + "id": "nmdc:c15461f775c7a7b44ec57af9e2897e17", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e84" + }, + "description": "Assembled scaffold fasta for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly_scaffolds.fna", + "file_size_bytes": 115695690, + "type": "nmdc:DataObject", + "id": "nmdc:cd9b2948e3c873bc1c9a2aebe3401cc5", + "name": "assembly_scaffolds.fna", + 
"data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e87" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1715892686, + "type": "nmdc:DataObject", + "id": "nmdc:aa0ca68275dfc45fe70cc94e247e1a69", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e88" + }, + "description": "Assembled AGP file for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly.agp", + "file_size_bytes": 14211451, + "type": "nmdc:DataObject", + "id": "nmdc:b152dd521661ba36392ccc535795542e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b60" + }, + "id": "nmdc:9eeee33a195e10f3f8e95f2049dac406", + "name": "1781_100339.krona.html", + "description": "Gold:Gp0127637 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100339/ReadbasedAnalysis/centrifuge/1781_100339.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b65" + }, + "id": "nmdc:3eca864f3fb90709467d18cc9247709b", + "name": "1781_100339.json", + "description": "Gold:Gp0127637 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100339/ReadbasedAnalysis/1781_100339.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f9" + }, + "id": "nmdc:51db76a0482441e5ade7e3d99694ffe6", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127637", + "file_size_bytes": 89058634, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + 
"_id": { + "$oid": "649b003f1ae706d7b5b165fb" + }, + "id": "nmdc:6a3a98a4c86866bcb2c347d35006fc55", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127637", + "file_size_bytes": 22532401, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fc" + }, + "id": "nmdc:6c4325a1e9ba6b106d58e1fa99239c56", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127637", + "file_size_bytes": 1092, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fe" + }, + "id": "nmdc:419c789c7c7458e9775dcfb3e95530b2", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0127637", + "file_size_bytes": 1003, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16600" + }, + "id": "nmdc:2e3079d5c7c114727c5445f2ed43ed5f", + "name": "gold:Gp0127637.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127637", + "file_size_bytes": 1650635, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16601" + }, + "id": "nmdc:004ec11cd626b798ffccba986dd4f129", + "name": "gold:Gp0127637.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127637", + "file_size_bytes": 1046842, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + 
"$oid": "649b003f1ae706d7b5b16603" + }, + "id": "nmdc:024873d88915f917de41ed83c0b7b98c", + "name": "gold:Gp0127637.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127637", + "file_size_bytes": 230352, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16607" + }, + "id": "nmdc:6920af8dfe8dc23b9a267c837184d619", + "name": "gold:Gp0127637.bins.2.fa", + "description": "hqmq binned contig file for gold:Gp0127637", + "file_size_bytes": 1619653, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/hqmq-metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0127637_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.unbinned.fa", - "md5_checksum": "120fbaa7439eb628d9a982de573446a8", - "id": "nmdc:120fbaa7439eb628d9a982de573446a8", - "file_size_bytes": 22556841 + "_id": { + "$oid": "649b00401ae706d7b5b16d02" + }, + "description": "KO TSV File for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_ko.tsv", + "md5_checksum": "ce74f349e03ae28dd49fc5ea4cd1d91d", + "file_size_bytes": 3385, + "id": "nmdc:ce74f349e03ae28dd49fc5ea4cd1d91d", + "name": "gold:Gp0127637_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0127637_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127637", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_checkm_qa.out", - "md5_checksum": "347a7ee18b37674e031cca9046e92623", - "id": 
"nmdc:347a7ee18b37674e031cca9046e92623", - "file_size_bytes": 1092 + "_id": { + "$oid": "649b00401ae706d7b5b16d05" + }, + "description": "Protein FAA for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_proteins.faa", + "md5_checksum": "c43a6b5a306a8f14aab780d8f1bf9c41", + "file_size_bytes": 3385, + "id": "nmdc:c43a6b5a306a8f14aab780d8f1bf9c41", + "name": "gold:Gp0127637_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0127637_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127637", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_hqmq_bin.zip", - "md5_checksum": "de1da5ea4bfdf3131a6c510b79b145c2", - "id": "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", - "file_size_bytes": 504932 + "_id": { + "$oid": "649b00401ae706d7b5b16d06" + }, + "description": "Structural annotation GFF file for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_structural_annotation.gff", + "md5_checksum": "5f6b287493cde8cf8cb49348a2868aa6", + "file_size_bytes": 3385, + "id": "nmdc:5f6b287493cde8cf8cb49348a2868aa6", + "name": "gold:Gp0127637_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127637_metabat2 bins", - "description": "metabat2 bins for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_metabat_bin.zip", - "md5_checksum": "382d00338a5e4829285e58a203de153e", - "id": "nmdc:382d00338a5e4829285e58a203de153e", - "file_size_bytes": 432910 + "_id": { + "$oid": "649b00401ae706d7b5b16d09" + }, + "description": "EC TSV File for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_ec.tsv", + "md5_checksum": "1549562abe1044734fab8562585ec161", + 
"file_size_bytes": 3385, + "id": "nmdc:1549562abe1044734fab8562585ec161", + "name": "gold:Gp0127637_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0f" + }, + "description": "Functional annotation GFF file for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_functional_annotation.gff", + "md5_checksum": "74b2fc3dd196a3d615c7d0d478fa2f90", + "file_size_bytes": 3385, + "id": "nmdc:74b2fc3dd196a3d615c7d0d478fa2f90", + "name": "gold:Gp0127637_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -13307,6 +18338,211 @@ "md5_checksum": "8ca8e2250dc68643e937163323f2a826", "id": "nmdc:8ca8e2250dc68643e937163323f2a826", "file_size_bytes": 508443 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e86" + }, + "description": "Assembled contigs fasta for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly_contigs.fna", + "file_size_bytes": 83628276, + "type": "nmdc:DataObject", + "id": "nmdc:ed782cb1e889b9965707363c1324ee22", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e89" + }, + "description": "Assembled scaffold fasta for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly_scaffolds.fna", + "file_size_bytes": 83118450, + "type": "nmdc:DataObject", + "id": "nmdc:37adb1b2ce1b858809930aa12526e720", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8a" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/mapping_stats.txt", + "file_size_bytes": 12735011, + "type": "nmdc:DataObject", + "id": 
"nmdc:05e4b13c2533a4969139e6e11ae71984", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8d" + }, + "description": "Assembled AGP file for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly.agp", + "file_size_bytes": 11167924, + "type": "nmdc:DataObject", + "id": "nmdc:d98e0c56d4ea8b29a62f6ba8cc058c72", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8e" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1787924636, + "type": "nmdc:DataObject", + "id": "nmdc:0aa8f1c4c591a4080152f9712431f85b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b6b" + }, + "id": "nmdc:3114c2a7faf5fc63ff1e8be9c15ae9ac", + "name": "1781_100340.krona.html", + "description": "Gold:Gp0127638 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100340/ReadbasedAnalysis/centrifuge/1781_100340.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b6e" + }, + "id": "nmdc:73e4e17fc849b239ced558102cb107de", + "name": "1781_100340.json", + "description": "Gold:Gp0127638 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100340/ReadbasedAnalysis/1781_100340.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16602" + }, + "id": "nmdc:1ed134939eb54d78ba95134f8b11abf0", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127638", + "file_size_bytes": 69018209, + "url": 
"https://data.microbiomedata.org/data/1781_100340/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16604" + }, + "id": "nmdc:4bf67b94461ec33e4d3bf4f28442c6b5", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127638", + "file_size_bytes": 11722895, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16605" + }, + "id": "nmdc:28848628e266e83cfb2e9af8e90ae9c0", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127638", + "file_size_bytes": 760, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16606" + }, + "id": "nmdc:143af6193b463b4b8e685e999fc0d756", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0127638", + "file_size_bytes": 1003, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16609" + }, + "id": "nmdc:e4e5dd8c3aaba918e6e98db827fc9d28", + "name": "gold:Gp0127638.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127638", + "file_size_bytes": 1471149, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d07" + }, + "description": "EC TSV File for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_ec.tsv", + "md5_checksum": "3bd360103e4e8fc8f89c1df345367776", + "file_size_bytes": 3385, + "id": 
"nmdc:3bd360103e4e8fc8f89c1df345367776", + "name": "gold:Gp0127638_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d08" + }, + "description": "KO TSV File for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_ko.tsv", + "md5_checksum": "1aba5135d8cddc36da3cd37579be190b", + "file_size_bytes": 3385, + "id": "nmdc:1aba5135d8cddc36da3cd37579be190b", + "name": "gold:Gp0127638_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0a" + }, + "description": "Functional annotation GFF file for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_functional_annotation.gff", + "md5_checksum": "3da4d2f1c2db68033fa2264f4db7f459", + "file_size_bytes": 3385, + "id": "nmdc:3da4d2f1c2db68033fa2264f4db7f459", + "name": "gold:Gp0127638_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0b" + }, + "description": "Structural annotation GFF file for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_structural_annotation.gff", + "md5_checksum": "2ca3e1a0ba8007e86dedbec47e85adba", + "file_size_bytes": 3385, + "id": "nmdc:2ca3e1a0ba8007e86dedbec47e85adba", + "name": "gold:Gp0127638_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0c" + }, + "description": "Protein FAA for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_proteins.faa", + "md5_checksum": "17993d4fcfa7be4fd4488804d23b67c6", + "file_size_bytes": 3385, + "id": "nmdc:17993d4fcfa7be4fd4488804d23b67c6", + "name": "gold:Gp0127638_Protein 
FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -13838,30 +19074,320 @@ "file_size_bytes": 15075820 }, { - "name": "Gp0115670_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115670", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_checkm_qa.out", - "md5_checksum": "b0866d1a944aa27e34dc7a140aeaf336", - "id": "nmdc:b0866d1a944aa27e34dc7a140aeaf336", - "file_size_bytes": 1690 + "name": "Gp0115670_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115670", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_checkm_qa.out", + "md5_checksum": "b0866d1a944aa27e34dc7a140aeaf336", + "id": "nmdc:b0866d1a944aa27e34dc7a140aeaf336", + "file_size_bytes": 1690 + }, + { + "name": "Gp0115670_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115670", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_hqmq_bin.zip", + "md5_checksum": "0875e5107d03a40832d15e5cf80adbbc", + "id": "nmdc:0875e5107d03a40832d15e5cf80adbbc", + "file_size_bytes": 1944800 + }, + { + "name": "Gp0115670_metabat2 bins", + "description": "metabat2 bins for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_metabat_bin.zip", + "md5_checksum": "9b60c7c905d34e08427781eafbce9b12", + "id": "nmdc:9b60c7c905d34e08427781eafbce9b12", + "file_size_bytes": 658258 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7e" + }, + "description": "Assembled contigs fasta for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/assembly_contigs.fna", + "file_size_bytes": 
84815235, + "type": "nmdc:DataObject", + "id": "nmdc:6c7beb91bbdcda84076fd786d59cab20", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d80" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/mapping_stats.txt", + "file_size_bytes": 11305858, + "type": "nmdc:DataObject", + "id": "nmdc:5133fdf5c818f740f9e7ca276477f5db", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d84" + }, + "description": "Assembled scaffold fasta for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/assembly_scaffolds.fna", + "file_size_bytes": 84354304, + "type": "nmdc:DataObject", + "id": "nmdc:cbd3fb5b5b99d86979e4c481bcd52d91", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d85" + }, + "description": "Assembled AGP file for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/assembly.agp", + "file_size_bytes": 9735435, + "type": "nmdc:DataObject", + "id": "nmdc:5f92683e40ac788a1bebf1d6e02415ad", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d87" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 3205338070, + "type": "nmdc:DataObject", + "id": "nmdc:bd2f5662f242a2be294876530634afaf", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159b6" + }, + "id": "nmdc:0a93ee11d25618c9207f4c109dd0859d", + "name": "1781_86102.krona.html", + "description": "Gold:Gp0115670 KRONA plot HTML file", + "url": 
"https://data.microbiomedata.org/1781_86102/ReadbasedAnalysis/centrifuge/1781_86102.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159bd" + }, + "id": "nmdc:43f9235ab417dd2dff189967b1a66ac7", + "name": "1781_86102.json", + "description": "Gold:Gp0115670 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86102/ReadbasedAnalysis/1781_86102.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16283" + }, + "id": "nmdc:fc533d14a7bb4e0dc462c4d95818e01e", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115670", + "file_size_bytes": 60065481, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16284" + }, + "id": "nmdc:5b15dd3d951dc863beb945de63d7ec25", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115670", + "file_size_bytes": 17564113, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16285" + }, + "id": "nmdc:53398d224211bc133f6dce929cae0d72", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115670", + "file_size_bytes": 2158, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16286" + }, + "id": "nmdc:46666048e020a995a98c70df53ac4d9f", + "name": "gold:Gp0115670.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 412159, + "url": 
"https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16287" + }, + "id": "nmdc:667049b22edf3a81a717ccf63fa6021c", + "name": "gold:Gp0115670.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 1551626, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16288" + }, + "id": "nmdc:1a8477ea8d089e78cb03052c64a35249", + "name": "gold:Gp0115670.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 298749, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16289" + }, + "id": "nmdc:d0b11003b25eb91d3759fbc6b7477c37", + "name": "gold:Gp0115670.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 389627, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628b" + }, + "id": "nmdc:9d6dace191d0fa9b660b12af98402fab", + "name": "gold:Gp0115670.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 329051, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628d" + }, + "id": "nmdc:a15523e666fa33f919c66d5cba8bc0f5", + "name": "gold:Gp0115670.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + 
"file_size_bytes": 570961, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628e" + }, + "id": "nmdc:4162f320ed86534d75ecb1ccf2763d47", + "name": "gold:Gp0115670.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 743867, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16290" + }, + "id": "nmdc:9b032e23d57bfbb90069887246064d22", + "name": "gold:Gp0115670.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 1008688, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16291" + }, + "id": "nmdc:32d3414866d65ea1c0a43f9fc60004ec", + "name": "gold:Gp0115670.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 771722, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d98" + }, + "description": "EC TSV File for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_ec.tsv", + "md5_checksum": "483453952f8e4dc70687e02842b2bfc8", + "file_size_bytes": 3385, + "id": "nmdc:483453952f8e4dc70687e02842b2bfc8", + "name": "gold:Gp0115670_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d99" + }, + "description": "KO TSV File for gold:Gp0115670", + "url": 
"https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_ko.tsv", + "md5_checksum": "4226d30b4f7d4018245613abbb2cc254", + "file_size_bytes": 3385, + "id": "nmdc:4226d30b4f7d4018245613abbb2cc254", + "name": "gold:Gp0115670_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0115670_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115670", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_hqmq_bin.zip", - "md5_checksum": "0875e5107d03a40832d15e5cf80adbbc", - "id": "nmdc:0875e5107d03a40832d15e5cf80adbbc", - "file_size_bytes": 1944800 + "_id": { + "$oid": "649b00401ae706d7b5b16d9b" + }, + "description": "Protein FAA for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_proteins.faa", + "md5_checksum": "7e531f55eba2bd29d5bb4b1af8417b7c", + "file_size_bytes": 3385, + "id": "nmdc:7e531f55eba2bd29d5bb4b1af8417b7c", + "name": "gold:Gp0115670_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0115670_metabat2 bins", - "description": "metabat2 bins for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_metabat_bin.zip", - "md5_checksum": "9b60c7c905d34e08427781eafbce9b12", - "id": "nmdc:9b60c7c905d34e08427781eafbce9b12", - "file_size_bytes": 658258 + "_id": { + "$oid": "649b00401ae706d7b5b16d9c" + }, + "description": "Functional annotation GFF file for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_functional_annotation.gff", + "md5_checksum": "75a1e23a29f8b793c0b0abb7778d8661", + "file_size_bytes": 3385, + "id": "nmdc:75a1e23a29f8b793c0b0abb7778d8661", + "name": "gold:Gp0115670_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": 
"nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d9d" + }, + "description": "Structural annotation GFF file for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_structural_annotation.gff", + "md5_checksum": "f05ecf0db08d716edb7a3f499582a2b7", + "file_size_bytes": 3385, + "id": "nmdc:f05ecf0db08d716edb7a3f499582a2b7", + "name": "gold:Gp0115670_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -14519,6 +20045,331 @@ "md5_checksum": "1a29af6f30c21f38b25e4553605f50ef", "id": "nmdc:1a29af6f30c21f38b25e4553605f50ef", "file_size_bytes": 469326 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d94" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/mapping_stats.txt", + "file_size_bytes": 10283424, + "type": "nmdc:DataObject", + "id": "nmdc:dafe01b902d5308bc53a143024f4c0be", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d95" + }, + "description": "Assembled scaffold fasta for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly_scaffolds.fna", + "file_size_bytes": 77571545, + "type": "nmdc:DataObject", + "id": "nmdc:52e5a91a8c71575c66793012fbdc1d38", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d97" + }, + "description": "Assembled contigs fasta for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly_contigs.fna", + "file_size_bytes": 77989885, + "type": "nmdc:DataObject", + "id": "nmdc:1689f2f2e14c55ab5d2af78ad3eb99bd", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d98" + }, + 
"description": "Assembled AGP file for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly.agp", + "file_size_bytes": 8855354, + "type": "nmdc:DataObject", + "id": "nmdc:e3b48b89ae6f02705022bf443f649bc2", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d99" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2269403358, + "type": "nmdc:DataObject", + "id": "nmdc:0e448fc98b179d70a76f38beb90171cf", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159de" + }, + "id": "nmdc:d9d2c48e8e6cc1e9111eba4cd5aa44ce", + "name": "1781_86104.krona.html", + "description": "Gold:Gp0115674 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86104/ReadbasedAnalysis/centrifuge/1781_86104.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159e3" + }, + "id": "nmdc:ede7eba8751ecdb4bde5cdbded5598a8", + "name": "1781_86104.json", + "description": "Gold:Gp0115674 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86104/ReadbasedAnalysis/1781_86104.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b0" + }, + "id": "nmdc:4164b9671b26c93cf3580eff524af8a6", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115674", + "file_size_bytes": 54726629, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b1" + }, + "id": "nmdc:ecb95407253379e53508c4a5d200ae4e", + "name": 
"bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115674", + "file_size_bytes": 12697686, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b2" + }, + "id": "nmdc:44451dde40f3facbdb6357985448cb9f", + "name": "gold:Gp0115674.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 2583019, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b3" + }, + "id": "nmdc:36a4a672be5f95492ec7b48b501bc666", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115674", + "file_size_bytes": 2550, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b4" + }, + "id": "nmdc:ffcb32d9c9e558bdbd8827712761d752", + "name": "gold:Gp0115674.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 897656, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b5" + }, + "id": "nmdc:7c49f6fc2918adadd95fac344eb321f9", + "name": "gold:Gp0115674.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 891103, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b6" + }, + "id": "nmdc:e8abf1d316f52b3d1234bbaa8cf33c82", + "name": 
"gold:Gp0115674.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 1084295, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b7" + }, + "id": "nmdc:612b7364e691619f08e480035620ddb3", + "name": "gold:Gp0115674.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 221915, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b8" + }, + "id": "nmdc:56823e8ec01d84f84c1f44ba0020cefd", + "name": "gold:Gp0115674.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 1378648, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b9" + }, + "id": "nmdc:f8f2d349759dfc70e53916dd7e07d796", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115674", + "file_size_bytes": 4807, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ba" + }, + "id": "nmdc:8e481f201f444f86ce93109f8f25c356", + "name": "gold:Gp0115674.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 571743, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162bb" + }, + "id": 
"nmdc:3d38153edc983c24deb7609306105632", + "name": "gold:Gp0115674.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 637469, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162bc" + }, + "id": "nmdc:2e8fbb22de466ca13ed4441fee49faab", + "name": "gold:Gp0115674.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 589376, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162be" + }, + "id": "nmdc:448e30e675ef810514b1e091992df2fc", + "name": "gold:Gp0115674.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 318161, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162bf" + }, + "id": "nmdc:f7ee01c219b7141044ca7338877ddf5e", + "name": "gold:Gp0115674.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 412815, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da3" + }, + "description": "EC TSV File for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_ec.tsv", + "md5_checksum": "72ede7603b72206d929c03364769021c", + "file_size_bytes": 3385, + "id": "nmdc:72ede7603b72206d929c03364769021c", + "name": "gold:Gp0115674_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": 
"nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da4" + }, + "description": "KO TSV File for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_ko.tsv", + "md5_checksum": "9c248ab2a22c7b49060e544f37b9c798", + "file_size_bytes": 3385, + "id": "nmdc:9c248ab2a22c7b49060e544f37b9c798", + "name": "gold:Gp0115674_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da5" + }, + "description": "Functional annotation GFF file for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_functional_annotation.gff", + "md5_checksum": "876382e7107a83b87a059e4e961bff75", + "file_size_bytes": 3385, + "id": "nmdc:876382e7107a83b87a059e4e961bff75", + "name": "gold:Gp0115674_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dac" + }, + "description": "Structural annotation GFF file for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_structural_annotation.gff", + "md5_checksum": "17f2fbdeb3f5891c37f2e9e43a40c7b1", + "file_size_bytes": 3385, + "id": "nmdc:17f2fbdeb3f5891c37f2e9e43a40c7b1", + "name": "gold:Gp0115674_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16db0" + }, + "description": "Protein FAA for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_proteins.faa", + "md5_checksum": "c70d6973abeb3ee231d3e38c3c5dced4", + "file_size_bytes": 3385, + "id": "nmdc:c70d6973abeb3ee231d3e38c3c5dced4", + "name": "gold:Gp0115674_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -15114,30 +20965,236 @@ 
"file_size_bytes": 8810307 }, { - "name": "Gp0115673_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115673", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_checkm_qa.out", - "md5_checksum": "af15089c0cb19ec9bd65f98e59dc94f1", - "id": "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", - "file_size_bytes": 942 + "name": "Gp0115673_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115673", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_checkm_qa.out", + "md5_checksum": "af15089c0cb19ec9bd65f98e59dc94f1", + "id": "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", + "file_size_bytes": 942 + }, + { + "name": "Gp0115673_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115673", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_hqmq_bin.zip", + "md5_checksum": "70d3f2afd9f32a2bdaa81a6fc547f6fb", + "id": "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", + "file_size_bytes": 182 + }, + { + "name": "Gp0115673_metabat2 bins", + "description": "metabat2 bins for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_metabat_bin.zip", + "md5_checksum": "f40d84a4fc0c87d76c144777f9e8a8ea", + "id": "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea", + "file_size_bytes": 1658458 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/mapping_stats.txt", + "file_size_bytes": 6599486, + "type": "nmdc:DataObject", + "id": "nmdc:c8b6932baf9efa891ba3ef22cdfc747f", + "name": "mapping_stats.txt", + "data_object_type": "Assembly 
Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d90" + }, + "description": "Assembled AGP file for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/assembly.agp", + "file_size_bytes": 5657846, + "type": "nmdc:DataObject", + "id": "nmdc:8f202f5c73cded42a0ee74842d99d453", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d91" + }, + "description": "Assembled contigs fasta for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/assembly_contigs.fna", + "file_size_bytes": 49161128, + "type": "nmdc:DataObject", + "id": "nmdc:b2f2d476b77fca0725cb68b0305ea3b0", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d93" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1579984662, + "type": "nmdc:DataObject", + "id": "nmdc:7a768ecc03a7f9bf2f48e0ff038e286c", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d96" + }, + "description": "Assembled scaffold fasta for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/assembly_scaffolds.fna", + "file_size_bytes": 48890657, + "type": "nmdc:DataObject", + "id": "nmdc:af2802220167f0c190a161f58e7140ef", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159d4" + }, + "id": "nmdc:54ab9f23cfb3900421112f1c63981d19", + "name": "1781_86091.krona.html", + "description": "Gold:Gp0115673 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86091/ReadbasedAnalysis/centrifuge/1781_86091.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + 
"type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159da" + }, + "id": "nmdc:b92cba553fb3b7f7488f9cf0153170a4", + "name": "1781_86091.json", + "description": "Gold:Gp0115673 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86091/ReadbasedAnalysis/1781_86091.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ab" + }, + "id": "nmdc:64d4b2d627893f1add14860728cce4dd", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115673", + "file_size_bytes": 34259785, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ac" + }, + "id": "nmdc:844cbe586fb4d8c7523f5e48bcf269e4", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115673", + "file_size_bytes": 9383451, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ad" + }, + "id": "nmdc:51877a97315cae458f13c66d23bb5938", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115673", + "file_size_bytes": 936, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ae" + }, + "id": "nmdc:9234a7d807019d0678be49a2b0bf8902", + "name": "gold:Gp0115673.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115673", + "file_size_bytes": 3819274, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162af" + }, + "id": 
"nmdc:99160995b52b9234959f882fee6d2a6b", + "name": "gold:Gp0115673.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115673", + "file_size_bytes": 991444, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6b" + }, + "description": "Structural annotation GFF file for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_structural_annotation.gff", + "md5_checksum": "2fba563f11988f4e30d2b4283c3c5487", + "file_size_bytes": 3385, + "id": "nmdc:2fba563f11988f4e30d2b4283c3c5487", + "name": "gold:Gp0115673_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6d" + }, + "description": "EC TSV File for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_ec.tsv", + "md5_checksum": "da4d331daa6d5965be8e201c3c9ba4d4", + "file_size_bytes": 3385, + "id": "nmdc:da4d331daa6d5965be8e201c3c9ba4d4", + "name": "gold:Gp0115673_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0115673_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115673", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_hqmq_bin.zip", - "md5_checksum": "70d3f2afd9f32a2bdaa81a6fc547f6fb", - "id": "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", - "file_size_bytes": 182 + "_id": { + "$oid": "649b00401ae706d7b5b16d93" + }, + "description": "Functional annotation GFF file for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_functional_annotation.gff", + "md5_checksum": "b7264d7a1c56fc32c4a0c050fe04208e", + 
"file_size_bytes": 3385, + "id": "nmdc:b7264d7a1c56fc32c4a0c050fe04208e", + "name": "gold:Gp0115673_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0115673_metabat2 bins", - "description": "metabat2 bins for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_metabat_bin.zip", - "md5_checksum": "f40d84a4fc0c87d76c144777f9e8a8ea", - "id": "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea", - "file_size_bytes": 1658458 + "_id": { + "$oid": "649b00401ae706d7b5b16dbb" + }, + "description": "Protein FAA for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_proteins.faa", + "md5_checksum": "d325906b9b82b3bfc2fe8ed7321a828e", + "file_size_bytes": 3385, + "id": "nmdc:d325906b9b82b3bfc2fe8ed7321a828e", + "name": "gold:Gp0115673_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dd0" + }, + "description": "KO TSV File for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_ko.tsv", + "md5_checksum": "73cac6bcbfa2627ab291bf230ded9748", + "file_size_bytes": 3385, + "id": "nmdc:73cac6bcbfa2627ab291bf230ded9748", + "name": "gold:Gp0115673_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -15716,6 +21773,212 @@ "md5_checksum": "c484ee1e530a0c9b47069c0288110e47", "id": "nmdc:c484ee1e530a0c9b47069c0288110e47", "file_size_bytes": 444082 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d83" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/mapping_stats.txt", + "file_size_bytes": 8863080, + "type": "nmdc:DataObject", + "id": "nmdc:b422a9fcf9c3fb738a67d9b007e6e063", + "name": "mapping_stats.txt", + 
"data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d86" + }, + "description": "Assembled contigs fasta for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly_contigs.fna", + "file_size_bytes": 58143080, + "type": "nmdc:DataObject", + "id": "nmdc:cbbbd9da9ae7fc0d7cd3ad507977a0fe", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d88" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1911271746, + "type": "nmdc:DataObject", + "id": "nmdc:0594950317ff722111f4ffd3a11304ab", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d89" + }, + "description": "Assembled scaffold fasta for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly_scaffolds.fna", + "file_size_bytes": 57781170, + "type": "nmdc:DataObject", + "id": "nmdc:84d2fa8698a27a1b5b5e493494863296", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8a" + }, + "description": "Assembled AGP file for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly.agp", + "file_size_bytes": 7616352, + "type": "nmdc:DataObject", + "id": "nmdc:49e748d2c3a8f4aaeb65019da319287e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159c0" + }, + "id": "nmdc:dd2f65c7b8ae6d5b3348968d354fb744", + "name": "1781_86095.krona.html", + "description": "Gold:Gp0115671 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86095/ReadbasedAnalysis/centrifuge/1781_86095.krona.html", + "file_size_bytes": 3385, + "data_object_type": 
"Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159cd" + }, + "id": "nmdc:60800e393e9c757603261909577320b6", + "name": "1781_86095.json", + "description": "Gold:Gp0115671 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86095/ReadbasedAnalysis/1781_86095.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16292" + }, + "id": "nmdc:0f654b06229134dbe8dca13a709b9575", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115671", + "file_size_bytes": 46779370, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16295" + }, + "id": "nmdc:1bf83c5dc0174021cd428b7354033bb8", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115671", + "file_size_bytes": 9883327, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16296" + }, + "id": "nmdc:8c12b5ddb4bda80c1c255b2c887afb34", + "name": "gold:Gp0115671.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115671", + "file_size_bytes": 223419, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16298" + }, + "id": "nmdc:d27e4b42b83c999df80390a378c2c189", + "name": "gold:Gp0115671.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115671", + "file_size_bytes": 405648, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b1629b" + }, + "id": "nmdc:2de7dcd5c53b16b1f2ea8e6006384dec", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115671", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d73" + }, + "description": "EC TSV File for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_ec.tsv", + "md5_checksum": "75e88ab163c9d092836f9110768c6a52", + "file_size_bytes": 3385, + "id": "nmdc:75e88ab163c9d092836f9110768c6a52", + "name": "gold:Gp0115671_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d76" + }, + "description": "KO TSV File for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_ko.tsv", + "md5_checksum": "9c6c644e821021661d936d374ee9fc1b", + "file_size_bytes": 3385, + "id": "nmdc:9c6c644e821021661d936d374ee9fc1b", + "name": "gold:Gp0115671_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d78" + }, + "description": "Functional annotation GFF file for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_functional_annotation.gff", + "md5_checksum": "8f5a7f2db6790e67282439becd4c04b2", + "file_size_bytes": 3385, + "id": "nmdc:8f5a7f2db6790e67282439becd4c04b2", + "name": "gold:Gp0115671_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7e" + }, + "description": "Protein FAA for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_proteins.faa", + 
"md5_checksum": "f5a4336c7ac10e908cfe90a61a991c65", + "file_size_bytes": 3385, + "id": "nmdc:f5a4336c7ac10e908cfe90a61a991c65", + "name": "gold:Gp0115671_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d87" + }, + "description": "Structural annotation GFF file for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_structural_annotation.gff", + "md5_checksum": "ad6e88d469fbad7b0684afb933403a6c", + "file_size_bytes": 3385, + "id": "nmdc:ad6e88d469fbad7b0684afb933403a6c", + "name": "gold:Gp0115671_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -16205,95 +22468,372 @@ "file_size_bytes": 31343624 }, { - "name": "Gp0115676_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_tigrfam.gff", - "md5_checksum": "4d0469ae5b27dd4045d637d2493ccba9", - "id": "nmdc:4d0469ae5b27dd4045d637d2493ccba9", - "file_size_bytes": 4260344 + "name": "Gp0115676_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_tigrfam.gff", + "md5_checksum": "4d0469ae5b27dd4045d637d2493ccba9", + "id": "nmdc:4d0469ae5b27dd4045d637d2493ccba9", + "file_size_bytes": 4260344 + }, + { + "name": "Gp0115676_SMART GFF file", + "description": "SMART GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_smart.gff", + "md5_checksum": "a893783f6886e31b6bca5b6baede9f66", + "id": "nmdc:a893783f6886e31b6bca5b6baede9f66", + "file_size_bytes": 8240017 + }, + { + "name": "Gp0115676_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115676", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_supfam.gff", + "md5_checksum": "2225c723ccf0fd5ea309cfb5ca90d536", + "id": "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", + "file_size_bytes": 48186264 + }, + { + "name": "Gp0115676_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cath_funfam.gff", + "md5_checksum": "1abd69f8096f98174d95d9a3a13c2a3b", + "id": "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", + "file_size_bytes": 38259823 + }, + { + "name": "Gp0115676_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko_ec.gff", + "md5_checksum": "83647c3e1ed96fda36f7c119a3e98182", + "id": "nmdc:83647c3e1ed96fda36f7c119a3e98182", + "file_size_bytes": 29337291 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115676_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.tooShort.fa", + "md5_checksum": "71667f3b8ee0cb5acadc541fa6914022", + "id": "nmdc:71667f3b8ee0cb5acadc541fa6914022", + "file_size_bytes": 75793492 + }, + { + "name": "Gp0115676_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.unbinned.fa", + 
"md5_checksum": "0141a64077e0f18adc42cb1915a00fa2", + "id": "nmdc:0141a64077e0f18adc42cb1915a00fa2", + "file_size_bytes": 17366889 + }, + { + "name": "Gp0115676_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115676", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_checkm_qa.out", + "md5_checksum": "982b47616dde63a388400fcc57d7c5b0", + "id": "nmdc:982b47616dde63a388400fcc57d7c5b0", + "file_size_bytes": 1700 + }, + { + "name": "Gp0115676_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115676", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_hqmq_bin.zip", + "md5_checksum": "313eb61bc7577e272eca6332e923f9c4", + "id": "nmdc:313eb61bc7577e272eca6332e923f9c4", + "file_size_bytes": 677741 + }, + { + "name": "Gp0115676_metabat2 bins", + "description": "metabat2 bins for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_metabat_bin.zip", + "md5_checksum": "763eb40a8905e9b0d459c45222f1b05e", + "id": "nmdc:763eb40a8905e9b0d459c45222f1b05e", + "file_size_bytes": 2885722 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9e" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/mapping_stats.txt", + "file_size_bytes": 13876163, + "type": "nmdc:DataObject", + "id": "nmdc:96941ca922d1e71c5651c276dae2951e", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da0" + }, + "description": "Assembled scaffold fasta for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/assembly_scaffolds.fna", + "file_size_bytes": 103511507, + "type": "nmdc:DataObject", + "id": 
"nmdc:f6a39ee8aee7ca6e8d4b3a351af5097e", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da1" + }, + "description": "Assembled AGP file for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/assembly.agp", + "file_size_bytes": 11980587, + "type": "nmdc:DataObject", + "id": "nmdc:665e21a0e5c6dc4de9165db7fc04944b", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da2" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 3317390706, + "type": "nmdc:DataObject", + "id": "nmdc:7b206a8925a1ea97bf5cfbbafd4c1331", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da4" + }, + "description": "Assembled contigs fasta for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/assembly_contigs.fna", + "file_size_bytes": 104075065, + "type": "nmdc:DataObject", + "id": "nmdc:6525bd7de120f6ed4dd75069d597f261", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159f1" + }, + "id": "nmdc:7bfbfbfea6176042739cd5079cda14bd", + "name": "1781_86099.krona.html", + "description": "Gold:Gp0115676 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86099/ReadbasedAnalysis/centrifuge/1781_86099.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159f5" + }, + "id": "nmdc:e98001eea268f1373182f7b83d43ab1f", + "name": "1781_86099.json", + "description": "Gold:Gp0115676 ReadbasedAnalysis result JSON file", + "url": 
"https://data.microbiomedata.org/1781_86099/ReadbasedAnalysis/1781_86099.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c5" + }, + "id": "nmdc:b80b655f568ca46cf4789674ac6a83cc", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115676", + "file_size_bytes": 1881, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c6" + }, + "id": "nmdc:4c9a69a0c2311a6cdaf5a476cc8c9d42", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115676", + "file_size_bytes": 815, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c7" + }, + "id": "nmdc:956841108fcf5c6634a57da8f2b7fac7", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115676", + "file_size_bytes": 19565106, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c8" + }, + "id": "nmdc:e91e2f12beabcf429ee849748b76801f", + "name": "gold:Gp0115676.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 2056638, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c9" + }, + "id": "nmdc:31bc3893618af40d8f63e24e2dad6772", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115676", + "file_size_bytes": 73605331, + "url": 
"https://data.microbiomedata.org/data/1781_86099/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" }, { - "name": "Gp0115676_SMART GFF file", - "description": "SMART GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_smart.gff", - "md5_checksum": "a893783f6886e31b6bca5b6baede9f66", - "id": "nmdc:a893783f6886e31b6bca5b6baede9f66", - "file_size_bytes": 8240017 + "_id": { + "$oid": "649b003f1ae706d7b5b162ca" + }, + "id": "nmdc:ccc55d14e487d71a93085ff56130b44a", + "name": "gold:Gp0115676.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 3283805, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115676_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_supfam.gff", - "md5_checksum": "2225c723ccf0fd5ea309cfb5ca90d536", - "id": "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", - "file_size_bytes": 48186264 + "_id": { + "$oid": "649b003f1ae706d7b5b162cb" + }, + "id": "nmdc:3b5521b99ccb093a2984e693c20cef84", + "name": "gold:Gp0115676.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 642694, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115676_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cath_funfam.gff", - "md5_checksum": "1abd69f8096f98174d95d9a3a13c2a3b", - "id": "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", - "file_size_bytes": 38259823 + "_id": { + "$oid": "649b003f1ae706d7b5b162cc" + }, + "id": 
"nmdc:b0793c3769f2849c8a6a1496bfc555bd", + "name": "gold:Gp0115676.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 537417, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115676_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko_ec.gff", - "md5_checksum": "83647c3e1ed96fda36f7c119a3e98182", - "id": "nmdc:83647c3e1ed96fda36f7c119a3e98182", - "file_size_bytes": 29337291 + "_id": { + "$oid": "649b003f1ae706d7b5b162cd" + }, + "id": "nmdc:bd0dc950e63b986b1585aa25c81c0a52", + "name": "gold:Gp0115676.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 2597982, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 + "_id": { + "$oid": "649b003f1ae706d7b5b162d0" + }, + "id": "nmdc:b151fd8de6fd473abe671917580a23d9", + "name": "gold:Gp0115676.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 249502, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115676_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": 
"tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.tooShort.fa", - "md5_checksum": "71667f3b8ee0cb5acadc541fa6914022", - "id": "nmdc:71667f3b8ee0cb5acadc541fa6914022", - "file_size_bytes": 75793492 + "_id": { + "$oid": "649b003f1ae706d7b5b162d2" + }, + "id": "nmdc:575b0300e32da8f7a051c5019b4ccfee", + "name": "gold:Gp0115676.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 228256, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115676_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.unbinned.fa", - "md5_checksum": "0141a64077e0f18adc42cb1915a00fa2", - "id": "nmdc:0141a64077e0f18adc42cb1915a00fa2", - "file_size_bytes": 17366889 + "_id": { + "$oid": "649b00401ae706d7b5b16d92" + }, + "description": "Protein FAA for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_proteins.faa", + "md5_checksum": "5193d8fa7e151b96396afa8d61851af8", + "file_size_bytes": 3385, + "id": "nmdc:5193d8fa7e151b96396afa8d61851af8", + "name": "gold:Gp0115676_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0115676_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115676", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_checkm_qa.out", - "md5_checksum": "982b47616dde63a388400fcc57d7c5b0", - "id": "nmdc:982b47616dde63a388400fcc57d7c5b0", - "file_size_bytes": 1700 + "_id": { + "$oid": "649b00401ae706d7b5b16d96" + }, 
+ "description": "KO TSV File for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_ko.tsv", + "md5_checksum": "23762ea8dc5ce375c3827aded41ae2c0", + "file_size_bytes": 3385, + "id": "nmdc:23762ea8dc5ce375c3827aded41ae2c0", + "name": "gold:Gp0115676_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0115676_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115676", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_hqmq_bin.zip", - "md5_checksum": "313eb61bc7577e272eca6332e923f9c4", - "id": "nmdc:313eb61bc7577e272eca6332e923f9c4", - "file_size_bytes": 677741 + "_id": { + "$oid": "649b00401ae706d7b5b16d9a" + }, + "description": "EC TSV File for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_ec.tsv", + "md5_checksum": "bc4755bf8b2c0b7c384eb4ffd8e9e017", + "file_size_bytes": 3385, + "id": "nmdc:bc4755bf8b2c0b7c384eb4ffd8e9e017", + "name": "gold:Gp0115676_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0115676_metabat2 bins", - "description": "metabat2 bins for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_metabat_bin.zip", - "md5_checksum": "763eb40a8905e9b0d459c45222f1b05e", - "id": "nmdc:763eb40a8905e9b0d459c45222f1b05e", - "file_size_bytes": 2885722 + "_id": { + "$oid": "649b00401ae706d7b5b16da7" + }, + "description": "Structural annotation GFF file for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_structural_annotation.gff", + "md5_checksum": "e3b04bb85be48814ca078ee871a9296b", + "file_size_bytes": 3385, + "id": "nmdc:e3b04bb85be48814ca078ee871a9296b", + "name": "gold:Gp0115676_Structural annotation GFF file", + 
"data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da9" + }, + "description": "Functional annotation GFF file for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_functional_annotation.gff", + "md5_checksum": "d429e7a9bb0344196ed7bcca6131e3c0", + "file_size_bytes": 3385, + "id": "nmdc:d429e7a9bb0344196ed7bcca6131e3c0", + "name": "gold:Gp0115676_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -16911,46 +23451,371 @@ "file_size_bytes": 0 }, { - "name": "Gp0115677_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.tooShort.fa", - "md5_checksum": "603009bd6294d2318d929a57b5d3e5d3", - "id": "nmdc:603009bd6294d2318d929a57b5d3e5d3", - "file_size_bytes": 215021876 + "name": "Gp0115677_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.tooShort.fa", + "md5_checksum": "603009bd6294d2318d929a57b5d3e5d3", + "id": "nmdc:603009bd6294d2318d929a57b5d3e5d3", + "file_size_bytes": 215021876 + }, + { + "name": "Gp0115677_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.unbinned.fa", + "md5_checksum": "c5334a4e305f78c294c304c3c0526826", + "id": "nmdc:c5334a4e305f78c294c304c3c0526826", + "file_size_bytes": 26658018 + }, + { + "name": "Gp0115677_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result 
for Gp0115677", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_checkm_qa.out", + "md5_checksum": "6adacc1ba06e5e451f3636c394c71ae8", + "id": "nmdc:6adacc1ba06e5e451f3636c394c71ae8", + "file_size_bytes": 1859 + }, + { + "name": "Gp0115677_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115677", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_hqmq_bin.zip", + "md5_checksum": "77d4e2a7f358b9ac1d53b69d7e8c45e1", + "id": "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", + "file_size_bytes": 2309404 + }, + { + "name": "Gp0115677_metabat2 bins", + "description": "metabat2 bins for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_metabat_bin.zip", + "md5_checksum": "42c3fb9a3906f6b413f99e3276bb7550", + "id": "nmdc:42c3fb9a3906f6b413f99e3276bb7550", + "file_size_bytes": 450699 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da3" + }, + "description": "Assembled contigs fasta for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/assembly_contigs.fna", + "file_size_bytes": 248003503, + "type": "nmdc:DataObject", + "id": "nmdc:9ca27b985234aaed07e3f6659e0416d0", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da6" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/mapping_stats.txt", + "file_size_bytes": 40972895, + "type": "nmdc:DataObject", + "id": "nmdc:26847612e684ef73baf6d1daf75ba042", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db1" + }, + "description": "Assembled scaffold fasta for gold:Gp0115677", + "url": 
"https://data.microbiomedata.org/data/1781_86092/assembly/assembly_scaffolds.fna", + "file_size_bytes": 246291939, + "type": "nmdc:DataObject", + "id": "nmdc:fcac84657291d9e28e15e83d656ce7fd", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db2" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 5769674213, + "type": "nmdc:DataObject", + "id": "nmdc:32366af15429dcf03ef716a44fed367e", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db4" + }, + "description": "Assembled AGP file for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/assembly.agp", + "file_size_bytes": 35864331, + "type": "nmdc:DataObject", + "id": "nmdc:4027ab07eb8931ae9c5a17b480d238b3", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159f6" + }, + "id": "nmdc:9921b494b07bde6a76e1c3e9e4da11ce", + "name": "1781_86092.krona.html", + "description": "Gold:Gp0115677 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86092/ReadbasedAnalysis/centrifuge/1781_86092.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159fe" + }, + "id": "nmdc:a99cc1fb22427c72f4d6b67cec82948e", + "name": "1781_86092.json", + "description": "Gold:Gp0115677 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86092/ReadbasedAnalysis/1781_86092.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ce" + }, + "id": "nmdc:7085e0d349fac196f897eadc405b081a", + "name": "bins.tooShort.fa", + "description": "tooShort (< 
3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115677", + "file_size_bytes": 208193586, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162cf" + }, + "id": "nmdc:20faf8ed13a8cde73b0522cb954acf0e", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115677", + "file_size_bytes": 28797768, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d1" + }, + "id": "nmdc:963a658cc3e72ac4966f260064cf3c4e", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115677", + "file_size_bytes": 832, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d3" + }, + "id": "nmdc:d5296a7efb592e30a0f2439b3a6aad40", + "name": "gold:Gp0115677.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 756006, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d4" + }, + "id": "nmdc:e371db754cc99ab772212052997f6e12", + "name": "gold:Gp0115677.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 557597, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d5" + }, + "id": "nmdc:82753f458e12c2c99121f4aec2f62b03", + "name": "gold:Gp0115677.bins.9.fa", + "description": "metabat2 binned contig file for 
gold:Gp0115677", + "file_size_bytes": 314985, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d6" + }, + "id": "nmdc:e795c55b78d5504a3c9da194492bd8f4", + "name": "gold:Gp0115677.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 1087308, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d7" + }, + "id": "nmdc:5f7c596adbc713a159c13ac5e8d88e2f", + "name": "gold:Gp0115677.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 621780, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d8" + }, + "id": "nmdc:56ebd3517fb82d228d679991b7b9cfb8", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115677", + "file_size_bytes": 2535, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d9" + }, + "id": "nmdc:f81840dde4202f7feef24e36df596446", + "name": "gold:Gp0115677.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 291445, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162da" + }, + "id": "nmdc:edae13d8586f04ad81f447fa27adc7a9", + "name": "gold:Gp0115677.bins.4.fa", + "description": 
"metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 1196625, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162db" + }, + "id": "nmdc:ff320d478c10b7118c4da20ce49793c9", + "name": "gold:Gp0115677.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 225889, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162dc" + }, + "id": "nmdc:ca9221f7ba635008e04c92c7111633e9", + "name": "gold:Gp0115677.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 642978, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162dd" + }, + "id": "nmdc:2b9b32a7151436cea05cbddc6ec0dded", + "name": "gold:Gp0115677.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 723505, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e0" + }, + "id": "nmdc:ee41bb68b0e1e5c7d4724b00c79e5649", + "name": "gold:Gp0115677.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 323769, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6c" + }, + "description": "KO TSV File for gold:Gp0115677", + "url": 
"https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_ko.tsv", + "md5_checksum": "34d53203f08e6c25c8f85f6e04d6df24", + "file_size_bytes": 3385, + "id": "nmdc:34d53203f08e6c25c8f85f6e04d6df24", + "name": "gold:Gp0115677_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0115677_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.unbinned.fa", - "md5_checksum": "c5334a4e305f78c294c304c3c0526826", - "id": "nmdc:c5334a4e305f78c294c304c3c0526826", - "file_size_bytes": 26658018 + "_id": { + "$oid": "649b00401ae706d7b5b16d6f" + }, + "description": "EC TSV File for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_ec.tsv", + "md5_checksum": "8a39e09943350e563b00e23a146c3ec1", + "file_size_bytes": 3385, + "id": "nmdc:8a39e09943350e563b00e23a146c3ec1", + "name": "gold:Gp0115677_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0115677_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115677", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_checkm_qa.out", - "md5_checksum": "6adacc1ba06e5e451f3636c394c71ae8", - "id": "nmdc:6adacc1ba06e5e451f3636c394c71ae8", - "file_size_bytes": 1859 + "_id": { + "$oid": "649b00401ae706d7b5b16d70" + }, + "description": "Functional annotation GFF file for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_functional_annotation.gff", + "md5_checksum": "e7df895e1a7776ba16b6d77fdc9b077d", + "file_size_bytes": 3385, + "id": "nmdc:e7df895e1a7776ba16b6d77fdc9b077d", + "name": "gold:Gp0115677_Functional annotation GFF file", + 
"data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0115677_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115677", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_hqmq_bin.zip", - "md5_checksum": "77d4e2a7f358b9ac1d53b69d7e8c45e1", - "id": "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", - "file_size_bytes": 2309404 + "_id": { + "$oid": "649b00401ae706d7b5b16d71" + }, + "description": "Protein FAA for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_proteins.faa", + "md5_checksum": "c0365d39cb481d6e0f729b587dac10c8", + "file_size_bytes": 3385, + "id": "nmdc:c0365d39cb481d6e0f729b587dac10c8", + "name": "gold:Gp0115677_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0115677_metabat2 bins", - "description": "metabat2 bins for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_metabat_bin.zip", - "md5_checksum": "42c3fb9a3906f6b413f99e3276bb7550", - "id": "nmdc:42c3fb9a3906f6b413f99e3276bb7550", - "file_size_bytes": 450699 + "_id": { + "$oid": "649b00401ae706d7b5b16d77" + }, + "description": "Structural annotation GFF file for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_structural_annotation.gff", + "md5_checksum": "bfbd1bd1ad70307dd01b699ecc4ffb2a", + "file_size_bytes": 3385, + "id": "nmdc:bfbd1bd1ad70307dd01b699ecc4ffb2a", + "name": "gold:Gp0115677_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -17627,6 +24492,224 @@ "md5_checksum": "55f66520d821205e80dcd303cc2793bc", "id": "nmdc:55f66520d821205e80dcd303cc2793bc", "file_size_bytes": 1259160 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9a" + 
}, + "description": "Assembled contigs fasta for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly_contigs.fna", + "file_size_bytes": 41258072, + "type": "nmdc:DataObject", + "id": "nmdc:333b8256818eefecf0581f31a45719f9", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9b" + }, + "description": "Assembled AGP file for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly.agp", + "file_size_bytes": 5091186, + "type": "nmdc:DataObject", + "id": "nmdc:a153a87ca330ba427510d800ac847c95", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/mapping_stats.txt", + "file_size_bytes": 5934586, + "type": "nmdc:DataObject", + "id": "nmdc:6eca425a70ac889b1d110b88f7907b74", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9d" + }, + "description": "Assembled scaffold fasta for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly_scaffolds.fna", + "file_size_bytes": 41014137, + "type": "nmdc:DataObject", + "id": "nmdc:b6558fa3c0fcd24593d86fc5c63ab5b5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14d9f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1614431528, + "type": "nmdc:DataObject", + "id": "nmdc:c7cce636f2a9bd54e8f62742da37f5cc", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": 
"649b003d1ae706d7b5b159e6" + }, + "id": "nmdc:f4335a3fd80dc97fbf2ce8bc5b64f0a4", + "name": "1781_86090.krona.html", + "description": "Gold:Gp0115675 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86090/ReadbasedAnalysis/centrifuge/1781_86090.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159ea" + }, + "id": "nmdc:05966e29e6f087d77b9e766b5fb9c64f", + "name": "1781_86090.json", + "description": "Gold:Gp0115675 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86090/ReadbasedAnalysis/1781_86090.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162bd" + }, + "id": "nmdc:360d6a6ecfa44731fe4d69f778f11285", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115675", + "file_size_bytes": 6936846, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c0" + }, + "id": "nmdc:5aa5000cbfc131f8162b1cd1bc37698e", + "name": "gold:Gp0115675.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115675", + "file_size_bytes": 2739890, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c1" + }, + "id": "nmdc:aaaee9ac7ea2ec601b554f01e38e2a4c", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115675", + "file_size_bytes": 1176, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c2" + }, + "id": 
"nmdc:d2c69965d41ba1023d9422e40e3366cc", + "name": "gold:Gp0115675.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115675", + "file_size_bytes": 221316, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c3" + }, + "id": "nmdc:bec36f95050a4825f0e8eec250dec56a", + "name": "gold:Gp0115675.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115675", + "file_size_bytes": 464857, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c4" + }, + "id": "nmdc:83f09a72cb190961374eae70d64af121", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115675", + "file_size_bytes": 30259643, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d66" + }, + "description": "EC TSV File for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_ec.tsv", + "md5_checksum": "b30bdfcd025588bd80ebb3bcdad2cdc8", + "file_size_bytes": 3385, + "id": "nmdc:b30bdfcd025588bd80ebb3bcdad2cdc8", + "name": "gold:Gp0115675_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d69" + }, + "description": "Functional annotation GFF file for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_functional_annotation.gff", + "md5_checksum": "e745ff0c0a95c89393f8789cd8c409e9", + "file_size_bytes": 3385, + "id": "nmdc:e745ff0c0a95c89393f8789cd8c409e9", + "name": "gold:Gp0115675_Functional annotation GFF 
file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6a" + }, + "description": "KO TSV File for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_ko.tsv", + "md5_checksum": "7ab72f45de20843e167ee1e595bb752d", + "file_size_bytes": 3385, + "id": "nmdc:7ab72f45de20843e167ee1e595bb752d", + "name": "gold:Gp0115675_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dab" + }, + "description": "Structural annotation GFF file for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_structural_annotation.gff", + "md5_checksum": "dcb8211231f718d57e22f8dea1efc6d0", + "file_size_bytes": 3385, + "id": "nmdc:dcb8211231f718d57e22f8dea1efc6d0", + "name": "gold:Gp0115675_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dc7" + }, + "description": "Protein FAA for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_proteins.faa", + "md5_checksum": "51f3c008db6a106ee14e160f35f7d9f3", + "file_size_bytes": 3385, + "id": "nmdc:51f3c008db6a106ee14e160f35f7d9f3", + "name": "gold:Gp0115675_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -18209,21 +25292,311 @@ "file_size_bytes": 1700 }, { - "name": "Gp0115665_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115665", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_hqmq_bin.zip", - "md5_checksum": "e344d87dbac42a645fd3c7d5b9d0a1a5", - "id": "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", - "file_size_bytes": 2294379 + 
"name": "Gp0115665_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115665", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_hqmq_bin.zip", + "md5_checksum": "e344d87dbac42a645fd3c7d5b9d0a1a5", + "id": "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", + "file_size_bytes": 2294379 + }, + { + "name": "Gp0115665_metabat2 bins", + "description": "metabat2 bins for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_metabat_bin.zip", + "md5_checksum": "1098bd9921c6ab8f52aca786e3b7bf1d", + "id": "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d", + "file_size_bytes": 534425 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d68" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/mapping_stats.txt", + "file_size_bytes": 26201542, + "type": "nmdc:DataObject", + "id": "nmdc:0b1c4ab81deba76f53eb5b266566cc4e", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d69" + }, + "description": "Assembled scaffold fasta for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly_scaffolds.fna", + "file_size_bytes": 183060964, + "type": "nmdc:DataObject", + "id": "nmdc:4a666a393dc8497e61a35c6842a369be", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6a" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4399182435, + "type": "nmdc:DataObject", + "id": "nmdc:292511a07ffb1791b7546b4db9843a07", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": 
"649b003c1ae706d7b5b14d6b" + }, + "description": "Assembled AGP file for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly.agp", + "file_size_bytes": 22724422, + "type": "nmdc:DataObject", + "id": "nmdc:a54a1ab841896539024d2748e1b66131", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d70" + }, + "description": "Assembled contigs fasta for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly_contigs.fna", + "file_size_bytes": 184120398, + "type": "nmdc:DataObject", + "id": "nmdc:2235febcd5329a40beb86d8d8411e0c1", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b1598e" + }, + "id": "nmdc:79966acbb43ba7d6f0044b54b7c44a6b", + "name": "1781_86094.krona.html", + "description": "Gold:Gp0115665 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86094/ReadbasedAnalysis/centrifuge/1781_86094.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159a4" + }, + "id": "nmdc:7a86a0c36f9ef12596ff3796cd9277d9", + "name": "1781_86094.json", + "description": "Gold:Gp0115665 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86094/ReadbasedAnalysis/1781_86094.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625e" + }, + "id": "nmdc:f9db08f4e245f63eede42dedcdbb4def", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115665", + "file_size_bytes": 2145, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16260" + }, + "id": 
"nmdc:97218a09f8e0949fea768a5c616191b0", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115665", + "file_size_bytes": 33177668, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16262" + }, + "id": "nmdc:7ecd6e3a6a8cb6fe9365b57becf82216", + "name": "gold:Gp0115665.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 1121208, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16263" + }, + "id": "nmdc:7596d700873fd3e46d0f78284fe0c0f5", + "name": "gold:Gp0115665.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 298154, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16264" + }, + "id": "nmdc:b44f6446e22ea134f1f306f0412f0f5f", + "name": "gold:Gp0115665.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 591265, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16265" + }, + "id": "nmdc:fcb189e3ab45040fe52458c90dd29f6c", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115665", + "file_size_bytes": 142092787, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16266" + }, + "id": 
"nmdc:ee01c153a5253dfe9e8de1bbbf9480fe", + "name": "gold:Gp0115665.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 1178929, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16268" + }, + "id": "nmdc:e08dcba67c8844436094e8c77b0f16f9", + "name": "gold:Gp0115665.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 964743, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16269" + }, + "id": "nmdc:54d4d93e1b2c00805f59308f47c950dd", + "name": "gold:Gp0115665.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 377968, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626a" + }, + "id": "nmdc:b7d7c5484e7902cd4fd2e5b946467326", + "name": "gold:Gp0115665.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 221952, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626c" + }, + "id": "nmdc:ec4e307b82b0fc7da2a123df702d2202", + "name": "gold:Gp0115665.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 1185129, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b1626f" + }, + "id": "nmdc:43913ff26d7b2fd7315e4468948fccac", + "name": "gold:Gp0115665.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 205826, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d72" + }, + "description": "Protein FAA for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_proteins.faa", + "md5_checksum": "1b5b79d300bb60afffec76da4cda7f14", + "file_size_bytes": 3385, + "id": "nmdc:1b5b79d300bb60afffec76da4cda7f14", + "name": "gold:Gp0115665_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d74" + }, + "description": "KO TSV File for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_ko.tsv", + "md5_checksum": "e28746f79f2d58d71fd5f42dff8b6dd5", + "file_size_bytes": 3385, + "id": "nmdc:e28746f79f2d58d71fd5f42dff8b6dd5", + "name": "gold:Gp0115665_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d79" + }, + "description": "Functional annotation GFF file for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_functional_annotation.gff", + "md5_checksum": "dceabe03f9758a72038b9824794337e1", + "file_size_bytes": 3385, + "id": "nmdc:dceabe03f9758a72038b9824794337e1", + "name": "gold:Gp0115665_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7a" + }, + "description": "Structural annotation GFF file for gold:Gp0115665", + "url": 
"https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_structural_annotation.gff", + "md5_checksum": "431860b46c896880c1d8d779fb2645ec", + "file_size_bytes": 3385, + "id": "nmdc:431860b46c896880c1d8d779fb2645ec", + "name": "gold:Gp0115665_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0115665_metabat2 bins", - "description": "metabat2 bins for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_metabat_bin.zip", - "md5_checksum": "1098bd9921c6ab8f52aca786e3b7bf1d", - "id": "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d", - "file_size_bytes": 534425 + "_id": { + "$oid": "649b00401ae706d7b5b16dc6" + }, + "description": "EC TSV File for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_ec.tsv", + "md5_checksum": "b4a623a8d9418c04567b5712889fcdfd", + "file_size_bytes": 3385, + "id": "nmdc:b4a623a8d9418c04567b5712889fcdfd", + "name": "gold:Gp0115665_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -18878,6 +26251,223 @@ "md5_checksum": "6a80769f6812a45615890cc2b03e9abf", "id": "nmdc:6a80769f6812a45615890cc2b03e9abf", "file_size_bytes": 359752 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d78" + }, + "description": "Assembled contigs fasta for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly_contigs.fna", + "file_size_bytes": 58380875, + "type": "nmdc:DataObject", + "id": "nmdc:17cff5e222ad522c357863eb39418117", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7a" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/mapping_stats.txt", + "file_size_bytes": 8408070, + "type": "nmdc:DataObject", + 
"id": "nmdc:3f087e100be127e3b95dae0eeff2cb95", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7c" + }, + "description": "Assembled scaffold fasta for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly_scaffolds.fna", + "file_size_bytes": 58037702, + "type": "nmdc:DataObject", + "id": "nmdc:72840aa9e6a9a5b8e1ca113008cf44b1", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1913779393, + "type": "nmdc:DataObject", + "id": "nmdc:3eea7321716d25a836521bbd70da488b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d81" + }, + "description": "Assembled AGP file for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly.agp", + "file_size_bytes": 7215836, + "type": "nmdc:DataObject", + "id": "nmdc:bcab2f9486464ccf89e94e63626cfc5c", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159af" + }, + "id": "nmdc:398b32d16246f98d91f1a6952a26feba", + "name": "1781_86097.krona.html", + "description": "Gold:Gp0115669 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86097/ReadbasedAnalysis/centrifuge/1781_86097.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159b3" + }, + "id": "nmdc:9b7edfd57bb79efa110f07d6a03bb2f4", + "name": "1781_86097.json", + "description": "Gold:Gp0115669 ReadbasedAnalysis result JSON file", + "url": 
"https://data.microbiomedata.org/1781_86097/ReadbasedAnalysis/1781_86097.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627b" + }, + "id": "nmdc:f0f1a3e612de5a76d0e0517864378138", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115669", + "file_size_bytes": 43682660, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627c" + }, + "id": "nmdc:2b7abbacb5fbbb936d9421e78e9116c9", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115669", + "file_size_bytes": 11095337, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627d" + }, + "id": "nmdc:ed20a53339faee3206f7eacf9031fa26", + "name": "gold:Gp0115669.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115669", + "file_size_bytes": 2141471, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627e" + }, + "id": "nmdc:45d197f727234e5dd5756bc48f88bf0f", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115669", + "file_size_bytes": 4804, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627f" + }, + "id": "nmdc:c397d43fa00df5c21b4865775bea17ba", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115669", + "file_size_bytes": 996, + "url": 
"https://data.microbiomedata.org/data/1781_86097/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16282" + }, + "id": "nmdc:8602045050811243d163714135d5dce5", + "name": "gold:Gp0115669.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115669", + "file_size_bytes": 659826, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7f" + }, + "description": "Protein FAA for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_proteins.faa", + "md5_checksum": "6de20d427454895dce6caeb7b9543c11", + "file_size_bytes": 3385, + "id": "nmdc:6de20d427454895dce6caeb7b9543c11", + "name": "gold:Gp0115669_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d80" + }, + "description": "EC TSV File for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_ec.tsv", + "md5_checksum": "e74cb5e168717574193a15d5ac04a01f", + "file_size_bytes": 3385, + "id": "nmdc:e74cb5e168717574193a15d5ac04a01f", + "name": "gold:Gp0115669_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d82" + }, + "description": "Functional annotation GFF file for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_functional_annotation.gff", + "md5_checksum": "4f7a6e682f6f13b7ea73511265fdd2a9", + "file_size_bytes": 3385, + "id": "nmdc:4f7a6e682f6f13b7ea73511265fdd2a9", + "name": "gold:Gp0115669_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b00401ae706d7b5b16d83" + }, + "description": "Structural annotation GFF file for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_structural_annotation.gff", + "md5_checksum": "9c68523f458ee1f8ec395e1442b1f508", + "file_size_bytes": 3385, + "id": "nmdc:9c68523f458ee1f8ec395e1442b1f508", + "name": "gold:Gp0115669_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8c" + }, + "description": "KO TSV File for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_ko.tsv", + "md5_checksum": "0bc9b55e2d8f3c45b18725845815bfde", + "file_size_bytes": 3385, + "id": "nmdc:0bc9b55e2d8f3c45b18725845815bfde", + "name": "gold:Gp0115669_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -19402,79 +26992,392 @@ "file_size_bytes": 10141642 }, { - "name": "Gp0115672_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_supfam.gff", - "md5_checksum": "82ac29a9999c6bc097cb0f35e4177e35", - "id": "nmdc:82ac29a9999c6bc097cb0f35e4177e35", - "file_size_bytes": 56808220 + "name": "Gp0115672_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_supfam.gff", + "md5_checksum": "82ac29a9999c6bc097cb0f35e4177e35", + "id": "nmdc:82ac29a9999c6bc097cb0f35e4177e35", + "file_size_bytes": 56808220 + }, + { + "name": "Gp0115672_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cath_funfam.gff", + "md5_checksum": "5b0e8395559ef0d8a341ae0e132e60f6", + "id": "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", + 
"file_size_bytes": 45632833 + }, + { + "name": "Gp0115672_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko_ec.gff", + "md5_checksum": "1e74c3df751a59a34e5c0d87f4a37563", + "id": "nmdc:1e74c3df751a59a34e5c0d87f4a37563", + "file_size_bytes": 33782864 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115672_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.tooShort.fa", + "md5_checksum": "2b6e0195e34697039eff38b51026be24", + "id": "nmdc:2b6e0195e34697039eff38b51026be24", + "file_size_bytes": 91055942 + }, + { + "name": "Gp0115672_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.unbinned.fa", + "md5_checksum": "f02d361fbef7549e2289bf4da623787d", + "id": "nmdc:f02d361fbef7549e2289bf4da623787d", + "file_size_bytes": 23202832 + }, + { + "name": "Gp0115672_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115672", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_checkm_qa.out", + "md5_checksum": "2de282e5507477269238ead458f11ac0", + "id": "nmdc:2de282e5507477269238ead458f11ac0", + 
"file_size_bytes": 2040 + }, + { + "name": "Gp0115672_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115672", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_hqmq_bin.zip", + "md5_checksum": "3abae1a573f9f0ac6da47e1ab9b9a723", + "id": "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", + "file_size_bytes": 1815861 + }, + { + "name": "Gp0115672_metabat2 bins", + "description": "metabat2 bins for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_metabat_bin.zip", + "md5_checksum": "4d315d8dac1d9605d110ff2298b10229", + "id": "nmdc:4d315d8dac1d9605d110ff2298b10229", + "file_size_bytes": 2757900 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8b" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/mapping_stats.txt", + "file_size_bytes": 16391024, + "type": "nmdc:DataObject", + "id": "nmdc:b5be8aa1d11106aabbcf86f4a31e558b", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8d" + }, + "description": "Assembled contigs fasta for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly_contigs.fna", + "file_size_bytes": 128215940, + "type": "nmdc:DataObject", + "id": "nmdc:f74d007a0d55515291e2ab3ecd50461f", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8e" + }, + "description": "Assembled AGP file for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly.agp", + "file_size_bytes": 14188798, + "type": "nmdc:DataObject", + "id": "nmdc:39b43fc42da1d32ab929d57555ff63ee", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": 
"649b003c1ae706d7b5b14d8f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2905683228, + "type": "nmdc:DataObject", + "id": "nmdc:c01fcbe10ff6779259fbe584b123b82d", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d92" + }, + "description": "Assembled scaffold fasta for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly_scaffolds.fna", + "file_size_bytes": 127550998, + "type": "nmdc:DataObject", + "id": "nmdc:49c6eb2c5d792edf921a7226b03351bf", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159ca" + }, + "id": "nmdc:bc647f348d91e409e4125941b495ff13", + "name": "1781_86103.krona.html", + "description": "Gold:Gp0115672 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86103/ReadbasedAnalysis/centrifuge/1781_86103.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159cf" + }, + "id": "nmdc:986355b49a83d2548afbc1792128513e", + "name": "1781_86103.json", + "description": "Gold:Gp0115672 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86103/ReadbasedAnalysis/1781_86103.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16297" + }, + "id": "nmdc:0c7691992c142a735412ded115a1debd", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115672", + "file_size_bytes": 88459668, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629c" + 
}, + "id": "nmdc:521024b7b73f146c2b00dba84fb2d303", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115672", + "file_size_bytes": 24826673, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629d" + }, + "id": "nmdc:d6bee3893d2f63b687bc6078ce48dc2e", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115672", + "file_size_bytes": 815, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629e" + }, + "id": "nmdc:82cfd339c167866e4b67cc4b12d7478d", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115672", + "file_size_bytes": 2394, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629f" + }, + "id": "nmdc:a939ca808857119835c8340b2a79d302", + "name": "gold:Gp0115672.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 2103943, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a2" + }, + "id": "nmdc:e3e116dfc8712b6e35a071845657d1d1", + "name": "gold:Gp0115672.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 374860, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a3" + }, + "id": 
"nmdc:e8a46f36b4956575ad78e022e604a89a", + "name": "gold:Gp0115672.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 689749, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a4" + }, + "id": "nmdc:561b7c7429a778107b65ece41a39bbb8", + "name": "gold:Gp0115672.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 313467, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a5" + }, + "id": "nmdc:1daea4e61ec3220e37e2c86742d9ba90", + "name": "gold:Gp0115672.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 1599533, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a6" + }, + "id": "nmdc:45fa952f6821c80c16e77c526d6506c0", + "name": "gold:Gp0115672.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 471200, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115672_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cath_funfam.gff", - "md5_checksum": "5b0e8395559ef0d8a341ae0e132e60f6", - "id": "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", - "file_size_bytes": 45632833 + "_id": { + "$oid": "649b003f1ae706d7b5b162a7" + }, + "id": 
"nmdc:48a3756c08657061dbbd1b3fbd92b52d", + "name": "gold:Gp0115672.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 232940, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115672_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko_ec.gff", - "md5_checksum": "1e74c3df751a59a34e5c0d87f4a37563", - "id": "nmdc:1e74c3df751a59a34e5c0d87f4a37563", - "file_size_bytes": 33782864 + "_id": { + "$oid": "649b003f1ae706d7b5b162a8" + }, + "id": "nmdc:28fcb3eb2cf1d63d9623996a438b3cae", + "name": "gold:Gp0115672.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 834579, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 + "_id": { + "$oid": "649b003f1ae706d7b5b162a9" + }, + "id": "nmdc:97a872124142327afa9e896d56b3c263", + "name": "gold:Gp0115672.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 943843, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115672_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": 
"tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.tooShort.fa", - "md5_checksum": "2b6e0195e34697039eff38b51026be24", - "id": "nmdc:2b6e0195e34697039eff38b51026be24", - "file_size_bytes": 91055942 + "_id": { + "$oid": "649b003f1ae706d7b5b162aa" + }, + "id": "nmdc:20f94f48572c63758d65c10c19dc3a44", + "name": "gold:Gp0115672.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 5938384, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" }, { - "name": "Gp0115672_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.unbinned.fa", - "md5_checksum": "f02d361fbef7549e2289bf4da623787d", - "id": "nmdc:f02d361fbef7549e2289bf4da623787d", - "file_size_bytes": 23202832 + "_id": { + "$oid": "649b00401ae706d7b5b16d9e" + }, + "description": "EC TSV File for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_ec.tsv", + "md5_checksum": "e029f10a29dd5e9d81dce82c2211fdee", + "file_size_bytes": 3385, + "id": "nmdc:e029f10a29dd5e9d81dce82c2211fdee", + "name": "gold:Gp0115672_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0115672_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115672", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_checkm_qa.out", - "md5_checksum": "2de282e5507477269238ead458f11ac0", - "id": "nmdc:2de282e5507477269238ead458f11ac0", - "file_size_bytes": 2040 + "_id": { + "$oid": "649b00401ae706d7b5b16d9f" + }, + 
"description": "KO TSV File for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_ko.tsv", + "md5_checksum": "f6230d3d3eadab80074ecfe59a623c10", + "file_size_bytes": 3385, + "id": "nmdc:f6230d3d3eadab80074ecfe59a623c10", + "name": "gold:Gp0115672_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0115672_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115672", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_hqmq_bin.zip", - "md5_checksum": "3abae1a573f9f0ac6da47e1ab9b9a723", - "id": "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", - "file_size_bytes": 1815861 + "_id": { + "$oid": "649b00401ae706d7b5b16da0" + }, + "description": "Functional annotation GFF file for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_functional_annotation.gff", + "md5_checksum": "5c1afd4ffb1b1594807fbd0901da7a88", + "file_size_bytes": 3385, + "id": "nmdc:5c1afd4ffb1b1594807fbd0901da7a88", + "name": "gold:Gp0115672_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0115672_metabat2 bins", - "description": "metabat2 bins for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_metabat_bin.zip", - "md5_checksum": "4d315d8dac1d9605d110ff2298b10229", - "id": "nmdc:4d315d8dac1d9605d110ff2298b10229", - "file_size_bytes": 2757900 + "_id": { + "$oid": "649b00401ae706d7b5b16da1" + }, + "description": "Protein FAA for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_proteins.faa", + "md5_checksum": "b0687d58e2803a41864c9d830977402b", + "file_size_bytes": 3385, + "id": "nmdc:b0687d58e2803a41864c9d830977402b", + "name": "gold:Gp0115672_Protein FAA", + 
"data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da2" + }, + "description": "Structural annotation GFF file for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_structural_annotation.gff", + "md5_checksum": "644d67586f9337bf4d12ff5859d4cd54", + "file_size_bytes": 3385, + "id": "nmdc:644d67586f9337bf4d12ff5859d4cd54", + "name": "gold:Gp0115672_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -20170,6 +28073,212 @@ "md5_checksum": "d1cf2992bd60e25032eedeb09858d14b", "id": "nmdc:d1cf2992bd60e25032eedeb09858d14b", "file_size_bytes": 345388 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e92" + }, + "description": "Assembled scaffold fasta for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly_scaffolds.fna", + "file_size_bytes": 43496758, + "type": "nmdc:DataObject", + "id": "nmdc:aa1bb1c144d1bca4e8aeeb2c9d640d75", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e93" + }, + "description": "Assembled AGP file for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly.agp", + "file_size_bytes": 6929297, + "type": "nmdc:DataObject", + "id": "nmdc:c4688faca5539c65da5223b1468045be", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e94" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/mapping_stats.txt", + "file_size_bytes": 7938723, + "type": "nmdc:DataObject", + "id": "nmdc:82ea1e2021fb7f53d998452af137427c", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": 
"649b003d1ae706d7b5b14e96" + }, + "description": "Assembled contigs fasta for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly_contigs.fna", + "file_size_bytes": 43816991, + "type": "nmdc:DataObject", + "id": "nmdc:e2d5ce50f49731a49740d9f61f630550", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e98" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2545520278, + "type": "nmdc:DataObject", + "id": "nmdc:68a7046814acf2ffe580fa8ce70e8a06", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b7b" + }, + "id": "nmdc:252bb7818bcf5f8a50bf88d1fd0a297c", + "name": "1781_100342.krona.html", + "description": "Gold:Gp0127640 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100342/ReadbasedAnalysis/centrifuge/1781_100342.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b82" + }, + "id": "nmdc:0bf64f8fcce67bacdf9e484f8ea2268e", + "name": "1781_100342.json", + "description": "Gold:Gp0127640 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100342/ReadbasedAnalysis/1781_100342.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660c" + }, + "id": "nmdc:e9110de20a054251e14eddda17e204a6", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127640", + "file_size_bytes": 39101595, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660d" 
+ }, + "id": "nmdc:21e6cb23babaec38d6e8d431893c23a3", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127640", + "file_size_bytes": 3314124, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660e" + }, + "id": "nmdc:363ee3fe300a57198050ef502d613d92", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127640", + "file_size_bytes": 918, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660f" + }, + "id": "nmdc:59a345dc3bc08ee0f1837d41a276654f", + "name": "gold:Gp0127640.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127640", + "file_size_bytes": 275145, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16626" + }, + "id": "nmdc:5356fae3a74ea20c0344e57c8ef11166", + "name": "gold:Gp0127640.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127640", + "file_size_bytes": 277293, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d11" + }, + "description": "EC TSV File for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_ec.tsv", + "md5_checksum": "e90b16891cff9bd5b0034cc6c89f8080", + "file_size_bytes": 3385, + "id": "nmdc:e90b16891cff9bd5b0034cc6c89f8080", + "name": "gold:Gp0127640_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + 
"$oid": "649b00401ae706d7b5b16d14" + }, + "description": "Functional annotation GFF file for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_functional_annotation.gff", + "md5_checksum": "86b6734c5eb64c0cae6e95fa7f062123", + "file_size_bytes": 3385, + "id": "nmdc:86b6734c5eb64c0cae6e95fa7f062123", + "name": "gold:Gp0127640_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d15" + }, + "description": "KO TSV File for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_ko.tsv", + "md5_checksum": "4950dc66d2b5a3c325454fb106d6b726", + "file_size_bytes": 3385, + "id": "nmdc:4950dc66d2b5a3c325454fb106d6b726", + "name": "gold:Gp0127640_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1c" + }, + "description": "Protein FAA for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_proteins.faa", + "md5_checksum": "1fbb7302a6ad581085d561e9fd3ed802", + "file_size_bytes": 3385, + "id": "nmdc:1fbb7302a6ad581085d561e9fd3ed802", + "name": "gold:Gp0127640_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d22" + }, + "description": "Structural annotation GFF file for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_structural_annotation.gff", + "md5_checksum": "812cf8b77747ff65cfd237158535d310", + "file_size_bytes": 3385, + "id": "nmdc:812cf8b77747ff65cfd237158535d310", + "name": "gold:Gp0127640_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -20729,6 +28838,212 @@ "md5_checksum": 
"8f6b89831cabcd1dc7aa5e26d87f5063", "id": "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063", "file_size_bytes": 625863 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e95" + }, + "description": "Assembled contigs fasta for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/assembly_contigs.fna", + "file_size_bytes": 101616916, + "type": "nmdc:DataObject", + "id": "nmdc:a707d24e95ee536650d1cc70bbf997d8", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e97" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1939284551, + "type": "nmdc:DataObject", + "id": "nmdc:af117c2397f282c3f1d319c499d72b01", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e99" + }, + "description": "Assembled AGP file for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/assembly.agp", + "file_size_bytes": 12669908, + "type": "nmdc:DataObject", + "id": "nmdc:662fa061e9042db360dd7981f6068505", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9a" + }, + "description": "Assembled scaffold fasta for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/assembly_scaffolds.fna", + "file_size_bytes": 101039761, + "type": "nmdc:DataObject", + "id": "nmdc:3c5870bf66d9acb165352c67638b29c8", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9e" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/mapping_stats.txt", + "file_size_bytes": 14436822, + "type": "nmdc:DataObject", + 
"id": "nmdc:bb3f818e2f6299570c76a7ea96fcf7e4", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b84" + }, + "id": "nmdc:f6cd4b98b207dc9f70dcfa063d4afb92", + "name": "1781_100343.krona.html", + "description": "Gold:Gp0127641 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100343/ReadbasedAnalysis/centrifuge/1781_100343.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b8a" + }, + "id": "nmdc:48c8c09803af12f6092d895de5a1eff9", + "name": "1781_100343.json", + "description": "Gold:Gp0127641 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100343/ReadbasedAnalysis/1781_100343.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16610" + }, + "id": "nmdc:ede01d68f85f6183407fe751475b2350", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127641", + "file_size_bytes": 78611268, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16611" + }, + "id": "nmdc:af8a167fb92c9470eaa77ed8617d454d", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127641", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16614" + }, + "id": "nmdc:55b42e3f671bfeab937473ef45b55b4b", + "name": "gold:Gp0127641.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127641", + "file_size_bytes": 434241, + "url": 
"https://data.microbiomedata.org/data/1781_100343/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661a" + }, + "id": "nmdc:9e790d1073c174456b4d98661bf92d81", + "name": "gold:Gp0127641.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127641", + "file_size_bytes": 740186, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16628" + }, + "id": "nmdc:5d6128d308651aad814210c9a3a28f3b", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127641", + "file_size_bytes": 20215397, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d17" + }, + "description": "Structural annotation GFF file for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_structural_annotation.gff", + "md5_checksum": "a33ac2dc640b7088767a99517f22421f", + "file_size_bytes": 3385, + "id": "nmdc:a33ac2dc640b7088767a99517f22421f", + "name": "gold:Gp0127641_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d18" + }, + "description": "KO TSV File for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_ko.tsv", + "md5_checksum": "4ec0cbf7d166057c3d2904b2dd2f6b15", + "file_size_bytes": 3385, + "id": "nmdc:4ec0cbf7d166057c3d2904b2dd2f6b15", + "name": "gold:Gp0127641_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2d" + }, + "description": "Protein FAA for gold:Gp0127641", + "url": 
"https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_proteins.faa", + "md5_checksum": "d10b0c9b0d5e646d09c570eb2e08b793", + "file_size_bytes": 3385, + "id": "nmdc:d10b0c9b0d5e646d09c570eb2e08b793", + "name": "gold:Gp0127641_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d31" + }, + "description": "EC TSV File for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_ec.tsv", + "md5_checksum": "71306193abf043865cafa413b3ca9c1e", + "file_size_bytes": 3385, + "id": "nmdc:71306193abf043865cafa413b3ca9c1e", + "name": "gold:Gp0127641_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d33" + }, + "description": "Functional annotation GFF file for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_functional_annotation.gff", + "md5_checksum": "11d4524c896f4fd678ff05a0547b6b52", + "file_size_bytes": 3385, + "id": "nmdc:11d4524c896f4fd678ff05a0547b6b52", + "name": "gold:Gp0127641_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -21307,6 +29622,212 @@ "md5_checksum": "79de6d81848956e1c06a811bc9bdab81", "id": "nmdc:79de6d81848956e1c06a811bc9bdab81", "file_size_bytes": 614113 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea0" + }, + "description": "Assembled AGP file for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/assembly.agp", + "file_size_bytes": 13820270, + "type": "nmdc:DataObject", + "id": "nmdc:b0ff6dcafcb9bed83c5290e6f974dbf0", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea1" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127643", + "url": 
"https://data.microbiomedata.org/data/1781_100345/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2057080151, + "type": "nmdc:DataObject", + "id": "nmdc:372822daf5aee3e4a9b1f8e621dbd3f5", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea3" + }, + "description": "Assembled scaffold fasta for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/assembly_scaffolds.fna", + "file_size_bytes": 111307907, + "type": "nmdc:DataObject", + "id": "nmdc:7b778a5f68bdd7a7deeb51a98df7ac3d", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea4" + }, + "description": "Assembled contigs fasta for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/assembly_contigs.fna", + "file_size_bytes": 111937017, + "type": "nmdc:DataObject", + "id": "nmdc:e087926bf099d6b56eaa8ed38dc9587c", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea5" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/mapping_stats.txt", + "file_size_bytes": 15727329, + "type": "nmdc:DataObject", + "id": "nmdc:3e82935d61f88ddbd5c4d0be5f3a4974", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b98" + }, + "id": "nmdc:f562a2cbd61dd314aa652b5a7962a453", + "name": "1781_100345.krona.html", + "description": "Gold:Gp0127643 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100345/ReadbasedAnalysis/centrifuge/1781_100345.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b9a" + }, + "id": 
"nmdc:60405949438243714571490c6faab9f5", + "name": "1781_100345.json", + "description": "Gold:Gp0127643 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100345/ReadbasedAnalysis/1781_100345.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16617" + }, + "id": "nmdc:843af81eb17f23a12d17e72a36922a7a", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127643", + "file_size_bytes": 22959128, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661b" + }, + "id": "nmdc:00606078cd171aa99d50f89abea30559", + "name": "gold:Gp0127643.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127643", + "file_size_bytes": 232512, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661c" + }, + "id": "nmdc:ea805619c536992228a7e6ad5e3ee57a", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127643", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661d" + }, + "id": "nmdc:4c1506844b7b4f668c62e266dc7180da", + "name": "gold:Gp0127643.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127643", + "file_size_bytes": 1495841, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662a" + }, + "id": "nmdc:7c45113f19fcf47e76d2408c9e4aa2af", + "name": 
"bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127643", + "file_size_bytes": 85480222, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d16" + }, + "description": "KO TSV File for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_ko.tsv", + "md5_checksum": "b7b422e726f82668cd9c2ea9f0786f41", + "file_size_bytes": 3385, + "id": "nmdc:b7b422e726f82668cd9c2ea9f0786f41", + "name": "gold:Gp0127643_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d19" + }, + "description": "Functional annotation GFF file for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_functional_annotation.gff", + "md5_checksum": "f8df0729f51da70739b75a2458e32020", + "file_size_bytes": 3385, + "id": "nmdc:f8df0729f51da70739b75a2458e32020", + "name": "gold:Gp0127643_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1a" + }, + "description": "Protein FAA for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_proteins.faa", + "md5_checksum": "7434bd60874fc6d05530ee0652a9e18f", + "file_size_bytes": 3385, + "id": "nmdc:7434bd60874fc6d05530ee0652a9e18f", + "name": "gold:Gp0127643_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1b" + }, + "description": "Structural annotation GFF file for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_structural_annotation.gff", + "md5_checksum": "d897fea88896a93843966962f6bbb7be", + "file_size_bytes": 3385, + 
"id": "nmdc:d897fea88896a93843966962f6bbb7be", + "name": "gold:Gp0127643_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d50" + }, + "description": "EC TSV File for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_ec.tsv", + "md5_checksum": "0b7fc1ad662f267eaa604075f9968b7c", + "file_size_bytes": 3385, + "id": "nmdc:0b7fc1ad662f267eaa604075f9968b7c", + "name": "gold:Gp0127643_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -21868,6 +30389,212 @@ "md5_checksum": "9d712c5924d6d0ee6d7305918e69302d", "id": "nmdc:9d712c5924d6d0ee6d7305918e69302d", "file_size_bytes": 218004 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea2" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/mapping_stats.txt", + "file_size_bytes": 3427545, + "type": "nmdc:DataObject", + "id": "nmdc:8a13cc4cdcd17eff35bdd65c4ffba887", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea7" + }, + "description": "Assembled contigs fasta for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly_contigs.fna", + "file_size_bytes": 21697071, + "type": "nmdc:DataObject", + "id": "nmdc:f40e4315c5285ac27f850a924b9f0d19", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebe" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 737012011, + "type": "nmdc:DataObject", + "id": "nmdc:0d039fa249c3d84d8f41ba5302cdbf44", + "name": 
"pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec1" + }, + "description": "Assembled AGP file for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly.agp", + "file_size_bytes": 2981406, + "type": "nmdc:DataObject", + "id": "nmdc:0cd8988c1aa59aed46dc245a4fc85fae", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec2" + }, + "description": "Assembled scaffold fasta for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly_scaffolds.fna", + "file_size_bytes": 21558498, + "type": "nmdc:DataObject", + "id": "nmdc:72856cd0c04a3d82033e4eeb78036c79", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ba0" + }, + "id": "nmdc:ad532c16f1d8772ef78f2b4977e13fbd", + "name": "1781_100346.krona.html", + "description": "Gold:Gp0127644 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100346/ReadbasedAnalysis/centrifuge/1781_100346.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ba4" + }, + "id": "nmdc:92cdb6d9a145d9ae65275474604499cc", + "name": "1781_100346.json", + "description": "Gold:Gp0127644 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100346/ReadbasedAnalysis/1781_100346.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661e" + }, + "id": "nmdc:db639c1a9c06584736a3a8551fd080c4", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127644", + "file_size_bytes": 2937035, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + 
"_id": { + "$oid": "649b003f1ae706d7b5b1661f" + }, + "id": "nmdc:fa322b3ff5e9a665ddc2a40878a19292", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127644", + "file_size_bytes": 17744229, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16620" + }, + "id": "nmdc:5506404345e9af51ae1ef526737952eb", + "name": "gold:Gp0127644.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127644", + "file_size_bytes": 291225, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16621" + }, + "id": "nmdc:0d447336e66ce46fe603146e03f77994", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127644", + "file_size_bytes": 918, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16623" + }, + "id": "nmdc:e199232899faf46559a302f30bd9e0c8", + "name": "gold:Gp0127644.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127644", + "file_size_bytes": 320491, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1d" + }, + "description": "Functional annotation GFF file for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_functional_annotation.gff", + "md5_checksum": "0626957517790befa95e8fefad58be0c", + "file_size_bytes": 3385, + "id": "nmdc:0626957517790befa95e8fefad58be0c", + "name": "gold:Gp0127644_Functional 
annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1e" + }, + "description": "Protein FAA for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_proteins.faa", + "md5_checksum": "2c7d55cbee1f35793da90275740d3651", + "file_size_bytes": 3385, + "id": "nmdc:2c7d55cbee1f35793da90275740d3651", + "name": "gold:Gp0127644_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1f" + }, + "description": "Structural annotation GFF file for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_structural_annotation.gff", + "md5_checksum": "0973e6d47848f6677ced2a8d463670fa", + "file_size_bytes": 3385, + "id": "nmdc:0973e6d47848f6677ced2a8d463670fa", + "name": "gold:Gp0127644_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d20" + }, + "description": "EC TSV File for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_ec.tsv", + "md5_checksum": "03c32c8ae757623520f6211ff641c40a", + "file_size_bytes": 3385, + "id": "nmdc:03c32c8ae757623520f6211ff641c40a", + "name": "gold:Gp0127644_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d21" + }, + "description": "KO TSV File for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_ko.tsv", + "md5_checksum": "3eced892c4712a2b13e805a978ec0819", + "file_size_bytes": 3385, + "id": "nmdc:3eced892c4712a2b13e805a978ec0819", + "name": "gold:Gp0127644_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" } ], 
"dissolving_activity_set": [], @@ -22412,21 +31139,239 @@ "file_size_bytes": 1570 }, { - "name": "Gp0127639_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127639", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_hqmq_bin.zip", - "md5_checksum": "7072cfd6665082a95b2c09a4bc88760c", - "id": "nmdc:7072cfd6665082a95b2c09a4bc88760c", - "file_size_bytes": 182 + "name": "Gp0127639_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127639", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_hqmq_bin.zip", + "md5_checksum": "7072cfd6665082a95b2c09a4bc88760c", + "id": "nmdc:7072cfd6665082a95b2c09a4bc88760c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127639_metabat2 bins", + "description": "metabat2 bins for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_metabat_bin.zip", + "md5_checksum": "b0db190d9d1093ef87a5efb8a600e9ef", + "id": "nmdc:b0db190d9d1093ef87a5efb8a600e9ef", + "file_size_bytes": 1000457 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8b" + }, + "description": "Assembled scaffold fasta for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly_scaffolds.fna", + "file_size_bytes": 119007591, + "type": "nmdc:DataObject", + "id": "nmdc:3200c62a99e8ddd0fd6403d6dfe5fc5d", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/mapping_stats.txt", + "file_size_bytes": 16022429, + "type": "nmdc:DataObject", + "id": "nmdc:b2bff56e405eaffed2b0a3d7d6000b37", + "name": "mapping_stats.txt", + 
"data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8f" + }, + "description": "Assembled contigs fasta for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly_contigs.fna", + "file_size_bytes": 119647240, + "type": "nmdc:DataObject", + "id": "nmdc:1d1610f39b4543fe7a0ecde2b1d8d710", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e90" + }, + "description": "Assembled AGP file for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly.agp", + "file_size_bytes": 14066973, + "type": "nmdc:DataObject", + "id": "nmdc:71da65a514fef7d1e2b3cf2a8dbcba74", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e91" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1755614129, + "type": "nmdc:DataObject", + "id": "nmdc:164f413fa91ee1433e3f441649315c61", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b74" + }, + "id": "nmdc:1bf82e8b1c00260947b645449b0bedcb", + "name": "1781_100341.krona.html", + "description": "Gold:Gp0127639 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100341/ReadbasedAnalysis/centrifuge/1781_100341.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b7a" + }, + "id": "nmdc:813f4c0b656c2812a7db73fc0df92f23", + "name": "1781_100341.json", + "description": "Gold:Gp0127639 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100341/ReadbasedAnalysis/1781_100341.json", + "file_size_bytes": 3385, + "type": 
"nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16608" + }, + "id": "nmdc:bd6adf1661bff8cdbad6416f39136291", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127639", + "file_size_bytes": 1092, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660a" + }, + "id": "nmdc:03f2b08b396c5b273845ff6fb1a7a2d2", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127639", + "file_size_bytes": 87721724, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660b" + }, + "id": "nmdc:a57183db98a4cd0611a8587010c37d52", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127639", + "file_size_bytes": 29133879, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16612" + }, + "id": "nmdc:168e30f70b1513401a606fa75bdabf50", + "name": "gold:Gp0127639.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127639", + "file_size_bytes": 235784, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16627" + }, + "id": "nmdc:390577e22b09a5c74de14b9e3a9a6b19", + "name": "gold:Gp0127639.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127639", + "file_size_bytes": 472014, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { 
+ "$oid": "649b003f1ae706d7b5b1662c" + }, + "id": "nmdc:2cc7b6f61240516e9bf15f84809635c8", + "name": "gold:Gp0127639.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127639", + "file_size_bytes": 282214, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0d" + }, + "description": "EC TSV File for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_ec.tsv", + "md5_checksum": "7e710e983d3a5ffbddc618c5e252e06b", + "file_size_bytes": 3385, + "id": "nmdc:7e710e983d3a5ffbddc618c5e252e06b", + "name": "gold:Gp0127639_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0e" + }, + "description": "KO TSV File for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_ko.tsv", + "md5_checksum": "ccbc768cb20e4c1b25d7627b611eb8dc", + "file_size_bytes": 3385, + "id": "nmdc:ccbc768cb20e4c1b25d7627b611eb8dc", + "name": "gold:Gp0127639_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d10" + }, + "description": "Protein FAA for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_proteins.faa", + "md5_checksum": "fccc8283a46f12babeed0b2c7cc4eebd", + "file_size_bytes": 3385, + "id": "nmdc:fccc8283a46f12babeed0b2c7cc4eebd", + "name": "gold:Gp0127639_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d12" + }, + "description": "Functional annotation GFF file for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_functional_annotation.gff", + 
"md5_checksum": "ee416a49155f7c07bcb776962708fb04", + "file_size_bytes": 3385, + "id": "nmdc:ee416a49155f7c07bcb776962708fb04", + "name": "gold:Gp0127639_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127639_metabat2 bins", - "description": "metabat2 bins for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_metabat_bin.zip", - "md5_checksum": "b0db190d9d1093ef87a5efb8a600e9ef", - "id": "nmdc:b0db190d9d1093ef87a5efb8a600e9ef", - "file_size_bytes": 1000457 + "_id": { + "$oid": "649b00401ae706d7b5b16d13" + }, + "description": "Structural annotation GFF file for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_structural_annotation.gff", + "md5_checksum": "d0452fefd4ad4f4cd10c974294bf9058", + "file_size_bytes": 3385, + "id": "nmdc:d0452fefd4ad4f4cd10c974294bf9058", + "name": "gold:Gp0127639_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -23048,6 +31993,212 @@ "md5_checksum": "46858bd4b45bdaa4e4344820f3c54b3b", "id": "nmdc:46858bd4b45bdaa4e4344820f3c54b3b", "file_size_bytes": 472684 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9b" + }, + "description": "Assembled scaffold fasta for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly_scaffolds.fna", + "file_size_bytes": 43652238, + "type": "nmdc:DataObject", + "id": "nmdc:6eca0963e47257569a60827999eeaaa8", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/mapping_stats.txt", + "file_size_bytes": 7677591, + "type": "nmdc:DataObject", + "id": 
"nmdc:8a499e5986fac773f987576c5c2ec223", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9d" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2429450297, + "type": "nmdc:DataObject", + "id": "nmdc:4938ea35089362aa1ee2e129706e1e8a", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9f" + }, + "description": "Assembled contigs fasta for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly_contigs.fna", + "file_size_bytes": 43961966, + "type": "nmdc:DataObject", + "id": "nmdc:1e4e73c9d1faa4585cb3a266b5a6cd39", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea6" + }, + "description": "Assembled AGP file for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly.agp", + "file_size_bytes": 6698219, + "type": "nmdc:DataObject", + "id": "nmdc:ff66a5de4da06400243924f54998c37d", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b8e" + }, + "id": "nmdc:57c57663cd0c81252303be99f87ec09e", + "name": "1781_100344.krona.html", + "description": "Gold:Gp0127642 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100344/ReadbasedAnalysis/centrifuge/1781_100344.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b92" + }, + "id": "nmdc:07632d9f02d85eee5b556a94acf251ef", + "name": "1781_100344.json", + "description": "Gold:Gp0127642 ReadbasedAnalysis result JSON file", + "url": 
"https://data.microbiomedata.org/1781_100344/ReadbasedAnalysis/1781_100344.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16613" + }, + "id": "nmdc:231cfca4487ba7ec3ab476022e003ac7", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127642", + "file_size_bytes": 3446845, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16615" + }, + "id": "nmdc:bceccf441b752fc5608db53515a9552e", + "name": "gold:Gp0127642.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127642", + "file_size_bytes": 276719, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16616" + }, + "id": "nmdc:c26ef51c7ce6c79f8dad28e39f4238d7", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127642", + "file_size_bytes": 918, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16618" + }, + "id": "nmdc:914e14cda452df07bf33be9bda12738c", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127642", + "file_size_bytes": 38677251, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16619" + }, + "id": "nmdc:0d6107d2c1b4c0e3423f54ae1895aad9", + "name": "gold:Gp0127642.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127642", + "file_size_bytes": 743464, + "url": 
"https://data.microbiomedata.org/data/1781_100344/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d29" + }, + "description": "Functional annotation GFF file for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_functional_annotation.gff", + "md5_checksum": "657b2348517d3e169df0914f5d8a2d21", + "file_size_bytes": 3385, + "id": "nmdc:657b2348517d3e169df0914f5d8a2d21", + "name": "gold:Gp0127642_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2c" + }, + "description": "Protein FAA for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_proteins.faa", + "md5_checksum": "263acdd17bdb9ed72102610070da3d65", + "file_size_bytes": 3385, + "id": "nmdc:263acdd17bdb9ed72102610070da3d65", + "name": "gold:Gp0127642_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2e" + }, + "description": "EC TSV File for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_ec.tsv", + "md5_checksum": "4f8de602126deeb9ef60cf5f739d601a", + "file_size_bytes": 3385, + "id": "nmdc:4f8de602126deeb9ef60cf5f739d601a", + "name": "gold:Gp0127642_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d30" + }, + "description": "KO TSV File for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_ko.tsv", + "md5_checksum": "65319d4c3ffdbf5dcdb2e2837aea8cf4", + "file_size_bytes": 3385, + "id": "nmdc:65319d4c3ffdbf5dcdb2e2837aea8cf4", + "name": "gold:Gp0127642_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + 
"type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d32" + }, + "description": "Structural annotation GFF file for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_structural_annotation.gff", + "md5_checksum": "9e55f66e86f57487e23029b90a84c4a4", + "file_size_bytes": 3385, + "id": "nmdc:9e55f66e86f57487e23029b90a84c4a4", + "name": "gold:Gp0127642_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -23603,6 +32754,176 @@ "md5_checksum": "17e9a7763327f2b5d3f841079c2f68d8", "id": "nmdc:17e9a7763327f2b5d3f841079c2f68d8", "file_size_bytes": 82006 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea9" + }, + "description": "Assembled contigs fasta for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly_contigs.fna", + "file_size_bytes": 32744062, + "type": "nmdc:DataObject", + "id": "nmdc:cfb56be5f505927c085fb3105561b578", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eab" + }, + "description": "Assembled scaffold fasta for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly_scaffolds.fna", + "file_size_bytes": 32499084, + "type": "nmdc:DataObject", + "id": "nmdc:fda96a730e2bfe0ced5e4ff057aae5d3", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eac" + }, + "description": "Assembled AGP file for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly.agp", + "file_size_bytes": 5288971, + "type": "nmdc:DataObject", + "id": "nmdc:5cd6af80a19658f0fa7b8229f6ba8242", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ead" + }, + "description": "Metagenome 
Alignment BAM file for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2314898025, + "type": "nmdc:DataObject", + "id": "nmdc:3c03b1fab29a1825d07195ca4992fb31", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb9" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/mapping_stats.txt", + "file_size_bytes": 6067070, + "type": "nmdc:DataObject", + "id": "nmdc:e17989e7cc9952a4f9d8321328229316", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bb5" + }, + "id": "nmdc:bfb473bea17c38bdb8fc5e394c1021b7", + "name": "1781_100348.json", + "description": "Gold:Gp0127646 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100348/ReadbasedAnalysis/1781_100348.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bb6" + }, + "id": "nmdc:26bb18215b48754fcd58dbb38e8d01e1", + "name": "1781_100348.krona.html", + "description": "Gold:Gp0127646 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100348/ReadbasedAnalysis/centrifuge/1781_100348.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662b" + }, + "id": "nmdc:10a6a1f1b3bf7cc4960ad08e0914edc7", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127646", + "file_size_bytes": 30170371, + "url": "https://data.microbiomedata.org/data/1781_100348/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16631" + }, + "id": 
"nmdc:dc8ebe9a5e1a8e38d88a63afbc535046", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127646", + "file_size_bytes": 1849966, + "url": "https://data.microbiomedata.org/data/1781_100348/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d27" + }, + "description": "KO TSV File for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_ko.tsv", + "md5_checksum": "5cd2f970cbb8eb5d8e52ac7a08bfb9a3", + "file_size_bytes": 3385, + "id": "nmdc:5cd2f970cbb8eb5d8e52ac7a08bfb9a3", + "name": "gold:Gp0127646_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d28" + }, + "description": "EC TSV File for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_ec.tsv", + "md5_checksum": "9d87100ad8b6278b4a442c4686d7aef7", + "file_size_bytes": 3385, + "id": "nmdc:9d87100ad8b6278b4a442c4686d7aef7", + "name": "gold:Gp0127646_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2b" + }, + "description": "Functional annotation GFF file for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_functional_annotation.gff", + "md5_checksum": "c0858f9a847f241ed28f454adb580bf4", + "file_size_bytes": 3385, + "id": "nmdc:c0858f9a847f241ed28f454adb580bf4", + "name": "gold:Gp0127646_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2f" + }, + "description": "Protein FAA for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_proteins.faa", + "md5_checksum": "646648c11733f7ab7ea23008729360ce", + "file_size_bytes": 3385, + "id": 
"nmdc:646648c11733f7ab7ea23008729360ce", + "name": "gold:Gp0127646_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d35" + }, + "description": "Structural annotation GFF file for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_structural_annotation.gff", + "md5_checksum": "94574634e1ccfe241af033259e27df1a", + "file_size_bytes": 3385, + "id": "nmdc:94574634e1ccfe241af033259e27df1a", + "name": "gold:Gp0127646_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -24141,30 +33462,235 @@ "file_size_bytes": 6656731 }, { - "name": "Gp0127648_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127648", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_checkm_qa.out", - "md5_checksum": "942bd7c28c52e6301bf97dab0ea2852a", - "id": "nmdc:942bd7c28c52e6301bf97dab0ea2852a", - "file_size_bytes": 930 + "name": "Gp0127648_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127648", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_checkm_qa.out", + "md5_checksum": "942bd7c28c52e6301bf97dab0ea2852a", + "id": "nmdc:942bd7c28c52e6301bf97dab0ea2852a", + "file_size_bytes": 930 + }, + { + "name": "Gp0127648_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127648", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_hqmq_bin.zip", + "md5_checksum": "82ebf9065be9715e1230a50bf7a02197", + "id": "nmdc:82ebf9065be9715e1230a50bf7a02197", + 
"file_size_bytes": 466157 + }, + { + "name": "Gp0127648_metabat2 bins", + "description": "metabat2 bins for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_metabat_bin.zip", + "md5_checksum": "897536007e7e3525457df5d3baddd593", + "id": "nmdc:897536007e7e3525457df5d3baddd593", + "file_size_bytes": 90255 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb3" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/mapping_stats.txt", + "file_size_bytes": 11021314, + "type": "nmdc:DataObject", + "id": "nmdc:c12e8ed1aade1238318952b0c311cff1", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb5" + }, + "description": "Assembled contigs fasta for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly_contigs.fna", + "file_size_bytes": 66849907, + "type": "nmdc:DataObject", + "id": "nmdc:621134d8dd8a6b117924f92ffed69ba7", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb6" + }, + "description": "Assembled scaffold fasta for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly_scaffolds.fna", + "file_size_bytes": 66407046, + "type": "nmdc:DataObject", + "id": "nmdc:ede66d623f73ce756b3495f83205e6d9", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb7" + }, + "description": "Assembled AGP file for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly.agp", + "file_size_bytes": 9663138, + "type": "nmdc:DataObject", + "id": "nmdc:d48d39804b7f30d62b244ec9556fd8b1", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb8" 
+ }, + "description": "Metagenome Alignment BAM file for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2330726850, + "type": "nmdc:DataObject", + "id": "nmdc:11791ba21a4e2cea1d5e889dce811985", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bc0" + }, + "id": "nmdc:af639412ee5bca47c42088a8c81df692", + "name": "1781_100350.krona.html", + "description": "Gold:Gp0127648 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100350/ReadbasedAnalysis/centrifuge/1781_100350.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bc6" + }, + "id": "nmdc:e0e59ec0d07f88b6bc73664a179627e6", + "name": "1781_100350.json", + "description": "Gold:Gp0127648 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100350/ReadbasedAnalysis/1781_100350.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662d" + }, + "id": "nmdc:ef8ef2e558c88161ea6b3c05434b56ac", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127648", + "file_size_bytes": 7023427, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16630" + }, + "id": "nmdc:74b41590eebfcdef0e90406daaa4c95d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127648", + "file_size_bytes": 57206330, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16633" + }, + "id": "nmdc:a783b176b63e949529058e9db9ede106", + "name": 
"checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127648", + "file_size_bytes": 760, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16634" + }, + "id": "nmdc:9fbf80a1258e24efe59ba434424a14e6", + "name": "gold:Gp0127648.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127648", + "file_size_bytes": 1405337, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16636" + }, + "id": "nmdc:30f30b256de77b4a0b0194330f337dbe", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0127648", + "file_size_bytes": 1004, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d39" + }, + "description": "Structural annotation GFF file for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_structural_annotation.gff", + "md5_checksum": "863f93ecf208a6e19f17d460d8e1a963", + "file_size_bytes": 3385, + "id": "nmdc:863f93ecf208a6e19f17d460d8e1a963", + "name": "gold:Gp0127648_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3b" + }, + "description": "KO TSV File for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_ko.tsv", + "md5_checksum": "1287c2532770a0f0d6792192c7400c0c", + "file_size_bytes": 3385, + "id": "nmdc:1287c2532770a0f0d6792192c7400c0c", + "name": "gold:Gp0127648_KO TSV File", + "data_object_type": 
"Annotation KEGG Orthology", + "type": "nmdc:DataObject" }, { - "name": "Gp0127648_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127648", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_hqmq_bin.zip", - "md5_checksum": "82ebf9065be9715e1230a50bf7a02197", - "id": "nmdc:82ebf9065be9715e1230a50bf7a02197", - "file_size_bytes": 466157 + "_id": { + "$oid": "649b00401ae706d7b5b16d3c" + }, + "description": "EC TSV File for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_ec.tsv", + "md5_checksum": "06042b9d083bd6b9879bc5486c0b38ba", + "file_size_bytes": 3385, + "id": "nmdc:06042b9d083bd6b9879bc5486c0b38ba", + "name": "gold:Gp0127648_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" }, { - "name": "Gp0127648_metabat2 bins", - "description": "metabat2 bins for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_metabat_bin.zip", - "md5_checksum": "897536007e7e3525457df5d3baddd593", - "id": "nmdc:897536007e7e3525457df5d3baddd593", - "file_size_bytes": 90255 + "_id": { + "$oid": "649b00401ae706d7b5b16d3d" + }, + "description": "Protein FAA for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_proteins.faa", + "md5_checksum": "d27fabc532b52dec4afa4673f920633a", + "file_size_bytes": 3385, + "id": "nmdc:d27fabc532b52dec4afa4673f920633a", + "name": "gold:Gp0127648_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d43" + }, + "description": "Functional annotation GFF file for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_functional_annotation.gff", + "md5_checksum": "c11e44f28b422233e151d324d2accb43", + "file_size_bytes": 3385, 
+ "id": "nmdc:c11e44f28b422233e151d324d2accb43", + "name": "gold:Gp0127648_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -24743,6 +34269,176 @@ "md5_checksum": "6e92868d1912cb8f5b32fbf507721d16", "id": "nmdc:6e92868d1912cb8f5b32fbf507721d16", "file_size_bytes": 91931 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eae" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/mapping_stats.txt", + "file_size_bytes": 7240138, + "type": "nmdc:DataObject", + "id": "nmdc:c61b0651682d71b4a62ee2e51223af99", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eaf" + }, + "description": "Assembled contigs fasta for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly_contigs.fna", + "file_size_bytes": 41307096, + "type": "nmdc:DataObject", + "id": "nmdc:9aefb925f949c698cd2a0d71d1d2d7cc", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb1" + }, + "description": "Assembled scaffold fasta for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly_scaffolds.fna", + "file_size_bytes": 41014628, + "type": "nmdc:DataObject", + "id": "nmdc:ba8fe365f6e8a08812efe185c3454385", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb2" + }, + "description": "Assembled AGP file for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly.agp", + "file_size_bytes": 6312116, + "type": "nmdc:DataObject", + "id": "nmdc:32c394cf3ff8c87b4d60ff769265b544", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": 
"649b003d1ae706d7b5b14eb4" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2160760884, + "type": "nmdc:DataObject", + "id": "nmdc:f68a2e204a75c536142b2fd9dfd9ee8b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bab" + }, + "id": "nmdc:34a7f42ed597813c10ad6d3935563bf7", + "name": "1781_100349.krona.html", + "description": "Gold:Gp0127647 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100349/ReadbasedAnalysis/centrifuge/1781_100349.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bb1" + }, + "id": "nmdc:8ec86e6598c064e0091960a9921de1d6", + "name": "1781_100349.json", + "description": "Gold:Gp0127647 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100349/ReadbasedAnalysis/1781_100349.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662e" + }, + "id": "nmdc:a406d9ea4e02f98d5f48ba53b992dfa2", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127647", + "file_size_bytes": 3475613, + "url": "https://data.microbiomedata.org/data/1781_100349/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16632" + }, + "id": "nmdc:a0f5a2359ba3651c0315060a9827e39d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127647", + "file_size_bytes": 36966483, + "url": "https://data.microbiomedata.org/data/1781_100349/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d34" + }, + "description": "Functional 
annotation GFF file for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_functional_annotation.gff", + "md5_checksum": "af2496c3ae96ff31e6bdaae75b507ea7", + "file_size_bytes": 3385, + "id": "nmdc:af2496c3ae96ff31e6bdaae75b507ea7", + "name": "gold:Gp0127647_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d36" + }, + "description": "Protein FAA for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_proteins.faa", + "md5_checksum": "ec6d01297279eee2d4c03ecfda9309c9", + "file_size_bytes": 3385, + "id": "nmdc:ec6d01297279eee2d4c03ecfda9309c9", + "name": "gold:Gp0127647_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d37" + }, + "description": "EC TSV File for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_ec.tsv", + "md5_checksum": "18d40bd5ff2707ba9a4512363d05537d", + "file_size_bytes": 3385, + "id": "nmdc:18d40bd5ff2707ba9a4512363d05537d", + "name": "gold:Gp0127647_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d38" + }, + "description": "KO TSV File for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_ko.tsv", + "md5_checksum": "d855bc2d72a6ba238acfe746299cf26a", + "file_size_bytes": 3385, + "id": "nmdc:d855bc2d72a6ba238acfe746299cf26a", + "name": "gold:Gp0127647_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3a" + }, + "description": "Structural annotation GFF file for gold:Gp0127647", + "url": 
"https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_structural_annotation.gff", + "md5_checksum": "a57c9b7f192351676e897b8187cf6641", + "file_size_bytes": 3385, + "id": "nmdc:a57c9b7f192351676e897b8187cf6641", + "name": "gold:Gp0127647_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -25305,6 +35001,176 @@ "md5_checksum": "106983a66b58a2d07f0592d9379ad635", "id": "nmdc:106983a66b58a2d07f0592d9379ad635", "file_size_bytes": 76018 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb0" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/mapping_stats.txt", + "file_size_bytes": 7967021, + "type": "nmdc:DataObject", + "id": "nmdc:b9abce64459572cfb1b7ab2bed3c24f5", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebc" + }, + "description": "Assembled contigs fasta for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/assembly_contigs.fna", + "file_size_bytes": 49051776, + "type": "nmdc:DataObject", + "id": "nmdc:eb1d97165017b3e14d15f6407a181be3", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec0" + }, + "description": "Assembled scaffold fasta for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/assembly_scaffolds.fna", + "file_size_bytes": 48730645, + "type": "nmdc:DataObject", + "id": "nmdc:def65e725117abf461c8c182f7f56a72", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec3" + }, + "description": "Assembled AGP file for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/assembly.agp", + "file_size_bytes": 
6948927, + "type": "nmdc:DataObject", + "id": "nmdc:b71d0b119b5c306cf7e692196f77ca98", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec4" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2016145304, + "type": "nmdc:DataObject", + "id": "nmdc:f628c83e48578369510c07a7f81fdb56", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ba5" + }, + "id": "nmdc:0a84281526e0db5a01a8cc737d2febd8", + "name": "1781_100347.krona.html", + "description": "Gold:Gp0127645 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100347/ReadbasedAnalysis/centrifuge/1781_100347.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bb7" + }, + "id": "nmdc:f2e4cd496ba10ca8ae09e148c1a62e05", + "name": "1781_100347.json", + "description": "Gold:Gp0127645 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100347/ReadbasedAnalysis/1781_100347.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16624" + }, + "id": "nmdc:5bd5972264fb1269fa8516647b3cffd9", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127645", + "file_size_bytes": 41844692, + "url": "https://data.microbiomedata.org/data/1781_100347/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16625" + }, + "id": "nmdc:0e1d05cdd010f61435994457a58076cc", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127645", + "file_size_bytes": 6356502, + "url": 
"https://data.microbiomedata.org/data/1781_100347/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d23" + }, + "description": "EC TSV File for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_ec.tsv", + "md5_checksum": "17524561a0e1f2c9d9ffdebc3b2df6a8", + "file_size_bytes": 3385, + "id": "nmdc:17524561a0e1f2c9d9ffdebc3b2df6a8", + "name": "gold:Gp0127645_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d24" + }, + "description": "KO TSV File for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_ko.tsv", + "md5_checksum": "e2b3ea50301aa3efaea18732ddba04f4", + "file_size_bytes": 3385, + "id": "nmdc:e2b3ea50301aa3efaea18732ddba04f4", + "name": "gold:Gp0127645_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d25" + }, + "description": "Functional annotation GFF file for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_functional_annotation.gff", + "md5_checksum": "c3a8cfa76e5da83b2b24bc6a52f71952", + "file_size_bytes": 3385, + "id": "nmdc:c3a8cfa76e5da83b2b24bc6a52f71952", + "name": "gold:Gp0127645_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d26" + }, + "description": "Protein FAA for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_proteins.faa", + "md5_checksum": "2ab0820d09b9c331ec56d7d3e20552e6", + "file_size_bytes": 3385, + "id": "nmdc:2ab0820d09b9c331ec56d7d3e20552e6", + "name": "gold:Gp0127645_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + 
"$oid": "649b00401ae706d7b5b16d2a" + }, + "description": "Structural annotation GFF file for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_structural_annotation.gff", + "md5_checksum": "06280b3737fbf704d850ac68da190166", + "file_size_bytes": 3385, + "id": "nmdc:06280b3737fbf704d850ac68da190166", + "name": "gold:Gp0127645_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -25840,30 +35706,224 @@ "file_size_bytes": 14383032 }, { - "name": "Gp0127649_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127649", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_checkm_qa.out", - "md5_checksum": "8d4bce832a16affbcc3efeb8364e8eaa", - "id": "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", - "file_size_bytes": 942 + "name": "Gp0127649_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127649", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_checkm_qa.out", + "md5_checksum": "8d4bce832a16affbcc3efeb8364e8eaa", + "id": "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", + "file_size_bytes": 942 + }, + { + "name": "Gp0127649_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127649", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_hqmq_bin.zip", + "md5_checksum": "40273505b8b3dddd3ee5cb5c83871067", + "id": "nmdc:40273505b8b3dddd3ee5cb5c83871067", + "file_size_bytes": 182 + }, + { + "name": "Gp0127649_metabat2 bins", + "description": "metabat2 bins for Gp0127649", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_metabat_bin.zip", + "md5_checksum": "b767f2b59d0fd9e650914e140cacf104", + "id": "nmdc:b767f2b59d0fd9e650914e140cacf104", + "file_size_bytes": 269239 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eba" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127649", + "url": "https://data.microbiomedata.org/data/1781_100351/assembly/mapping_stats.txt", + "file_size_bytes": 14269698, + "type": "nmdc:DataObject", + "id": "nmdc:02f1408424cf3420cad010fe4f672f7a", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebb" + }, + "description": "Assembled contigs fasta for gold:Gp0127649", + "url": "https://data.microbiomedata.org/data/1781_100351/assembly/assembly_contigs.fna", + "file_size_bytes": 94092696, + "type": "nmdc:DataObject", + "id": "nmdc:6300cd8140abe6322e4a9c1921584476", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebd" + }, + "description": "Assembled scaffold fasta for gold:Gp0127649", + "url": "https://data.microbiomedata.org/data/1781_100351/assembly/assembly_scaffolds.fna", + "file_size_bytes": 93521052, + "type": "nmdc:DataObject", + "id": "nmdc:0dc5339ec9b3ea1dad516beff981255e", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec7" + }, + "description": "Assembled AGP file for gold:Gp0127649", + "url": "https://data.microbiomedata.org/data/1781_100351/assembly/assembly.agp", + "file_size_bytes": 12536139, + "type": "nmdc:DataObject", + "id": "nmdc:8477d852446a073d2d2de6b146b2921b", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec8" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127649", + "url": 
"https://data.microbiomedata.org/data/1781_100351/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2118848396, + "type": "nmdc:DataObject", + "id": "nmdc:b486014481aba75b91177c9cc3a9cf7b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bc7" + }, + "id": "nmdc:082242b653e5a0803121d043375f93a3", + "name": "1781_100351.krona.html", + "description": "Gold:Gp0127649 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100351/ReadbasedAnalysis/centrifuge/1781_100351.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bd1" + }, + "id": "nmdc:b7078aa8cc8165e23978ece3312e9192", + "name": "1781_100351.json", + "description": "Gold:Gp0127649 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100351/ReadbasedAnalysis/1781_100351.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16635" + }, + "id": "nmdc:a0108431c6c1fcfc7cec6830ef96dcb9", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127649", + "file_size_bytes": 775, + "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16639" + }, + "id": "nmdc:0c1c4f73f64b5651a20d421225d67f24", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127649", + "file_size_bytes": 77337130, + "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663a" + }, + "id": "nmdc:d11e901143547313fd0037177b5555ae", + "name": "gold:Gp0127649.bins.1.fa", + 
"description": "metabat2 binned contig file for gold:Gp0127649", + "file_size_bytes": 265082, + "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663b" + }, + "id": "nmdc:680731abdb97ba4977d33afbbc0b0c61", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127649", + "file_size_bytes": 14891738, + "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3e" + }, + "description": "KO TSV File for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_ko.tsv", + "md5_checksum": "35c0fd91c2225f595df469b61ba9578b", + "file_size_bytes": 3385, + "id": "nmdc:35c0fd91c2225f595df469b61ba9578b", + "name": "gold:Gp0127649_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3f" + }, + "description": "Structural annotation GFF file for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_structural_annotation.gff", + "md5_checksum": "cff0a71781a84c7096ee79b39c3336f8", + "file_size_bytes": 3385, + "id": "nmdc:cff0a71781a84c7096ee79b39c3336f8", + "name": "gold:Gp0127649_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d40" + }, + "description": "EC TSV File for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_ec.tsv", + "md5_checksum": "a14b836f963c0f6b02a70f0fc8cd40c0", + "file_size_bytes": 3385, + "id": "nmdc:a14b836f963c0f6b02a70f0fc8cd40c0", + "name": "gold:Gp0127649_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + 
"type": "nmdc:DataObject" }, { - "name": "Gp0127649_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127649", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_hqmq_bin.zip", - "md5_checksum": "40273505b8b3dddd3ee5cb5c83871067", - "id": "nmdc:40273505b8b3dddd3ee5cb5c83871067", - "file_size_bytes": 182 + "_id": { + "$oid": "649b00401ae706d7b5b16d41" + }, + "description": "Functional annotation GFF file for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_functional_annotation.gff", + "md5_checksum": "a022fd9c3254ad5dc6ae5be40cd35c0b", + "file_size_bytes": 3385, + "id": "nmdc:a022fd9c3254ad5dc6ae5be40cd35c0b", + "name": "gold:Gp0127649_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127649_metabat2 bins", - "description": "metabat2 bins for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_metabat_bin.zip", - "md5_checksum": "b767f2b59d0fd9e650914e140cacf104", - "id": "nmdc:b767f2b59d0fd9e650914e140cacf104", - "file_size_bytes": 269239 + "_id": { + "$oid": "649b00401ae706d7b5b16d42" + }, + "description": "Protein FAA for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_proteins.faa", + "md5_checksum": "56c3ac34fb2f1c2ba7bcd9bd56be731a", + "file_size_bytes": 3385, + "id": "nmdc:56c3ac34fb2f1c2ba7bcd9bd56be731a", + "name": "gold:Gp0127649_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -26442,6 +36502,224 @@ "md5_checksum": "21467369d04671628ae67afbaf1d2076", "id": "nmdc:21467369d04671628ae67afbaf1d2076", "file_size_bytes": 1013750 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ece" + }, + "description": "Metagenome Contig Coverage 
Stats for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/mapping_stats.txt", + "file_size_bytes": 16276629, + "type": "nmdc:DataObject", + "id": "nmdc:23dcbb19af7db7cda8f06a1b375f12bb", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ecf" + }, + "description": "Assembled scaffold fasta for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly_scaffolds.fna", + "file_size_bytes": 115559491, + "type": "nmdc:DataObject", + "id": "nmdc:880b4e3e1b337def43f9dc694227eb50", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed0" + }, + "description": "Assembled contigs fasta for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly_contigs.fna", + "file_size_bytes": 116210833, + "type": "nmdc:DataObject", + "id": "nmdc:e8bc7228a422a7c1a2641276ee3f6e37", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed1" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2194603382, + "type": "nmdc:DataObject", + "id": "nmdc:8f74962a51f82e4cebc78b6ac49dee49", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eda" + }, + "description": "Assembled AGP file for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly.agp", + "file_size_bytes": 14312615, + "type": "nmdc:DataObject", + "id": "nmdc:6a251e6317c4450686a6215b61cd85d1", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bde" + }, + "id": 
"nmdc:a29b48c9962bc2acbf5d7e1b5a8e3a41", + "name": "1781_100354.krona.html", + "description": "Gold:Gp0127652 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100354/ReadbasedAnalysis/centrifuge/1781_100354.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15be2" + }, + "id": "nmdc:966945bb7952a4629efc713c78ef927f", + "name": "1781_100354.json", + "description": "Gold:Gp0127652 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100354/ReadbasedAnalysis/1781_100354.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16637" + }, + "id": "nmdc:0327492db7a99ab0fb672213e49e2f84", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127652", + "file_size_bytes": 86634430, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663c" + }, + "id": "nmdc:376fce40c578be064e55103093f99f66", + "name": "gold:Gp0127652.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127652", + "file_size_bytes": 254409, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663d" + }, + "id": "nmdc:a28fa4b0897b1eae6d10053c47d07319", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127652", + "file_size_bytes": 1148, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663e" + }, + "id": 
"nmdc:b7511c0e296be199db386a3ae4181e45", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127652", + "file_size_bytes": 26455665, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663f" + }, + "id": "nmdc:1946e92c1dfeddcf766605d2f6227934", + "name": "gold:Gp0127652.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127652", + "file_size_bytes": 340274, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16640" + }, + "id": "nmdc:794cbd38c4fe3d18faf5ceb5f543de61", + "name": "gold:Gp0127652.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127652", + "file_size_bytes": 691252, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d47" + }, + "description": "EC TSV File for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_ec.tsv", + "md5_checksum": "06ceb99673dcb924ca223539267a962a", + "file_size_bytes": 3385, + "id": "nmdc:06ceb99673dcb924ca223539267a962a", + "name": "gold:Gp0127652_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d49" + }, + "description": "KO TSV File for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_ko.tsv", + "md5_checksum": "4d16f813aefc09c7720770f065964c49", + "file_size_bytes": 3385, + "id": "nmdc:4d16f813aefc09c7720770f065964c49", + "name": "gold:Gp0127652_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": 
"nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4a" + }, + "description": "Structural annotation GFF file for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_structural_annotation.gff", + "md5_checksum": "6b39045cb99ca6220e27c4fa960f4dd1", + "file_size_bytes": 3385, + "id": "nmdc:6b39045cb99ca6220e27c4fa960f4dd1", + "name": "gold:Gp0127652_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4b" + }, + "description": "Functional annotation GFF file for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_functional_annotation.gff", + "md5_checksum": "80c28fa3efc78e6d23d0abcf1161c983", + "file_size_bytes": 3385, + "id": "nmdc:80c28fa3efc78e6d23d0abcf1161c983", + "name": "gold:Gp0127652_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4e" + }, + "description": "Protein FAA for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_proteins.faa", + "md5_checksum": "48bb698de57cd77bf1ddda9004e89c01", + "file_size_bytes": 3385, + "id": "nmdc:48bb698de57cd77bf1ddda9004e89c01", + "name": "gold:Gp0127652_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -27058,6 +37336,176 @@ "md5_checksum": "8fc6f1a0269aa5179d72c52cf1a9726e", "id": "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e", "file_size_bytes": 69938 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed7" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/mapping_stats.txt", + "file_size_bytes": 14843159, + "type": "nmdc:DataObject", + "id": 
"nmdc:414faae2752dc595ae4f2ddab4438ec7", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed8" + }, + "description": "Assembled contigs fasta for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly_contigs.fna", + "file_size_bytes": 92474281, + "type": "nmdc:DataObject", + "id": "nmdc:cd12b50afea3097034758d6883864dd5", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14edc" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2651150044, + "type": "nmdc:DataObject", + "id": "nmdc:fdde2ac466c983fc1154c7968631df20", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ede" + }, + "description": "Assembled scaffold fasta for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly_scaffolds.fna", + "file_size_bytes": 91880416, + "type": "nmdc:DataObject", + "id": "nmdc:6e1f393ec856d3445d9a4ac23ff1b249", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee1" + }, + "description": "Assembled AGP file for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly.agp", + "file_size_bytes": 13042449, + "type": "nmdc:DataObject", + "id": "nmdc:c2e8b30ea935a2ca7bece5b913116f65", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bee" + }, + "id": "nmdc:336911e31f6622b74af1c92d2ed5f4b6", + "name": "1781_100356.krona.html", + "description": "Gold:Gp0127654 KRONA plot HTML file", + "url": 
"https://data.microbiomedata.org/1781_100356/ReadbasedAnalysis/centrifuge/1781_100356.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bef" + }, + "id": "nmdc:cdd1b6d43bd7a8963fa3c5bab4296498", + "name": "1781_100356.json", + "description": "Gold:Gp0127654 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100356/ReadbasedAnalysis/1781_100356.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16645" + }, + "id": "nmdc:e22ff97901fed9397f221fbd8048f87d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127654", + "file_size_bytes": 78267851, + "url": "https://data.microbiomedata.org/data/1781_100356/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16648" + }, + "id": "nmdc:9f21a2cf85bdf5ec51f41a6e331819cc", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127654", + "file_size_bytes": 12538639, + "url": "https://data.microbiomedata.org/data/1781_100356/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d55" + }, + "description": "Functional annotation GFF file for gold:Gp0127654", + "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_functional_annotation.gff", + "md5_checksum": "b28a675c6560b34691a960f7e873841d", + "file_size_bytes": 3385, + "id": "nmdc:b28a675c6560b34691a960f7e873841d", + "name": "gold:Gp0127654_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5c" + }, + "description": "KO TSV File for gold:Gp0127654", + "url": 
"https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_ko.tsv", + "md5_checksum": "3b7734343770dce929591ee83d96acb6", + "file_size_bytes": 3385, + "id": "nmdc:3b7734343770dce929591ee83d96acb6", + "name": "gold:Gp0127654_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5e" + }, + "description": "Protein FAA for gold:Gp0127654", + "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_proteins.faa", + "md5_checksum": "deda4116aac7e262c0edf3358bb8e384", + "file_size_bytes": 3385, + "id": "nmdc:deda4116aac7e262c0edf3358bb8e384", + "name": "gold:Gp0127654_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d63" + }, + "description": "EC TSV File for gold:Gp0127654", + "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_ec.tsv", + "md5_checksum": "b785c7809fa99d5beca859eded4a9b0f", + "file_size_bytes": 3385, + "id": "nmdc:b785c7809fa99d5beca859eded4a9b0f", + "name": "gold:Gp0127654_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d67" + }, + "description": "Structural annotation GFF file for gold:Gp0127654", + "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_structural_annotation.gff", + "md5_checksum": "a9cf54b925e1c5b8c3e0299730f5a464", + "file_size_bytes": 3385, + "id": "nmdc:a9cf54b925e1c5b8c3e0299730f5a464", + "name": "gold:Gp0127654_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -27596,30 +38044,200 @@ "file_size_bytes": 9275333 }, { - "name": "Gp0127656_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127656", - 
"data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_checkm_qa.out", - "md5_checksum": "5a8dbda6aec0825b4159d5b53481db90", - "id": "nmdc:5a8dbda6aec0825b4159d5b53481db90", - "file_size_bytes": 775 + "name": "Gp0127656_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127656", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_checkm_qa.out", + "md5_checksum": "5a8dbda6aec0825b4159d5b53481db90", + "id": "nmdc:5a8dbda6aec0825b4159d5b53481db90", + "file_size_bytes": 775 + }, + { + "name": "Gp0127656_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127656", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_hqmq_bin.zip", + "md5_checksum": "060a7f90c5c5123cac41ed946a5482af", + "id": "nmdc:060a7f90c5c5123cac41ed946a5482af", + "file_size_bytes": 182 + }, + { + "name": "Gp0127656_metabat2 bins", + "description": "metabat2 bins for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_metabat_bin.zip", + "md5_checksum": "e9f5d03e8264308ed77da0b63eb738fe", + "id": "nmdc:e9f5d03e8264308ed77da0b63eb738fe", + "file_size_bytes": 101752 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee3" + }, + "description": "Assembled scaffold fasta for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/assembly_scaffolds.fna", + "file_size_bytes": 77751067, + "type": "nmdc:DataObject", + "id": "nmdc:cea40db59e6f0f57dfb38ed4339225f7", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee4" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127656", + "url": 
"https://data.microbiomedata.org/data/1781_100633/assembly/mapping_stats.txt", + "file_size_bytes": 12706402, + "type": "nmdc:DataObject", + "id": "nmdc:b31206dd7fe7d961882d0654ab5aaffa", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee5" + }, + "description": "Assembled AGP file for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/assembly.agp", + "file_size_bytes": 11151492, + "type": "nmdc:DataObject", + "id": "nmdc:3794d834e9a6e8c1e2acf616a2cc7625", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee6" + }, + "description": "Assembled contigs fasta for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/assembly_contigs.fna", + "file_size_bytes": 78260498, + "type": "nmdc:DataObject", + "id": "nmdc:3dfd278d5e4fc3539b6dfd021acdac76", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee7" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2344270684, + "type": "nmdc:DataObject", + "id": "nmdc:6e0e10b90c8b52db8afc73199c3d6028", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bf4" + }, + "id": "nmdc:daa88ce1c3c1f25b3b19a8c98c255e7c", + "name": "1781_100633.krona.html", + "description": "Gold:Gp0127656 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100633/ReadbasedAnalysis/centrifuge/1781_100633.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bfb" + }, + "id": "nmdc:e47f46c4a96d30e9bc65ded042a90033", + "name": 
"1781_100633.json", + "description": "Gold:Gp0127656 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100633/ReadbasedAnalysis/1781_100633.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16649" + }, + "id": "nmdc:0f2602d1171d6e2e1a09f0b41f6ded92", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127656", + "file_size_bytes": 9538263, + "url": "https://data.microbiomedata.org/data/1781_100633/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664e" + }, + "id": "nmdc:5184f24c83a7a7b9a0aafb8a934234ac", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127656", + "file_size_bytes": 67308024, + "url": "https://data.microbiomedata.org/data/1781_100633/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d59" + }, + "description": "Functional annotation GFF file for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_functional_annotation.gff", + "md5_checksum": "00f42710ff9df37cd23e5e73d54e4dd1", + "file_size_bytes": 3385, + "id": "nmdc:00f42710ff9df37cd23e5e73d54e4dd1", + "name": "gold:Gp0127656_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5a" + }, + "description": "Protein FAA for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_proteins.faa", + "md5_checksum": "2819bbb349ca5bdbf311aeae6ada532b", + "file_size_bytes": 3385, + "id": "nmdc:2819bbb349ca5bdbf311aeae6ada532b", + "name": "gold:Gp0127656_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" }, { - "name": "Gp0127656_high-quality and 
medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127656", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_hqmq_bin.zip", - "md5_checksum": "060a7f90c5c5123cac41ed946a5482af", - "id": "nmdc:060a7f90c5c5123cac41ed946a5482af", - "file_size_bytes": 182 + "_id": { + "$oid": "649b00401ae706d7b5b16d5d" + }, + "description": "Structural annotation GFF file for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_structural_annotation.gff", + "md5_checksum": "0c2ae5a86d4840a0b324d73977170f1e", + "file_size_bytes": 3385, + "id": "nmdc:0c2ae5a86d4840a0b324d73977170f1e", + "name": "gold:Gp0127656_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" }, { - "name": "Gp0127656_metabat2 bins", - "description": "metabat2 bins for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_metabat_bin.zip", - "md5_checksum": "e9f5d03e8264308ed77da0b63eb738fe", - "id": "nmdc:e9f5d03e8264308ed77da0b63eb738fe", - "file_size_bytes": 101752 + "_id": { + "$oid": "649b00401ae706d7b5b16d5f" + }, + "description": "EC TSV File for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_ec.tsv", + "md5_checksum": "33e0f5ff7c448ded210f04798894a031", + "file_size_bytes": 3385, + "id": "nmdc:33e0f5ff7c448ded210f04798894a031", + "name": "gold:Gp0127656_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d60" + }, + "description": "KO TSV File for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_ko.tsv", + "md5_checksum": "8d230dd7948d2b08c4de1adc0d0002b8", + "file_size_bytes": 3385, + "id": "nmdc:8d230dd7948d2b08c4de1adc0d0002b8", + "name": "gold:Gp0127656_KO TSV File", + 
"data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -28179,6 +38797,176 @@ "md5_checksum": "eb5216cc4e09d88c4c59a76c4808a693", "id": "nmdc:eb5216cc4e09d88c4c59a76c4808a693", "file_size_bytes": 397044 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec9" + }, + "description": "Assembled contigs fasta for gold:Gp0127651", + "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly_contigs.fna", + "file_size_bytes": 95235782, + "type": "nmdc:DataObject", + "id": "nmdc:49c49b255b8db84f4b79e0ad5a963c82", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eca" + }, + "description": "Assembled scaffold fasta for gold:Gp0127651", + "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly_scaffolds.fna", + "file_size_bytes": 94693464, + "type": "nmdc:DataObject", + "id": "nmdc:6b1d7af20d7a316f3b13f1707ce7c518", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ecb" + }, + "description": "Assembled AGP file for gold:Gp0127651", + "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly.agp", + "file_size_bytes": 11899059, + "type": "nmdc:DataObject", + "id": "nmdc:36f080b0d13effe19b1f18dfc041a341", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ecd" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127651", + "url": "https://data.microbiomedata.org/data/1781_100353/assembly/mapping_stats.txt", + "file_size_bytes": 13567640, + "type": "nmdc:DataObject", + "id": "nmdc:e1e806d81cc6cd9f22702e75849f5e31", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed6" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127651", + "url": 
"https://data.microbiomedata.org/data/1781_100353/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2010414032, + "type": "nmdc:DataObject", + "id": "nmdc:b041d1ee91abbe2d6ade41bc46c67ab9", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bd4" + }, + "id": "nmdc:4ee6b6c602c6f2c054154f48da58b304", + "name": "1781_100353.krona.html", + "description": "Gold:Gp0127651 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100353/ReadbasedAnalysis/centrifuge/1781_100353.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bd9" + }, + "id": "nmdc:fba8766b1f1e3e5375ac56ecde508e96", + "name": "1781_100353.json", + "description": "Gold:Gp0127651 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100353/ReadbasedAnalysis/1781_100353.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16638" + }, + "id": "nmdc:2574b731c1f785d106e9033639833750", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127651", + "file_size_bytes": 75274019, + "url": "https://data.microbiomedata.org/data/1781_100353/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16644" + }, + "id": "nmdc:3fc9dca08829f51d49a574f916099e20", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127651", + "file_size_bytes": 18449153, + "url": "https://data.microbiomedata.org/data/1781_100353/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d44" + }, + "description": "EC TSV File for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_ec.tsv", + 
"md5_checksum": "2a7c5ba82dff4dd5d996ad5bc824103c", + "file_size_bytes": 3385, + "id": "nmdc:2a7c5ba82dff4dd5d996ad5bc824103c", + "name": "gold:Gp0127651_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d45" + }, + "description": "KO TSV File for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_ko.tsv", + "md5_checksum": "84dc1abc2d39254da6c3d2cd6cff6d9d", + "file_size_bytes": 3385, + "id": "nmdc:84dc1abc2d39254da6c3d2cd6cff6d9d", + "name": "gold:Gp0127651_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d46" + }, + "description": "Protein FAA for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_proteins.faa", + "md5_checksum": "67dfacdfc27cb6b0ec4787e1a40d9547", + "file_size_bytes": 3385, + "id": "nmdc:67dfacdfc27cb6b0ec4787e1a40d9547", + "name": "gold:Gp0127651_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d48" + }, + "description": "Structural annotation GFF file for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_structural_annotation.gff", + "md5_checksum": "714fb73a8b3011d0b2faea98eda477c3", + "file_size_bytes": 3385, + "id": "nmdc:714fb73a8b3011d0b2faea98eda477c3", + "name": "gold:Gp0127651_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4c" + }, + "description": "Functional annotation GFF file for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_functional_annotation.gff", + "md5_checksum": "e25cb289f398c007806c72c080724872", + "file_size_bytes": 3385, + "id": 
"nmdc:e25cb289f398c007806c72c080724872", + "name": "gold:Gp0127651_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -28738,6 +39526,224 @@ "md5_checksum": "668a0a6dbd840dd2178a00c2af4c2237", "id": "nmdc:668a0a6dbd840dd2178a00c2af4c2237", "file_size_bytes": 527634 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14edb" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/mapping_stats.txt", + "file_size_bytes": 13742582, + "type": "nmdc:DataObject", + "id": "nmdc:f4e6a47ebd604f90384f130eca3e401e", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14edf" + }, + "description": "Assembled contigs fasta for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/assembly_contigs.fna", + "file_size_bytes": 92713706, + "type": "nmdc:DataObject", + "id": "nmdc:2860c363baa5fd6e5bbdc96a8d54b56b", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee0" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2031000089, + "type": "nmdc:DataObject", + "id": "nmdc:a20a83922a21eba2ec447dacc259c083", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee2" + }, + "description": "Assembled scaffold fasta for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/assembly_scaffolds.fna", + "file_size_bytes": 92163960, + "type": "nmdc:DataObject", + "id": "nmdc:07f2db98361b0d4e4d4c6a89294348ce", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly 
Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee9" + }, + "description": "Assembled AGP file for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/assembly.agp", + "file_size_bytes": 12059276, + "type": "nmdc:DataObject", + "id": "nmdc:4f8b4cfdd8cbff990d5f4c5b932beb96", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15c0a" + }, + "id": "nmdc:8efdeab08615731f46e30a1cdc6bcb2d", + "name": "1781_100357.krona.html", + "description": "Gold:Gp0127655 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100357/ReadbasedAnalysis/centrifuge/1781_100357.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15c0e" + }, + "id": "nmdc:4df533784cd9ca8514f9622ba3ae0036", + "name": "1781_100357.json", + "description": "Gold:Gp0127655 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100357/ReadbasedAnalysis/1781_100357.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16641" + }, + "id": "nmdc:57a6185b7ec704380a4856d0083dbd1d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127655", + "file_size_bytes": 74915065, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16643" + }, + "id": "nmdc:7432d75e55847cf9a3c66589024e342c", + "name": "gold:Gp0127655.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127655", + "file_size_bytes": 210579, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16646" + }, + "id": "nmdc:a9494cde349debe8557cbd59c43138fe", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127655", + "file_size_bytes": 1085, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664a" + }, + "id": "nmdc:299ed9ebee6f2a5e5c202a11b9e5536a", + "name": "gold:Gp0127655.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127655", + "file_size_bytes": 216122, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664b" + }, + "id": "nmdc:61c5dd80a3ac06408612da5aa2ad8bc1", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127655", + "file_size_bytes": 15144355, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664c" + }, + "id": "nmdc:556fbc4cc2220b73f70dce6b46ff34c7", + "name": "gold:Gp0127655.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127655", + "file_size_bytes": 692364, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d52" + }, + "description": "KO TSV File for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_ko.tsv", + "md5_checksum": "6d1185f4034e364b74109d40326a450a", + "file_size_bytes": 3385, + "id": "nmdc:6d1185f4034e364b74109d40326a450a", + "name": "gold:Gp0127655_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" 
+ }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d53" + }, + "description": "Structural annotation GFF file for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_structural_annotation.gff", + "md5_checksum": "05e2702ecae6ba0ba0b0898132850b9f", + "file_size_bytes": 3385, + "id": "nmdc:05e2702ecae6ba0ba0b0898132850b9f", + "name": "gold:Gp0127655_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d56" + }, + "description": "Functional annotation GFF file for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_functional_annotation.gff", + "md5_checksum": "0b4a5dc91c42b7fea3fd514d5cb3138b", + "file_size_bytes": 3385, + "id": "nmdc:0b4a5dc91c42b7fea3fd514d5cb3138b", + "name": "gold:Gp0127655_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d57" + }, + "description": "EC TSV File for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_ec.tsv", + "md5_checksum": "32c6c6dbce4a1c6ab92810a86f90c574", + "file_size_bytes": 3385, + "id": "nmdc:32c6c6dbce4a1c6ab92810a86f90c574", + "name": "gold:Gp0127655_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d58" + }, + "description": "Protein FAA for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_proteins.faa", + "md5_checksum": "a31096eb3e473fd0c68d09096bc3fd85", + "file_size_bytes": 3385, + "id": "nmdc:a31096eb3e473fd0c68d09096bc3fd85", + "name": "gold:Gp0127655_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], @@ -29264,6 +40270,176 @@ 
"md5_checksum": "1029b97dba32dab780f4267f8224619f", "id": "nmdc:1029b97dba32dab780f4267f8224619f", "file_size_bytes": 182 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed2" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/mapping_stats.txt", + "file_size_bytes": 3628417, + "type": "nmdc:DataObject", + "id": "nmdc:59f6f5bd2480f717a09946125a0cac46", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed3" + }, + "description": "Assembled contigs fasta for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/assembly_contigs.fna", + "file_size_bytes": 18526584, + "type": "nmdc:DataObject", + "id": "nmdc:c9708409d9e8f45dcc89e688b3482e5e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed4" + }, + "description": "Assembled scaffold fasta for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/assembly_scaffolds.fna", + "file_size_bytes": 18379922, + "type": "nmdc:DataObject", + "id": "nmdc:c76c1fdcd6be23a0d7add5ea3a23f754", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed9" + }, + "description": "Assembled AGP file for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/assembly.agp", + "file_size_bytes": 3159611, + "type": "nmdc:DataObject", + "id": "nmdc:ac8be882728344819f210f42d5ea8577", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14edd" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1732533897, + "type": "nmdc:DataObject", 
+ "id": "nmdc:fadc083a0534b4961c902c8af8a8ebba", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15be6" + }, + "id": "nmdc:2d6f886bc9561f305d3b15be14bc192f", + "name": "1781_100355.krona.html", + "description": "Gold:Gp0127653 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100355/ReadbasedAnalysis/centrifuge/1781_100355.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15c08" + }, + "id": "nmdc:bcf6968cd97d5db72fbe6d048a638fd7", + "name": "1781_100355.json", + "description": "Gold:Gp0127653 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100355/ReadbasedAnalysis/1781_100355.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16642" + }, + "id": "nmdc:9429c50986c3904bdd48e585bfc74dfd", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127653", + "file_size_bytes": 17290021, + "url": "https://data.microbiomedata.org/data/1781_100355/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16647" + }, + "id": "nmdc:eb6bfb2af22e43df303aa691a87889bc", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127653", + "file_size_bytes": 807307, + "url": "https://data.microbiomedata.org/data/1781_100355/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4d" + }, + "description": "EC TSV File for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_ec.tsv", + "md5_checksum": "05cc9ce5321d6bc909ab63b8cbc59d02", + "file_size_bytes": 3385, + "id": "nmdc:05cc9ce5321d6bc909ab63b8cbc59d02", + "name": 
"gold:Gp0127653_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4f" + }, + "description": "Protein FAA for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_proteins.faa", + "md5_checksum": "658231efecf9d087ec2a6e9467f4e968", + "file_size_bytes": 3385, + "id": "nmdc:658231efecf9d087ec2a6e9467f4e968", + "name": "gold:Gp0127653_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d51" + }, + "description": "Functional annotation GFF file for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_functional_annotation.gff", + "md5_checksum": "8d83e502a533b5db8cd3bc943ae8b18b", + "file_size_bytes": 3385, + "id": "nmdc:8d83e502a533b5db8cd3bc943ae8b18b", + "name": "gold:Gp0127653_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d54" + }, + "description": "KO TSV File for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_ko.tsv", + "md5_checksum": "44f8e708349a1effdff745880f4fdd12", + "file_size_bytes": 3385, + "id": "nmdc:44f8e708349a1effdff745880f4fdd12", + "name": "gold:Gp0127653_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5b" + }, + "description": "Structural annotation GFF file for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_structural_annotation.gff", + "md5_checksum": "511ae319ddff2bdcbc3296d951e42d7e", + "file_size_bytes": 3385, + "id": "nmdc:511ae319ddff2bdcbc3296d951e42d7e", + "name": "gold:Gp0127653_Structural annotation GFF file", + "data_object_type": "Structural Annotation 
GFF", + "type": "nmdc:DataObject" } ], "dissolving_activity_set": [], From d3ee0630e4d0513f3015403e8ae15a30f1b51d14 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Thu, 9 Nov 2023 17:00:16 -0800 Subject: [PATCH 42/91] Begin refactoring Add packages in re_iding add a basic unit test combine both commands in single script --- nmdc_automation/re_iding/__init__.py | 0 nmdc_automation/re_iding/base.py | 40 ++ nmdc_automation/re_iding/db_utils.py | 46 ++ nmdc_automation/re_iding/mapping.log | 1 + .../dryrun_associated_record_dump.json} | 0 ...sty-11-aygzgv51_assocated_record_dump.json | 0 .../extract_metagenome_workflow_records.py | 5 +- .../re_iding/scripts/re_id_tool.py | 144 ++++ nmdc_automation/re_iding/tests/__init__.py | 0 nmdc_automation/re_iding/tests/conftest.py | 11 + .../re_iding/tests/test_data/db_record.json | 638 ++++++++++++++++++ .../re_iding/tests/test_re_iding_base.py | 15 + 12 files changed, 898 insertions(+), 2 deletions(-) create mode 100644 nmdc_automation/re_iding/__init__.py create mode 100644 nmdc_automation/re_iding/base.py create mode 100644 nmdc_automation/re_iding/db_utils.py rename nmdc_automation/re_iding/scripts/{test_metagenome_record.json => data/dryrun_associated_record_dump.json} (100%) rename nmdc_automation/re_iding/scripts/{ => data}/nmdc:sty-11-aygzgv51_assocated_record_dump.json (100%) create mode 100755 nmdc_automation/re_iding/scripts/re_id_tool.py create mode 100644 nmdc_automation/re_iding/tests/__init__.py create mode 100644 nmdc_automation/re_iding/tests/conftest.py create mode 100644 nmdc_automation/re_iding/tests/test_data/db_record.json create mode 100644 nmdc_automation/re_iding/tests/test_re_iding_base.py diff --git a/nmdc_automation/re_iding/__init__.py b/nmdc_automation/re_iding/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py new file mode 100644 index 00000000..f3753246 --- /dev/null +++ b/nmdc_automation/re_iding/base.py 
@@ -0,0 +1,40 @@ +# nmdc_automation/nmdc_automation/re_iding/base.py +""" +base.py - Provides classes and functions for re-ID-ing NMDC metagenome workflow +records and data objects. +""" +import logging +from typing import List +from pathlib import Path +import json +from typing import Dict, List, Optional, Tuple + +from nmdc_schema.nmdc import Database, DataObject +from nmdc_automation.re_iding.db_utils import ( + OMICS_PROCESSING_SET, + DATA_OBJECT_SET, + check_for_single_omics_processing_record, +) + + +NAPA_TEMPLATE = "../../configs/re_iding_worklfows.yaml" + + +def get_new_db_and_downstream_inputs( + db_record: Dict, config_file: Optional[str] = None) -> Tuple[Database, + List[DataObject]]: + """ + Return a new Database instance with the given new_omics_processing_record + and the data objects that are has_output of the given old_omics_processing_record + """ + if config_file is None: + config_file = NAPA_TEMPLATE + config_file = Path(config_file) + + new_db = Database() + downstream_inputs = [] + + check_for_single_omics_processing_record(db_record) + old_omics_processing_record = db_record[OMICS_PROCESSING_SET][0] + + return new_db, downstream_inputs \ No newline at end of file diff --git a/nmdc_automation/re_iding/db_utils.py b/nmdc_automation/re_iding/db_utils.py new file mode 100644 index 00000000..bdf5674b --- /dev/null +++ b/nmdc_automation/re_iding/db_utils.py @@ -0,0 +1,46 @@ +# nmdc_automation/nmdc_automation/re_iding/db_utils.py +""" +db_utils.py: Provides utility functions for working with NMDC Database +records and data objects as dicts. +""" +from dataclasses import dataclass +from typing import Dict, List +from nmdc_schema.nmdc import Database, DataObject + +# Some constants for set names we care about +OMICS_PROCESSING_SET = "omics_processing_set" +DATA_OBJECT_SET = "data_object_set" + + + + +def get_omics_processing_id(db_record: Dict) -> str: + """ + Get the ID of the OmicsProcessing record in the given Database instance. 
+ The OmicsProcessing record acts as the root of the workflow graph and + the data objects. + """ + check_for_single_omics_processing_record(db_record) + omics_processing_set = db_record[OMICS_PROCESSING_SET] + return omics_processing_set[0]["id"] + + +def check_for_single_omics_processing_record(old_db_record: Dict) -> bool: + """ + Check that there is only one OmicsProcessing record in the Database. + """ + omics_processing_set = old_db_record.get(OMICS_PROCESSING_SET, []) + if len(omics_processing_set) == 0: + raise ValueError("No omics_processing_set found in db_record") + elif len(omics_processing_set) > 1: + raise ValueError("Multiple omics_processing_set found in db_record") + return True + + + + + + + + + diff --git a/nmdc_automation/re_iding/mapping.log b/nmdc_automation/re_iding/mapping.log index 974bbbe8..c4b7fca5 100644 --- a/nmdc_automation/re_iding/mapping.log +++ b/nmdc_automation/re_iding/mapping.log @@ -1 +1,2 @@ omics_id gold nmdc:omprc-11-bn8jcq58 +omics_id gold nmdc:omprc-11-bn8jcq58 diff --git a/nmdc_automation/re_iding/scripts/test_metagenome_record.json b/nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json similarity index 100% rename from nmdc_automation/re_iding/scripts/test_metagenome_record.json rename to nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json diff --git a/nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_assocated_record_dump.json similarity index 100% rename from nmdc_automation/re_iding/scripts/nmdc:sty-11-aygzgv51_assocated_record_dump.json rename to nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_assocated_record_dump.json diff --git a/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py index 079214e1..49c71c1f 100644 --- 
a/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py +++ b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py @@ -2,8 +2,9 @@ # coding: utf-8 # nmdc_schema/napa_compliance/scripts/extract_metagenome_workflow_records.py """ -extract_metagenome_workflow_records.py: Extract metagenome workflow records -for re-ID-ing of OmicsProcessing records. +extract_metagenome_workflow_records.py: Starting with OmicsProcessing, extract +metagenome workflow activity records and their associated data objects. Write +the results, as a list of nmdc-schema Database instances to a JSON file. """ import logging import time diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py new file mode 100755 index 00000000..8fc4713c --- /dev/null +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +# nmdc_automation/nmdc_automation/re_iding/scripts/re_id_tool.py +""" +re_id_tool.py: Provides command-line tools to extract and re-ID NMDC metagenome +workflow records. 
+""" +import logging +import time +from pathlib import Path +import json +import click + +from nmdc_automation.api import NmdcRuntimeUserApi +from nmdc_automation.config import Config +import nmdc_schema.nmdc as nmdc +from nmdc_automation.re_iding.base import get_new_db_and_downstream_inputs +from nmdc_automation.re_iding.db_utils import get_omics_processing_id + +# Defaults +GOLD_STUDY_ID = "gold:Gs0114663" +STUDY_ID = "nmdc:sty-11-aygzgv51" +NAPA_CONFIG = Path("../../../configs/napa_config.toml") + + +BASE_DATAFILE_DIR = "/global/cfs/cdirs/m3408/results" +DRYRUN_DATAFILE_DIR = "./data/dryrun_data/results" + +DATA_DIR = Path(__file__).parent.absolute().joinpath("data") + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler() + ] +) + +logger = logging.getLogger(__name__) + + + +@click.group() +@click.option("--site-config", type=click.Path(exists=True), + default=NAPA_CONFIG,) +@click.pass_context +def cli(ctx, site_config): + """ + NMDC re-ID tool + """ + ctx.ensure_object(dict) + ctx.obj['site_config'] = site_config + + +@cli.command() +@click.option('--study_id', default=STUDY_ID, + help=f'Optional updated study ID. Default: {STUDY_ID}') +@click.pass_context +def extract_records(ctx, study_id): + """ + Extract metagenome workflow activities and their data object records + that are informed_by the legacy ID (GOLD Study ID) for a re-ID-ed Study/ + Biosample/OmicsProcessing. + + Write the results, as a list of nmdc-schema Database instances to a JSON file. + """ + start_time = time.time() + logging.info(f"Extracting workflow records for study_id: {study_id}") + + +@cli.command() + +@click.option('--dryrun / --no-dryrun', is_flag=True, default=True, + help='Dryrun mode: use local data dir and do not save results') +@click.option('--study_id', required=False, default=STUDY_ID, + help=f'Optional updated study ID. 
Default: {STUDY_ID}') +@click.option('--data_dir', required=False, default=BASE_DATAFILE_DIR, + help=f'Optional base datafile directory. Default: {BASE_DATAFILE_DIR}') +@click.pass_context +def process_records(ctx, dryrun, study_id, data_dir): + """ + Read the JSON file of extracted workflow records and their data objects and + re-ID the records with newly-minted NMDC IDs, update data file headers. + + Write the results to a new JSON file of nmdc Database instances. + """ + start_time = time.time() + logging.info(f"Processing workflow records for study_id: {study_id}") + + + + # Get Database dump file paths and the data directory + db_infile, db_outfile = _get_database_paths(study_id, dryrun) + data_dir = _get_data_dir(data_dir, dryrun) + + + # Read extracted DB records + logging.info(f"Using db_infile: {db_infile}") + with open(db_infile, "r") as f: + db_records = json.load(f) + logging.info(f"Read {len(db_records)} records from db_infile") + + re_ided_db_records = [] + for db_record in db_records: + omics_processing_id = get_omics_processing_id(db_record) + logging.info(f"omics_processing_id: {omics_processing_id}") + + # Get new Database instance and downstream data objects + new_db, downstream_inputs = get_new_db_and_downstream_inputs(db_record) + # Re-ID db_record + # Update data file headers + # Write re-IDed db_record to db_outfile + # Write updated data file to datafile_dir + # Log results + + +def _get_data_dir(data_dir, dryrun): + """ + Return the path to the data object files + """ + if dryrun: + logging.info("Running in dryrun mode") + return DRYRUN_DATAFILE_DIR + elif not data_dir: + data_dir = BASE_DATAFILE_DIR + logging.info(f"Using datafile_dir: {data_dir}") + return data_dir + +def _get_database_paths(study_id, dryrun): + """ + Return the paths to the input and output data files + """ + db_infile_suffix = "_associated_record_dump.json" + db_outfile_suffix = "_re_ided_record_dump.json" + if dryrun: + db_infile = 
DATA_DIR.joinpath(f"dryrun{db_infile_suffix}") + db_outfile = DATA_DIR.joinpath(f"dryrun{db_outfile_suffix}") + else: + db_infile = DATA_DIR.joinpath(f"{study_id}{db_infile_suffix}") + db_outfile = DATA_DIR.joinpath(f"{study_id}{db_outfile_suffix}") + return db_infile, db_outfile + + +if __name__ == '__main__': + cli(obj={}) diff --git a/nmdc_automation/re_iding/tests/__init__.py b/nmdc_automation/re_iding/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nmdc_automation/re_iding/tests/conftest.py b/nmdc_automation/re_iding/tests/conftest.py new file mode 100644 index 00000000..341ac2bc --- /dev/null +++ b/nmdc_automation/re_iding/tests/conftest.py @@ -0,0 +1,11 @@ +import pytest +from pathlib import Path +import json + +TEST_DATA_DIR = Path(__file__).parent / "test_data" + +@pytest.fixture +def db_record(): + """Return a dict of a test Database instance""" + with open(TEST_DATA_DIR / "db_record.json", "r") as f: + return json.load(f) diff --git a/nmdc_automation/re_iding/tests/test_data/db_record.json b/nmdc_automation/re_iding/tests/test_data/db_record.json new file mode 100644 index 00000000..657e1469 --- /dev/null +++ b/nmdc_automation/re_iding/tests/test_data/db_record.json @@ -0,0 +1,638 @@ +{ + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2861414297, + "type": "nmdc:DataObject", + "id": "jgi:55d740280d8785342fcf7e39", + "name": "9422.8.132674.GTTTCG.fastq.gz" + }, + { + "name": "Gp0115663_Filtered Reads", + "description": "Filtered Reads for Gp0115663", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "id": "nmdc:7bf778baef033d36f118f8591256d6ef", + 
"file_size_bytes": 2571324879 + }, + { + "name": "Gp0115663_Filtered Stats", + "description": "Filtered Stats for Gp0115663", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", + "file_size_bytes": 290 + }, + { + "name": "Gp0115663_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "file_size_bytes": 13174 + }, + { + "name": "Gp0115663_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "file_size_bytes": 1035818 + }, + { + "name": "Gp0115663_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115663", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "file_size_bytes": 262669 + }, + { + "name": "Gp0115663_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115663", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "file_size_bytes": 2189843623 
+ }, + { + "name": "Gp0115663_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115663", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "file_size_bytes": 260134 + }, + { + "name": "Gp0115663_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115663", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "file_size_bytes": 2343980 + }, + { + "name": "Gp0115663_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115663", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", + "file_size_bytes": 1785563917 + }, + { + "name": "Gp0115663_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115663", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "file_size_bytes": 699896 + }, + { + "name": "Gp0115663_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115663", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", + 
"md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", + "file_size_bytes": 4221977 + }, + { + "name": "Gp0115663_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115663", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", + "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", + "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "file_size_bytes": 90115831 + }, + { + "name": "Gp0115663_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115663", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", + "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "id": "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "file_size_bytes": 89604715 + }, + { + "name": "Gp0115663_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", + "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "file_size_bytes": 13412363 + }, + { + "name": "Gp0115663_Assembled AGP file", + "description": "Assembled AGP file for Gp0115663", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", + "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "id": "nmdc:f450e3800e17691d5874c89fc46c186a", + "file_size_bytes": 12542171 + }, + { + "name": "Gp0115663_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115663", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", + "md5_checksum": 
"31dc958d116d02122509e90b0883954f", + "id": "nmdc:31dc958d116d02122509e90b0883954f", + "file_size_bytes": 2773429299 + }, + { + "name": "Gp0115663_Protein FAA", + "description": "Protein FAA for Gp0115663", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", + "md5_checksum": "879988d212ecec46928b8598e2f8391f", + "id": "nmdc:879988d212ecec46928b8598e2f8391f", + "file_size_bytes": 50165060 + }, + { + "name": "Gp0115663_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115663", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", + "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", + "id": "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "file_size_bytes": 2767 + }, + { + "name": "Gp0115663_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115663", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", + "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", + "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "file_size_bytes": 55139586 + }, + { + "name": "Gp0115663_KO TSV file", + "description": "KO TSV file for Gp0115663", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", + "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", + "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "file_size_bytes": 6023696 + }, + { + "name": "Gp0115663_EC TSV file", + "description": "EC TSV file for Gp0115663", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", + "md5_checksum": 
"4f88c89459f36655eb7c1eceec19602a", + "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", + "file_size_bytes": 3982918 + }, + { + "name": "Gp0115663_COG GFF file", + "description": "COG GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", + "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", + "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "file_size_bytes": 27362917 + }, + { + "name": "Gp0115663_PFAM GFF file", + "description": "PFAM GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", + "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", + "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", + "file_size_bytes": 21572048 + }, + { + "name": "Gp0115663_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", + "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", + "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "file_size_bytes": 2900068 + }, + { + "name": "Gp0115663_SMART GFF file", + "description": "SMART GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", + "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", + "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "file_size_bytes": 6905519 + }, + { + "name": "Gp0115663_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", + "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", + "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "file_size_bytes": 38787856 + }, + { + "name": "Gp0115663_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", + 
"md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", + "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "file_size_bytes": 30134783 + }, + { + "name": "Gp0115663_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", + "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", + "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", + "file_size_bytes": 19194308 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", + "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", + "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "file_size_bytes": 70411007 + }, + { + "name": "Gp0115663_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", + "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", + "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "file_size_bytes": 15998690 + }, + { + "name": "Gp0115663_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115663", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", + 
"md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", + "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "file_size_bytes": 1530 + }, + { + "name": "Gp0115663_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115663", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", + "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", + "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "file_size_bytes": 182 + }, + { + "name": "Gp0115663_metabat2 bins", + "description": "metabat2 bins for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", + "md5_checksum": "27c07072f175571200b5931550adb8aa", + "id": "nmdc:27c07072f175571200b5931550adb8aa", + "file_size_bytes": 1114314 + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34734" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:31dc958d116d02122509e90b0883954f", + "nmdc:002e4ebc728f8b91cb5f298d340ab013" + ], + "too_short_contig_num": 159810, + "part_of": [ + "nmdc:mga0h9dt75" + ], + "binned_contig_num": 684, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "nmdc:27c07072f175571200b5931550adb8aa" + ], + "was_informed_by": "gold:Gp0115663", + "input_contig_num": 169782, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", + "mags_list": [ + { + "number_of_contig": 61, + "completeness": 13.82, + "bin_name": "bins.1", + "gene_count": 294, + 
"bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.62, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 485, + "completeness": 66.03, + "bin_name": "bins.2", + "gene_count": 2871, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.87, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 32 + }, + { + "number_of_contig": 56, + "completeness": 34.23, + "bin_name": "bins.3", + "gene_count": 337, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 63, + "completeness": 6.9, + "bin_name": "bins.4", + "gene_count": 276, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 19, + "completeness": 4.45, + "bin_name": "bins.5", + "gene_count": 463, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 9288, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "material_sample_set": [], + 
"material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d6" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:879988d212ecec46928b8598e2f8391f", + "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "nmdc:4f88c89459f36655eb7c1eceec19602a", + "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "nmdc:618b18fa8635c80cc0091371f451a6f0", + "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "nmdc:75f481e0d98793cfb4f9508cb3e31622" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb9" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "ctg_logsum": 214373, + "scaf_logsum": 215363, + "gap_pct": 0.00188, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "nmdc:f450e3800e17691d5874c89fc46c186a", + "nmdc:31dc958d116d02122509e90b0883954f" + ], + "asm_score": 6.577, + "was_informed_by": "gold:Gp0115663", + "ctg_powsum": 24284, + "scaf_max": 68135, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "scaf_powsum": 24422, + "execution_resource": 
"NERSC-Cori", + "contigs": 169784, + "name": "Assembly Activity for nmdc:mga0h9dt75", + "ctg_max": 68135, + "gc_std": 0.11726, + "contig_bp": 83494920, + "gc_avg": 0.46001, + "started_at_time": "2021-10-11T02:28:26Z", + "scaf_bp": 83496490, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169645, + "ended_at_time": "2021-10-11T04:56:04+00:00", + "ctg_l50": 470, + "ctg_l90": 290, + "ctg_n50": 45584, + "ctg_n90": 141996, + "scaf_l50": 470, + "scaf_l90": 290, + "scaf_n50": 45550, + "scaf_n90": 141870, + "scaf_l_gt50k": 68135, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.08160224 + } + ], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b33" + }, + "id": "nmdc:omprc-11-bn8jcq58", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qq8s6x03" + ], + "has_output": [ + "jgi:55d740280d8785342fcf7e39" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115663" + ] + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88b" + }, + "has_input": [ + "nmdc:30a06664f29cffbbbc49abad86eae6fc" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7bf778baef033d36f118f8591256d6ef", + "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" + ], + "was_informed_by": "gold:Gp0115663", + "input_read_count": 32238374, + "output_read_bases": 4608772924, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4867994474, + "name": "Read QC Activity for nmdc:mga0h9dt75", + "output_read_count": 30774080, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf4f" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "nmdc:7ca01ea379f0baed96f87d1435925f95", + "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + } \ No newline at end of file diff --git a/nmdc_automation/re_iding/tests/test_re_iding_base.py b/nmdc_automation/re_iding/tests/test_re_iding_base.py new file mode 100644 index 00000000..e2976676 --- /dev/null +++ 
b/nmdc_automation/re_iding/tests/test_re_iding_base.py @@ -0,0 +1,15 @@ +# nmdc_automation/re_iding/tests/test_re_iding_base.py + +from nmdc_schema.nmdc import Database +from nmdc_automation.re_iding.base import get_new_db_and_downstream_inputs + + + +def test_get_new_db_and_downstream_inputs(db_record): + """ + Test that we can get a new Database instance and downstream inputs from an + existing Database instance. + """ + new_db, downstream_inputs = get_new_db_and_downstream_inputs(db_record) + assert isinstance(new_db, Database) + assert isinstance(downstream_inputs, list) \ No newline at end of file From fa5df3d4135b92fe6dde23cf346062fe26406d1a Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Fri, 10 Nov 2023 13:33:32 -0800 Subject: [PATCH 43/91] add omics processing update method and basic unit test --- nmdc_automation/re_iding/base.py | 96 ++++++++++++++----- nmdc_automation/re_iding/db_utils.py | 16 +++- .../re_iding/scripts/re_id_tool.py | 21 ++-- .../re_iding/tests/test_re_iding_base.py | 28 ++++-- pyproject.toml | 1 + 5 files changed, 118 insertions(+), 44 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index f3753246..77226a6e 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -4,37 +4,81 @@ records and data objects. 
""" import logging -from typing import List -from pathlib import Path -import json -from typing import Dict, List - -from nmdc_schema.nmdc import Database, DataObject -from nmdc_automation.re_iding.db_utils import ( - OMICS_PROCESSING_SET, - DATA_OBJECT_SET, - check_for_single_omics_processing_record, +from typing import Dict +import yaml + +from nmdc_schema.nmdc import DataObject as NmdcDataObject, \ + Database as NmdcDatabase, OmicsProcessing + +from nmdc_automation.api import NmdcRuntimeApi +from nmdc_automation.re_iding.db_utils import (OMICS_PROCESSING_SET, + READS_QC_SET, + check_for_single_omics_processing_record, + get_data_object_record_by_id, ) + +NAPA_TEMPLATE = "../../../configs/re_iding_worklfows.yaml" + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[logging.StreamHandler()] ) +logger = logging.getLogger(__name__) -NAPA_TEMPLATE = "../../configs/re_iding_worklfows.yaml" +class ReIdTool: + def __init__(self, api_client: NmdcRuntimeApi, template_file: str = None): + self.api_client = api_client + if template_file is None: + template_file = NAPA_TEMPLATE + with open(template_file, "r") as f: + self.template = yaml.safe_load(f) + def update_omics_processing_has_output( + self, db_record: Dict, + new_db: NmdcDatabase) -> (NmdcDatabase): + """ + Return a new Database instance with the omics processing record has_output + data object IDs updated to new IDs. -def get_new_db_and_downstream_inputs( - db_record: Dict, config_file: str = None) -> (Database, - List[DataObject]): - """ - Return a new Database instance with the given new_omics_processing_record - and the data objects that are has_output of the given old_omics_processing_record - """ - if config_file is None: - config_file = NAPA_TEMPLATE - config_file = Path(config_file) + Note: This function assumes that there is only one omics processing record, + and that id and name have already been updated. 
+ """ + check_for_single_omics_processing_record(db_record) + omics_record = db_record[OMICS_PROCESSING_SET][0] + # Strip out and keep has_output and strip out _id + has_output = omics_record.pop("has_output", []) + omics_record.pop("_id", None) + # make a new omics processing record + new_omics = OmicsProcessing(**omics_record) + + # make new data objects with updated IDs + for old_do_id in has_output: + old_do_rec = get_data_object_record_by_id(db_record, old_do_id) + old_do_id = old_do_rec.pop("id") + new_do_id = self.api_client.minter("nmdc:DataObject") + logger.info(f"nmdcDataObject\t{old_do_id}\t{new_do_id}") - new_db = Database() - downstream_inputs = [] + # Add new do ID to new OmicsProcessing has_output + new_omics.has_output.append(new_do_id) + # Make a new data object record with the new ID + new_db.data_object_set.append( + NmdcDataObject(**old_do_rec, id=new_do_id) + ) + new_db.omics_processing_set.append(new_omics) + return new_db - check_for_single_omics_processing_record(db_record) - old_omics_processing_record = db_record[OMICS_PROCESSING_SET][0] - return new_db, downstream_inputs \ No newline at end of file + + + + +def update_reads_qc_analysis_activity_set(db_record: Dict, new_db: NmdcDatabase, + api_client: NmdcRuntimeApi) -> (NmdcDatabase): + """ + Return a new Database instance with the reads_qc_analysis_activity_set + and its data objects updated to new IDs. 
+ """ + for reads_qc_rec in db_record[READS_QC_SET]: + pass + diff --git a/nmdc_automation/re_iding/db_utils.py b/nmdc_automation/re_iding/db_utils.py index bdf5674b..f1c77c65 100644 --- a/nmdc_automation/re_iding/db_utils.py +++ b/nmdc_automation/re_iding/db_utils.py @@ -10,6 +10,7 @@ # Some constants for set names we care about OMICS_PROCESSING_SET = "omics_processing_set" DATA_OBJECT_SET = "data_object_set" +READS_QC_SET = "read_qc_analysis_activity_set" @@ -25,17 +26,28 @@ def get_omics_processing_id(db_record: Dict) -> str: return omics_processing_set[0]["id"] -def check_for_single_omics_processing_record(old_db_record: Dict) -> bool: +def check_for_single_omics_processing_record(db_record: Dict) -> bool: """ Check that there is only one OmicsProcessing record in the Database. """ - omics_processing_set = old_db_record.get("omics_processing_set", []) + omics_processing_set = db_record.get("omics_processing_set", []) if len(omics_processing_set) == 0: raise ValueError("No omics_processing_set found in db_record") elif len(omics_processing_set) > 1: raise ValueError("Multiple omics_processing_set found in db_record") return True +def get_data_object_record_by_id(db_record: Dict, id: str)-> Dict: + """ + Return the data object record with the given ID. 
+ """ + data_objects = [d for d in db_record[DATA_OBJECT_SET] if d["id"] == id] + if len(data_objects) == 0: + raise ValueError(f"No data object found with id: {id}") + elif len(data_objects) > 1: + raise ValueError(f"Multiple data objects found with id: {id}") + return data_objects[0] + diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 8fc4713c..a31ffed9 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -10,10 +10,10 @@ import json import click -from nmdc_automation.api import NmdcRuntimeUserApi +from nmdc_automation.api import NmdcRuntimeApi from nmdc_automation.config import Config import nmdc_schema.nmdc as nmdc -from nmdc_automation.re_iding.base import get_new_db_and_downstream_inputs +from nmdc_automation.re_iding.base import update_omics_processing_has_output from nmdc_automation.re_iding.db_utils import get_omics_processing_id # Defaults @@ -68,12 +68,11 @@ def extract_records(ctx, study_id): @cli.command() - @click.option('--dryrun / --no-dryrun', is_flag=True, default=True, help='Dryrun mode: use local data dir and do not save results') -@click.option('--study_id', is_required=False, default=STUDY_ID, +@click.option('--study_id', default=STUDY_ID, help=f'Optional updated study ID. Default: {STUDY_ID}') -@click.option('--data_dir', is_required=False, default=BASE_DATAFILE_DIR, +@click.option('--data_dir', default=BASE_DATAFILE_DIR, help=f'Optional base datafile directory. 
Default: {BASE_DATAFILE_DIR}') @click.pass_context def process_records(ctx, dryrun, study_id, data_dir): @@ -86,7 +85,9 @@ def process_records(ctx, dryrun, study_id, data_dir): start_time = time.time() logging.info(f"Processing workflow records for study_id: {study_id}") - + # Get API client + config = ctx.obj['site_config'] + api_client = NmdcRuntimeApi(config) # Get Database dump file paths and the data directory db_infile, db_outfile = _get_database_paths(study_id, dryrun) @@ -104,8 +105,12 @@ def process_records(ctx, dryrun, study_id, data_dir): omics_processing_id = get_omics_processing_id(db_record) logging.info(f"omics_processing_id: {omics_processing_id}") - # Get new Database instance and downstream data objects - new_db, downstream_inputs = get_new_db_and_downstream_inputs(db_record) + new_db = nmdc.Database() + # update OmicsProcessing has_output and related DataObject records + new_db = update_omics_processing_has_output(db_record, new_db, api_client) + + + # Re-ID db_record # Update data file headers # Write re-IDed db_record to db_outfile diff --git a/nmdc_automation/re_iding/tests/test_re_iding_base.py b/nmdc_automation/re_iding/tests/test_re_iding_base.py index e2976676..2459fc37 100644 --- a/nmdc_automation/re_iding/tests/test_re_iding_base.py +++ b/nmdc_automation/re_iding/tests/test_re_iding_base.py @@ -1,15 +1,27 @@ # nmdc_automation/re_iding/tests/test_re_iding_base.py +import pytest_mock -from nmdc_schema.nmdc import Database -from nmdc_automation.re_iding.base import get_new_db_and_downstream_inputs +from nmdc_automation.api import NmdcRuntimeApi +from nmdc_schema.nmdc import Database as NmdcDatabase +from nmdc_automation.re_iding.base import ReIdTool -def test_get_new_db_and_downstream_inputs(db_record): + +def test_update_omics_processing_has_output(db_record, mocker): """ - Test that we can get a new Database instance and downstream inputs from an - existing Database instance. 
+ Test that we can get a new Database with updated omics processing has_output + and re-IDed data objects. """ - new_db, downstream_inputs = get_new_db_and_downstream_inputs(db_record) - assert isinstance(new_db, Database) - assert isinstance(downstream_inputs, list) \ No newline at end of file + exp_do_id = "nmdc:dobj-1234-abcd12345" + mock_api = mocker.Mock(spec=NmdcRuntimeApi) + mock_api.minter.return_value = exp_do_id + reid_tool = ReIdTool(mock_api) + new_db = NmdcDatabase() + new_db = reid_tool.update_omics_processing_has_output(db_record, new_db) + assert isinstance(new_db, NmdcDatabase) + assert new_db.omics_processing_set + + assert new_db.omics_processing_set[0].has_output[0] == exp_do_id + + diff --git a/pyproject.toml b/pyproject.toml index 98165d60..2d3b1673 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ pytz = "^2023.3" python-dotenv = "^1.0.0" click = "^8.1.3" pytest = "^7.3.1" +pytest-mock = "^3.12.0" [tool.poetry.group.dev.dependencies] pytest = "^7.3.1" From 360c37ecf8e670cf566406f3696033e0c93d2f6b Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 08:08:44 -0800 Subject: [PATCH 44/91] update re-id tool --- nmdc_automation/re_iding/base.py | 158 +++++++++++++++--- .../re_iding/scripts/re_id_tool.py | 9 +- 2 files changed, 139 insertions(+), 28 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 77226a6e..8194c58b 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -3,18 +3,22 @@ base.py - Provides classes and functions for re-ID-ing NMDC metagenome workflow records and data objects. 
""" +import copy import logging -from typing import Dict +from typing import Dict, List +import re import yaml from nmdc_schema.nmdc import DataObject as NmdcDataObject, \ - Database as NmdcDatabase, OmicsProcessing + Database as NmdcDatabase, OmicsProcessing, WorkflowExecutionActivity +import nmdc_schema.nmdc as nmdc from nmdc_automation.api import NmdcRuntimeApi from nmdc_automation.re_iding.db_utils import (OMICS_PROCESSING_SET, READS_QC_SET, check_for_single_omics_processing_record, - get_data_object_record_by_id, ) + get_data_object_record_by_id, + get_omics_processing_id) NAPA_TEMPLATE = "../../../configs/re_iding_worklfows.yaml" @@ -26,16 +30,60 @@ logger = logging.getLogger(__name__) + class ReIdTool: - def __init__(self, api_client: NmdcRuntimeApi, template_file: str = None): + def __init__(self, api_client: NmdcRuntimeApi, data_dir: str, + template_file: str = None): self.api_client = api_client + self.data_dir = data_dir if template_file is None: template_file = NAPA_TEMPLATE with open(template_file, "r") as f: - self.template = yaml.safe_load(f) + self.workflow_template = yaml.safe_load(f)["Workflows"] - def update_omics_processing_has_output( - self, db_record: Dict, + def _workflow_template_for_type(self, workflow_type: str) -> Dict: + """ + Return the workflow template for the given workflow name. + """ + templates = [] + for t in self.workflow_template: + type = t["Type"] + if type == workflow_type: + templates.append(t) + + # templates = [t for t in self.workflow_template if + # t["Type"] == workflow_type] + if len(templates) == 0: + raise ValueError(f"No workflow template found for {workflow_type}") + elif len(templates) > 1: + raise ValueError( + f"Multiple workflow templates found for " + f"{workflow_type}" + ) + return templates[0] + + def data_object_template(self, workflow_type: str, + data_object_type: str) -> Dict: + """ + Return the data object template for the given workflow name and data + object type. 
+ """ + template = self._workflow_template_for_type(workflow_type) + data_object_templates = [t for t in template["Outputs"] if + t["data_object_type"] == data_object_type] + if len(data_object_templates) == 0: + raise ValueError( + f"No data object template found for " + f"{workflow_type} and {data_object_type}" + ) + elif len(data_object_templates) > 1: + raise ValueError( + f"Multiple data object templates found for " + f"{workflow_type} and {data_object_type}" + ) + return data_object_templates[0] + + def update_omics_processing_has_output(self, db_record: Dict, new_db: NmdcDatabase) -> (NmdcDatabase): """ Return a new Database instance with the omics processing record has_output @@ -47,15 +95,18 @@ def update_omics_processing_has_output( check_for_single_omics_processing_record(db_record) omics_record = db_record[OMICS_PROCESSING_SET][0] # Strip out and keep has_output and strip out _id - has_output = omics_record.pop("has_output", []) - omics_record.pop("_id", None) + params = copy.deepcopy(omics_record) + params.pop("has_output", []) + params.pop("_id", None) # make a new omics processing record - new_omics = OmicsProcessing(**omics_record) + new_omics = OmicsProcessing(**params) # make new data objects with updated IDs - for old_do_id in has_output: + for old_do_id in omics_record["has_output"]: old_do_rec = get_data_object_record_by_id(db_record, old_do_id) - old_do_id = old_do_rec.pop("id") + old_do_id = old_do_rec.get("id") + params = copy.deepcopy(old_do_rec) + params.pop("id", None) new_do_id = self.api_client.minter("nmdc:DataObject") logger.info(f"nmdcDataObject\t{old_do_id}\t{new_do_id}") @@ -63,22 +114,77 @@ def update_omics_processing_has_output( new_omics.has_output.append(new_do_id) # Make a new data object record with the new ID new_db.data_object_set.append( - NmdcDataObject(**old_do_rec, id=new_do_id) + NmdcDataObject(**params, id=new_do_id) ) new_db.omics_processing_set.append(new_omics) return new_db + def 
update_reads_qc_analysis_activity_set(self, db_record: Dict, + new_db: NmdcDatabase) -> (NmdcDatabase): + """ + Return a new Database instance with the reads_qc_analysis_activity_set + and its data objects updated to new IDs. + """ + logger.info( + f"Updating reads_qc_analysis_activity_set for " + f"{db_record[OMICS_PROCESSING_SET][0]['id']}" + ) + new_omics_processing = new_db.omics_processing_set[0] + logger.info(new_omics_processing) + for reads_qc_rec in db_record[READS_QC_SET]: + # old records have non-conforming type + activity_type = "nmdc:ReadQcAnalysisActivity" + omics_processing_id = new_omics_processing.id + has_input = new_omics_processing.has_output + updated_has_output = [] + # Get ReadQC data objects and update IDs + for old_do_id in reads_qc_rec["has_output"]: + logger.info(f"old_do_id: {old_do_id}") + old_do_rec = get_data_object_record_by_id(db_record, old_do_id) + new_do = self._make_new_data_object( + omics_processing_id, activity_type, old_do_rec + ) + new_db.data_object_set.append(new_do) + updated_has_output.append(new_do.id) + return new_db - - - - -def update_reads_qc_analysis_activity_set(db_record: Dict, new_db: NmdcDatabase, - api_client: NmdcRuntimeApi) -> (NmdcDatabase): - """ - Return a new Database instance with the reads_qc_analysis_activity_set - and its data objects updated to new IDs. - """ - for reads_qc_rec in db_record[READS_QC_SET]: - pass - + def _make_new_activity_set_object(self, omics_processing_id: str, + activity_set_rec: Dict, has_input: List, + has_output: List) -> WorkflowExecutionActivity: + """ + Return a new activity set object with updated IDs. 
+ """ + activity_type = activity_set_rec["type"] + template = self.workflow_template(activity_type) + activity_class = getattr(nmdc, template["ActivityRange"]) + new_activity_id = self.api_client.minter(activity_type) + logger.info( + f"{activity_type}\t{activity_set_rec['id']}\t{new_activity_id}" + ) + activity = activity_class( + id=new_activity_id, name=template["Activity"]["name"].replace( + "{id}", omics_processing_id + ), git_url=template["Git_repo"], version=template["Version"], + part_of=[omics_processing_id], + execution_resource="NERSC - Perlmutter", + started_at_time=activity_set_rec["started_at_time"], + has_input=has_input, has_output=has_output, + ended_at_time=activity_set_rec["ended_at_time"], + was_informed_by=omics_processing_id, ) + return activity + + def _make_new_data_object(self, omics_processing_id: str, + activity_type: str, data_object_rec: Dict) -> NmdcDataObject: + """ + Return a new data object with updated IDs. + """ + data_object_type = data_object_rec["data_object_type"] + template = self.data_object_template( + activity_type, data_object_type + ) + new_data_object_id = self.api_client.minter("nmdc:DataObject") + logger.info(f"nmdcDataObject\t{data_object_rec['id']}\t{new_data_object_id}") + new_description = re.sub( + "[^ ]+$", f"{omics_processing_id}", data_object_rec["description"] + ) + logger.info(f"new_description: {new_description}") diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index a31ffed9..890ba8be 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -13,7 +13,7 @@ from nmdc_automation.api import NmdcRuntimeApi from nmdc_automation.config import Config import nmdc_schema.nmdc as nmdc -from nmdc_automation.re_iding.base import update_omics_processing_has_output +from nmdc_automation.re_iding.base import ReIdTool from nmdc_automation.re_iding.db_utils import get_omics_processing_id # Defaults @@ 
-89,10 +89,14 @@ def process_records(ctx, dryrun, study_id, data_dir): config = ctx.obj['site_config'] api_client = NmdcRuntimeApi(config) + # Get Database dump file paths and the data directory db_infile, db_outfile = _get_database_paths(study_id, dryrun) data_dir = _get_data_dir(data_dir, dryrun) + # Initialize re-ID tool + reid_tool = ReIdTool(api_client, data_dir) + # Read extracted DB records logging.info(f"Using db_infile: {db_infile}") @@ -107,7 +111,8 @@ def process_records(ctx, dryrun, study_id, data_dir): new_db = nmdc.Database() # update OmicsProcessing has_output and related DataObject records - new_db = update_omics_processing_has_output(db_record, new_db, api_client) + new_db = reid_tool.update_omics_processing_has_output(db_record, new_db) + new_db = reid_tool.update_reads_qc_analysis_activity_set(db_record, new_db) From 68cd47a8d2098e31b34701369278a2e56aaeac50 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 09:17:42 -0800 Subject: [PATCH 45/91] _make_new_data_object --- nmdc_automation/re_iding/base.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 8194c58b..3bd74424 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -21,6 +21,7 @@ get_omics_processing_id) NAPA_TEMPLATE = "../../../configs/re_iding_worklfows.yaml" +BASE_DIR = "/global/cfs/cdirs/m3408/results" logging.basicConfig( level=logging.INFO, @@ -188,3 +189,33 @@ def _make_new_data_object(self, omics_processing_id: str, "[^ ]+$", f"{omics_processing_id}", data_object_rec["description"] ) logger.info(f"new_description: {new_description}") + new_filename = self._make_new_filename(new_data_object_id, data_object_rec) + logger.info(f"new_filename: {new_filename}") + new_url = f"{BASE_DIR}/{omics_processing_id}/{new_data_object_id}/{new_filename}" + + data_object = NmdcDataObject( + id=new_data_object_id, + 
name=template["name"].replace("{id}", omics_processing_id), + description=new_description, + type="nmdc:Data_Object", + file_size_bytes=data_object_rec["file_size_bytes"], + md5_checksum=data_object_rec["md5_checksum"], + url=new_url, + ) + return data_object + + def _make_new_filename(self, new_data_object_id: str, + data_object_record: Dict) -> str: + """ + Return the updated filename. + """ + filename = data_object_record["url"].split("/")[-1] + file_extenstion = filename.lstrip("nmdc_").split("_", maxsplit=1)[-1] + new_filename = f"{new_data_object_id}_{file_extenstion}".replace(":", + "_") + return new_filename + + + + + From dff972636bb8a9d3dc137d9c4f11199d113752c9 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 09:40:14 -0800 Subject: [PATCH 46/91] Consolidate record extraction script to re_id_tool.py --- .../extract_metagenome_workflow_records.py | 155 ------------------ .../re_iding/scripts/re_id_tool.py | 122 +++++++++++++- 2 files changed, 116 insertions(+), 161 deletions(-) delete mode 100644 nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py diff --git a/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py b/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py deleted file mode 100644 index 49c71c1f..00000000 --- a/nmdc_automation/re_iding/scripts/extract_metagenome_workflow_records.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python3 -# coding: utf-8 -# nmdc_schema/napa_compliance/scripts/extract_metagenome_workflow_records.py -""" -extract_metagenome_workflow_records.py: Starting with OmicsProcessing, extract -metagenome workflow activity records and their associated data objects. Write -the results, as a list of nmdc-schema Database instances to a JSON file. 
-""" -import logging -import time -from pathlib import Path -import json -import click - -from nmdc_automation.api import NmdcRuntimeUserApi -from nmdc_automation.config import Config -import nmdc_schema.nmdc as nmdc - -GOLD_STUDY_ID = "gold:Gs0114663" -STUDY_ID = "nmdc:sty-11-aygzgv51" -NAPA_CONFIG = Path("../../../configs/napa_config.toml") - - -def _get_legacy_id(omics_processing_record: dict) -> str: - """ - Get the legacy ID for the given OmicsProcessing record. - """ - legacy_id = None - legacy_ids = [] - gold_ids = omics_processing_record.get("gold_sequencing_project_identifiers", []) - legacy_ids.extend(gold_ids) - alternative_ids = omics_processing_record.get("alternative_identifiers", []) - legacy_ids.extend(alternative_ids) - if len(legacy_ids) == 0: - logging.warning( - f"No legacy IDs found for omics_processing_record: {omics_processing_record['id']}" - ) - return None - elif len(legacy_ids) > 1: - logging.warning( - f"Multiple legacy IDs found for omics_processing_record: {omics_processing_record['id']}" - ) - return None - else: - legacy_id = legacy_ids[0] - return legacy_id - -@click.command() -@click.option("--study_id", default=STUDY_ID, help="Updated study ID") -@click.option( - "--site_config", type=click.Path(exists=True), default=NAPA_CONFIG, - help="Site configuration file" -) -def extract_workflow_records(study_id: str, site_config: bool): - """ - Extract metagenome workflow records for re-ID-ing of Study, Biosample, and - OmicsProcessing records by: - 1. Retrieving all OmicsProcessing records for updated study ID - 2. For each OmicsProcessing record, retrieve the corresponding - WorkflowExecutionActivity records: - a. ReadQcAnalysisActivity - b. ReadBasedTaxonomyAnalysisActivity - c. MetagenomeAssembly - d. MetagenomeAnnotationActivity - e. MagsAnalysisActivity - 3. For each WorkflowExecutionActivity record: - a. Retrieve the corresponding DataObject records - 4. 
Create a database object for each OmicsProcessing record and its - associated WorkflowExecutionActivity and DataObject records - 5. Write the database object to a JSON file - """ - start_time = time.time() - logging.info("starting missing_neon_soils_ecosystem_data.py...") - logging.info(f"study_id: {study_id}") - - config = Config(site_config) - query_api_client = NmdcRuntimeUserApi( - username=config.napa_username, password=config.napa_password, - base_url=config.napa_base_url, ) - - # 1. Retrieve all OmicsProcessing records for the updated NMDC study ID - omics_processing_records = query_api_client.get_omics_processing_records_for_nmdc_study( - study_id - ) - logging.info( - f"Retrieved {len(omics_processing_records)} OmicsProcessing records for study {study_id}" - ) - - retrieved_databases = [] - # 2. For each OmicsProcessing record, find the legacy identifier: - for omics_processing_record in omics_processing_records: - db = nmdc.Database() - logging.info(f"omics_processing_record: " - f"{omics_processing_record['id']}") - legacy_id = _get_legacy_id(omics_processing_record) - logging.info(f"legacy_id: {legacy_id}") - - if (omics_processing_record["omics_type"]["has_raw_value"] != - "Metagenome"): - logging.info(f"omics_processing_record {omics_processing_record['id']} " - f"is not a Metagenome") - continue - db.omics_processing_set.append(omics_processing_record) - for data_object_id in omics_processing_record["has_output"]: - data_object_record = query_api_client.get_data_object_by_id( - data_object_id - ) - db.data_object_set.append(data_object_record) - - # downstream workflow activity sets - (read_qc_records, readbased_records, metagenome_assembly_records, - metagenome_annotation_records, mags_records) = [], [], [], [], [] - - downstream_workflow_activity_sets = { - "read_qc_analysis_activity_set": read_qc_records, - "read_based_taxonomy_analysis_activity_set": readbased_records, - "metagenome_assembly_set": metagenome_assembly_records, - 
"metagenome_annotation_activity_set": metagenome_annotation_records, - "mags_activity_set": mags_records, - } - for set_name, records in downstream_workflow_activity_sets.items(): - records = query_api_client.get_workflow_activity_informed_by( - set_name, legacy_id - ) - db.__setattr__(set_name, records) - # Add the data objects referenced by the `has_output` property - for record in records: - logging.info(f"record: {record['id']}, {record['name']}") - for data_object_id in record["has_output"]: - data_object_record = query_api_client.get_data_object_by_id( - data_object_id - ) - logging.info(f"data_object_record: " - f"{data_object_record['id']}, {data_object_record['description']}") - db.data_object_set.append(data_object_record) - - # Search for orphaned data objects with the legacy ID in the description - orphaned_data_objects = query_api_client.get_data_objects_by_description( - legacy_id - ) - # check that we don't already have the data object in the set - for data_object in orphaned_data_objects: - if data_object["id"] not in [d["id"] for d in db.data_object_set]: - db.data_object_set.append(data_object) - logging.info(f"Added orphaned data object: " - f"{data_object['id']}, {data_object['description']}") - - retrieved_databases.append(db) - - with open(f"{study_id}_assocated_record_dump.json", 'w') as json_file: - json.dump([o.__dict__ for o in retrieved_databases], json_file, indent=4) - - -if __name__ == "__main__": - extract_workflow_records() diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 890ba8be..0890f4db 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -10,7 +10,7 @@ import json import click -from nmdc_automation.api import NmdcRuntimeApi +from nmdc_automation.api import NmdcRuntimeApi, NmdcRuntimeUserApi from nmdc_automation.config import Config import nmdc_schema.nmdc as nmdc from nmdc_automation.re_iding.base 
import ReIdTool @@ -65,6 +65,93 @@ def extract_records(ctx, study_id): """ start_time = time.time() logging.info(f"Extracting workflow records for study_id: {study_id}") + logging.info(f"study_id: {study_id}") + + config = Config(ctx.obj['site_config']) + api_client = NmdcRuntimeUserApi(username=config.napa_username, password=config.napa_password, + base_url=config.napa_base_url) + + # 1. Retrieve all OmicsProcessing records for the updated NMDC study ID + omics_processing_records = api_client.get_omics_processing_records_for_nmdc_study( + study_id + ) + logging.info( + f"Retrieved {len(omics_processing_records)} OmicsProcessing records for study {study_id}" + ) + + retrieved_databases = [] + # 2. For each OmicsProcessing record, find the legacy identifier: + for omics_processing_record in omics_processing_records: + db = nmdc.Database() + logging.info( + f"omics_processing_record: " + f"{omics_processing_record['id']}" + ) + legacy_id = _get_legacy_id(omics_processing_record) + logging.info(f"legacy_id: {legacy_id}") + + if (omics_processing_record["omics_type"]["has_raw_value"] != + "Metagenome"): + logging.info( + f"omics_processing_record {omics_processing_record['id']} " + f"is not a Metagenome" + ) + continue + db.omics_processing_set.append(omics_processing_record) + for data_object_id in omics_processing_record["has_output"]: + data_object_record = api_client.get_data_object_by_id( + data_object_id + ) + db.data_object_set.append(data_object_record) + + # downstream workflow activity sets + (read_qc_records, readbased_records, metagenome_assembly_records, + metagenome_annotation_records, mags_records) = [], [], [], [], [] + + downstream_workflow_activity_sets = { + "read_qc_analysis_activity_set": read_qc_records, + "read_based_taxonomy_analysis_activity_set": readbased_records, + "metagenome_assembly_set": metagenome_assembly_records, + "metagenome_annotation_activity_set": metagenome_annotation_records, + "mags_activity_set": mags_records, + } + for 
set_name, records in downstream_workflow_activity_sets.items(): + records = api_client.get_workflow_activity_informed_by( + set_name, legacy_id + ) + db.__setattr__(set_name, records) + # Add the data objects referenced by the `has_output` property + for record in records: + logging.info(f"record: {record['id']}, {record['name']}") + for data_object_id in record["has_output"]: + data_object_record = api_client.get_data_object_by_id( + data_object_id + ) + logging.info( + f"data_object_record: " + f"{data_object_record['id']}, {data_object_record['description']}" + ) + db.data_object_set.append(data_object_record) + + # Search for orphaned data objects with the legacy ID in the description + orphaned_data_objects = api_client.get_data_objects_by_description( + legacy_id + ) + # check that we don't already have the data object in the set + for data_object in orphaned_data_objects: + if data_object["id"] not in [d["id"] for d in db.data_object_set]: + db.data_object_set.append(data_object) + logging.info( + f"Added orphaned data object: " + f"{data_object['id']}, {data_object['description']}" + ) + + retrieved_databases.append(db) + + with open(f"{study_id}_assocated_record_dump.json", 'w') as json_file: + json.dump( + [o.__dict__ for o in retrieved_databases], json_file, indent=4 + ) @cli.command() @@ -114,13 +201,13 @@ def process_records(ctx, dryrun, study_id, data_dir): new_db = reid_tool.update_omics_processing_has_output(db_record, new_db) new_db = reid_tool.update_reads_qc_analysis_activity_set(db_record, new_db) + re_ided_db_records.append(new_db) - # Re-ID db_record - # Update data file headers - # Write re-IDed db_record to db_outfile - # Write updated data file to datafile_dir - # Log results + with open(f"{study_id}_updated_record_dump.json", 'w') as json_file: + json.dump( + [o.__dict__ for o in re_ided_db_records], json_file, indent=4 + ) def _get_data_dir(data_dir, dryrun): @@ -149,6 +236,29 @@ def _get_database_paths(study_id, dryrun): db_outfile = 
DATA_DIR.joinpath(f"{study_id}{db_outfile_suffix}") return db_infile, db_outfile +def _get_legacy_id(omics_processing_record: dict) -> str: + """ + Get the legacy ID for the given OmicsProcessing record. + """ + legacy_id = None + legacy_ids = [] + gold_ids = omics_processing_record.get("gold_sequencing_project_identifiers", []) + legacy_ids.extend(gold_ids) + alternative_ids = omics_processing_record.get("alternative_identifiers", []) + legacy_ids.extend(alternative_ids) + if len(legacy_ids) == 0: + logging.warning( + f"No legacy IDs found for omics_processing_record: {omics_processing_record['id']}" + ) + return None + elif len(legacy_ids) > 1: + logging.warning( + f"Multiple legacy IDs found for omics_processing_record: {omics_processing_record['id']}" + ) + return None + else: + legacy_id = legacy_ids[0] + return legacy_id if __name__ == '__main__': cli(obj={}) From 92f18b68b9536bad2c648cb0ef58f058c6020fbe Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 10:25:02 -0800 Subject: [PATCH 47/91] normalize QC vs Qc and output json for re-ided records --- nmdc_automation/api/nmdcapi.py | 2 +- nmdc_automation/re_iding/base.py | 28 ++- .../data/dryrun_re_ided_record_dump.json | 198 ++++++++++++++++++ .../re_iding/scripts/re_id_tool.py | 9 +- 4 files changed, 223 insertions(+), 14 deletions(-) create mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 7a5988cf..0927c23a 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -96,7 +96,7 @@ def minter(self, id_type, informed_by=None): data = {"schema_class": {"id": id_type}, "how_many": 1} resp = requests.post(url, data=json.dumps(data), headers=self.header) if not resp.ok: - raise ValueError("Failed to mint ID") + raise ValueError(f"Failed to mint ID of type {id_type}") id = resp.json()[0] if informed_by: url = f"{self._base_url}pids/bind" diff --git 
a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 3bd74424..c6d0c7fa 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -47,13 +47,12 @@ def _workflow_template_for_type(self, workflow_type: str) -> Dict: Return the workflow template for the given workflow name. """ templates = [] + workflow_type = workflow_type.replace("QC", "Qc") for t in self.workflow_template: type = t["Type"] if type == workflow_type: templates.append(t) - # templates = [t for t in self.workflow_template if - # t["Type"] == workflow_type] if len(templates) == 0: raise ValueError(f"No workflow template found for {workflow_type}") elif len(templates) > 1: @@ -137,6 +136,7 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, activity_type = "nmdc:ReadQcAnalysisActivity" omics_processing_id = new_omics_processing.id has_input = new_omics_processing.has_output + updated_has_output = [] # Get ReadQC data objects and update IDs for old_do_id in reads_qc_rec["has_output"]: @@ -145,8 +145,15 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, new_do = self._make_new_data_object( omics_processing_id, activity_type, old_do_rec ) + # add new data object to new database and update has_output new_db.data_object_set.append(new_do) updated_has_output.append(new_do.id) + + # Get new ReadQC activity set + new_reads_qc = self._make_new_activity_set_object( + omics_processing_id, reads_qc_rec, has_input, updated_has_output + ) + new_db.read_qc_analysis_activity_set.append(new_reads_qc) return new_db def _make_new_activity_set_object(self, omics_processing_id: str, @@ -155,23 +162,26 @@ def _make_new_activity_set_object(self, omics_processing_id: str, """ Return a new activity set object with updated IDs. 
""" - activity_type = activity_set_rec["type"] - template = self.workflow_template(activity_type) + activity_type = activity_set_rec["type"].replace("QC", "Qc") + template = self._workflow_template_for_type(activity_type) activity_class = getattr(nmdc, template["ActivityRange"]) new_activity_id = self.api_client.minter(activity_type) logger.info( f"{activity_type}\t{activity_set_rec['id']}\t{new_activity_id}" ) activity = activity_class( - id=new_activity_id, name=template["Activity"]["name"].replace( - "{id}", omics_processing_id - ), git_url=template["Git_repo"], version=template["Version"], + id=new_activity_id, + name=template["Activity"]["name"].replace("{id}", omics_processing_id), + git_url=template["Git_repo"], version=template["Version"], part_of=[omics_processing_id], execution_resource="NERSC - Perlmutter", started_at_time=activity_set_rec["started_at_time"], - has_input=has_input, has_output=has_output, + has_input=has_input, + has_output=has_output, ended_at_time=activity_set_rec["ended_at_time"], - was_informed_by=omics_processing_id, ) + was_informed_by=omics_processing_id, + type=template["Type"], + ) return activity def _make_new_data_object(self, omics_processing_id: str, diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json new file mode 100644 index 00000000..94aeaf23 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -0,0 +1,198 @@ +[ + { + "functional_annotation_agg": [], + "library_preparation_set": [], + "processed_sample_set": [], + "extraction_set": [], + "activity_set": [], + "biosample_set": [], + "data_object_set": [ + { + "id": "nmdc:dobj-11-9t9hwj09", + "name": "9422.8.132674.GTTTCG.fastq.gz", + "description": "Raw sequencer read data", + "alternative_identifiers": [], + "file_size_bytes": 2861414297, + "md5_checksum": null, + "data_object_type": null, + "compression_type": null, + 
"was_generated_by": null, + "url": null, + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-03dw0x03", + "name": "Reads QC result fastq (clean data)", + "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 2571324879, + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "data_object_type": null, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-03dw0x03/nmdc_dobj-11-03dw0x03_filtered.fastq.gz", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-ds6gd769", + "name": "Reads QC summary statistics", + "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 290, + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "data_object_type": null, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-ds6gd769/nmdc_dobj-11-ds6gd769_filterStats.txt", + "type": "nmdc:Data_Object" + } + ], + "dissolving_activity_set": [], + "functional_annotation_set": [], + "genome_feature_set": [], + "mags_activity_set": [], + "material_sample_set": [], + "material_sampling_activity_set": [], + "metabolomics_analysis_activity_set": [], + "metagenome_annotation_activity_set": [], + "metagenome_assembly_set": [], + "metagenome_sequencing_activity_set": [], + "metaproteomics_analysis_activity_set": [], + "metatranscriptome_activity_set": [], + "nom_analysis_activity_set": [], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-bn8jcq58", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "alternative_identifiers": [], + "has_input": [ + "nmdc:bsm-11-qq8s6x03" + ], + "add_date": "2015-05-28", + "chimera_check": 
null, + "gold_sequencing_project_identifiers": [ + "gold:Gp0115663" + ], + "has_output": [ + "nmdc:dobj-11-9t9hwj09" + ], + "insdc_experiment_identifiers": [], + "instrument_name": null, + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "nucl_acid_amp": null, + "nucl_acid_ext": null, + "omics_type": { + "has_raw_value": "Metagenome", + "was_generated_by": null, + "type": null, + "term": null + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "pcr_cond": null, + "pcr_primers": null, + "principal_investigator": { + "has_raw_value": "James Stegen", + "was_generated_by": null, + "type": null, + "orcid": null, + "profile_image_url": null, + "email": null, + "name": null, + "websites": [] + }, + "processing_institution": { + "_code": { + "text": "JGI", + "description": null, + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "samp_vol_we_dna_ext": null, + "seq_meth": null, + "seq_quality_check": null, + "target_gene": null, + "target_subfragment": null, + "type": "nmdc:OmicsProcessing" + } + ], + "reaction_activity_set": [], + "read_qc_analysis_activity_set": [ 
+ { + "id": "nmdc:wfrqc-11-wt45ty13", + "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "used": null, + "execution_resource": "NERSC - Perlmutter", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-9t9hwj09" + ], + "has_output": [ + "nmdc:dobj-11-03dw0x03", + "nmdc:dobj-11-ds6gd769" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.8", + "input_read_count": null, + "input_base_count": null, + "output_read_count": null, + "output_base_count": null, + "input_read_bases": null, + "output_read_bases": null + } + ], + "read_based_taxonomy_analysis_activity_set": [], + "study_set": [], + "field_research_site_set": [], + "collecting_biosamples_from_site_set": [], + "date_created": null, + "etl_software_version": null, + "pooling_set": [] + } +] \ No newline at end of file diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 0890f4db..354a03de 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -199,15 +199,16 @@ def process_records(ctx, dryrun, study_id, data_dir): new_db = nmdc.Database() # update OmicsProcessing has_output and related DataObject records new_db = reid_tool.update_omics_processing_has_output(db_record, new_db) + # update ReadsQC new_db = reid_tool.update_reads_qc_analysis_activity_set(db_record, new_db) re_ided_db_records.append(new_db) - with open(f"{study_id}_updated_record_dump.json", 'w') as json_file: - json.dump( - [o.__dict__ for o in re_ided_db_records], json_file, indent=4 - ) + json_data = json.dumps(re_ided_db_records, default=lambda o: o.__dict__, indent=4) + logging.info(f"Writing re_ided_db_records to {db_outfile}") + with open(db_outfile, "w") as f: + f.write(json_data) 
def _get_data_dir(data_dir, dryrun): From e0634acaca6859b9d2c39f4001bca7564a2a629e Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 10:39:05 -0800 Subject: [PATCH 48/91] update readQC specific properties --- nmdc_automation/re_iding/base.py | 9 ++++++ .../data/dryrun_re_ided_record_dump.json | 28 +++++++++---------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index c6d0c7fa..1b745769 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -153,6 +153,15 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, new_reads_qc = self._make_new_activity_set_object( omics_processing_id, reads_qc_rec, has_input, updated_has_output ) + # update activity-specific properties + new_reads_qc.input_read_count = reads_qc_rec.get("input_read_count") + new_reads_qc.input_base_count = reads_qc_rec.get("input_base_count") + new_reads_qc.output_read_count = reads_qc_rec.get("output_read_count") + new_reads_qc.output_base_count = reads_qc_rec.get("output_base_count") + new_reads_qc.input_read_bases = reads_qc_rec.get("input_read_bases") + new_reads_qc.output_read_bases = reads_qc_rec.get("output_read_bases") + + new_db.read_qc_analysis_activity_set.append(new_reads_qc) return new_db diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index 94aeaf23..c4d5b59a 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -8,7 +8,7 @@ "biosample_set": [], "data_object_set": [ { - "id": "nmdc:dobj-11-9t9hwj09", + "id": "nmdc:dobj-11-3w7nex07", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "alternative_identifiers": [], @@ -21,7 +21,7 @@ "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-03dw0x03", + 
"id": "nmdc:dobj-11-aazp9v53", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -30,11 +30,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-03dw0x03/nmdc_dobj-11-03dw0x03_filtered.fastq.gz", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-aazp9v53/nmdc_dobj-11-aazp9v53_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-ds6gd769", + "id": "nmdc:dobj-11-wtayma27", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -43,7 +43,7 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-ds6gd769/nmdc_dobj-11-ds6gd769_filterStats.txt", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-wtayma27/nmdc_dobj-11-wtayma27_filterStats.txt", "type": "nmdc:Data_Object" } ], @@ -75,7 +75,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-9t9hwj09" + "nmdc:dobj-11-3w7nex07" ], "insdc_experiment_identifiers": [], "instrument_name": null, @@ -159,7 +159,7 @@ "reaction_activity_set": [], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-wt45ty13", + "id": "nmdc:wfrqc-11-08etj008", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -168,23 +168,23 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-9t9hwj09" + "nmdc:dobj-11-3w7nex07" ], "has_output": [ - "nmdc:dobj-11-03dw0x03", - "nmdc:dobj-11-ds6gd769" + "nmdc:dobj-11-aazp9v53", + "nmdc:dobj-11-wtayma27" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ "nmdc:omprc-11-bn8jcq58" ], 
"version": "v1.0.8", - "input_read_count": null, + "input_read_count": 32238374, "input_base_count": null, - "output_read_count": null, + "output_read_count": 30774080, "output_base_count": null, - "input_read_bases": null, - "output_read_bases": null + "input_read_bases": 4867994474, + "output_read_bases": 4608772924 } ], "read_based_taxonomy_analysis_activity_set": [], From 1354a52b29370982e38dc922c8fe35099f041eb8 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 11:17:18 -0800 Subject: [PATCH 49/91] add update_metagenome_assembly_set --- nmdc_automation/re_iding/base.py | 46 +++++++++++- nmdc_automation/re_iding/db_utils.py | 1 + .../data/dryrun_re_ided_record_dump.json | 72 ++++++++++++++++--- .../re_iding/scripts/re_id_tool.py | 2 + 4 files changed, 109 insertions(+), 12 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 1b745769..3207b271 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -16,6 +16,7 @@ from nmdc_automation.api import NmdcRuntimeApi from nmdc_automation.re_iding.db_utils import (OMICS_PROCESSING_SET, READS_QC_SET, + METAGENOME_ASSEMBLY_SET, check_for_single_omics_processing_record, get_data_object_record_by_id, get_omics_processing_id) @@ -130,7 +131,6 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, f"{db_record[OMICS_PROCESSING_SET][0]['id']}" ) new_omics_processing = new_db.omics_processing_set[0] - logger.info(new_omics_processing) for reads_qc_rec in db_record[READS_QC_SET]: # old records have non-conforming type activity_type = "nmdc:ReadQcAnalysisActivity" @@ -165,6 +165,48 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, new_db.read_qc_analysis_activity_set.append(new_reads_qc) return new_db + def update_metagenome_assembly_set(self, db_record: Dict, + new_db: NmdcDatabase) -> (NmdcDatabase): + """ + Return a new Database instance with the metagenome_assembly_set + and its data objects 
updated to new IDs. + """ + logger.info(f"Updating metagenome_assembly_set for " + f"{db_record[OMICS_PROCESSING_SET][0]['id']}") + new_omics_processing = new_db.omics_processing_set[0] + + for assembly_rec in db_record[METAGENOME_ASSEMBLY_SET]: + activity_type = "nmdc:MetagenomeAssembly" + omics_processing_id = new_omics_processing.id + new_read_qc = new_db.read_qc_analysis_activity_set[0] + has_input = new_read_qc.has_output + updated_has_output = [] + for old_do_id in assembly_rec["has_output"]: + logger.info(f"old_do_id: {old_do_id}") + old_do_rec = get_data_object_record_by_id(db_record, old_do_id) + # TODO we need to handle missing data_object_type - until + # then though.. + if not old_do_rec.get("data_object_type"): + logger.warning(f"Skipping {old_do_id} - no " + f"data_object_type") + continue + new_do = self._make_new_data_object( + omics_processing_id, activity_type, old_do_rec + ) + # add new data object to new database and update has_output + new_db.data_object_set.append(new_do) + updated_has_output.append(new_do.id) + + # Get new Metagenome Assembly activity set + new_reads_qc = self._make_new_activity_set_object( + omics_processing_id, assembly_rec, has_input, + updated_has_output + ) + # update activity-specific properties + return new_db + + + def _make_new_activity_set_object(self, omics_processing_id: str, activity_set_rec: Dict, has_input: List, has_output: List) -> WorkflowExecutionActivity: @@ -198,7 +240,7 @@ def _make_new_data_object(self, omics_processing_id: str, """ Return a new data object with updated IDs. 
""" - data_object_type = data_object_rec["data_object_type"] + data_object_type = data_object_rec.get("data_object_type") template = self.data_object_template( activity_type, data_object_type ) diff --git a/nmdc_automation/re_iding/db_utils.py b/nmdc_automation/re_iding/db_utils.py index f1c77c65..6f99fb4b 100644 --- a/nmdc_automation/re_iding/db_utils.py +++ b/nmdc_automation/re_iding/db_utils.py @@ -11,6 +11,7 @@ OMICS_PROCESSING_SET = "omics_processing_set" DATA_OBJECT_SET = "data_object_set" READS_QC_SET = "read_qc_analysis_activity_set" +METAGENOME_ASSEMBLY_SET = "metagenome_assembly_set" diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index c4d5b59a..80692407 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -8,7 +8,7 @@ "biosample_set": [], "data_object_set": [ { - "id": "nmdc:dobj-11-3w7nex07", + "id": "nmdc:dobj-11-6bpfeq94", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "alternative_identifiers": [], @@ -21,7 +21,7 @@ "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-aazp9v53", + "id": "nmdc:dobj-11-dqtqva56", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -30,11 +30,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-aazp9v53/nmdc_dobj-11-aazp9v53_filtered.fastq.gz", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-dqtqva56/nmdc_dobj-11-dqtqva56_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-wtayma27", + "id": "nmdc:dobj-11-450z1443", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", 
"alternative_identifiers": [], @@ -43,7 +43,59 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-wtayma27/nmdc_dobj-11-wtayma27_filterStats.txt", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-450z1443/nmdc_dobj-11-450z1443_filterStats.txt", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-g1z1r257", + "name": "Final assembly contigs fasta", + "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 90115831, + "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", + "data_object_type": null, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-g1z1r257/nmdc_dobj-11-g1z1r257_contigs.fna", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-m0s9ek67", + "name": "Final assembly scaffolds fasta", + "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 89604715, + "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "data_object_type": null, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-m0s9ek67/nmdc_dobj-11-m0s9ek67_scaffolds.fna", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-3degsk51", + "name": "An AGP format file that describes the assembly", + "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 12542171, + "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "data_object_type": null, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-3degsk51/nmdc_dobj-11-3degsk51_assembly.agp", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-zfemcr89", 
+ "name": "Sorted bam file of reads mapping back to the final assembly", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 2773429299, + "md5_checksum": "31dc958d116d02122509e90b0883954f", + "data_object_type": null, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-zfemcr89/nmdc_dobj-11-zfemcr89_pairedMapped_sorted.bam", "type": "nmdc:Data_Object" } ], @@ -75,7 +127,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-3w7nex07" + "nmdc:dobj-11-6bpfeq94" ], "insdc_experiment_identifiers": [], "instrument_name": null, @@ -159,7 +211,7 @@ "reaction_activity_set": [], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-08etj008", + "id": "nmdc:wfrqc-11-rssxx935", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -168,11 +220,11 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-3w7nex07" + "nmdc:dobj-11-6bpfeq94" ], "has_output": [ - "nmdc:dobj-11-aazp9v53", - "nmdc:dobj-11-wtayma27" + "nmdc:dobj-11-dqtqva56", + "nmdc:dobj-11-450z1443" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 354a03de..1da55380 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -201,6 +201,8 @@ def process_records(ctx, dryrun, study_id, data_dir): new_db = reid_tool.update_omics_processing_has_output(db_record, new_db) # update ReadsQC new_db = reid_tool.update_reads_qc_analysis_activity_set(db_record, new_db) + # update Metagenome Assembly + new_db = reid_tool.update_metagenome_assembly_set(db_record, new_db) re_ided_db_records.append(new_db) From 
88e7471e496a411536f7dd1a5bcea26395ab4f3d Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 11:26:17 -0800 Subject: [PATCH 50/91] sketch in update read based analysis method --- nmdc_automation/re_iding/base.py | 13 +++++++ .../data/dryrun_re_ided_record_dump.json | 36 +++++++++---------- .../re_iding/scripts/re_id_tool.py | 2 ++ 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 3207b271..9f4756a1 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -205,7 +205,20 @@ def update_metagenome_assembly_set(self, db_record: Dict, # update activity-specific properties return new_db + def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, + new_db: NmdcDatabase) -> (NmdcDatabase): + """ + Return a new Database instance with the read_based_taxonomy_analysis_activity_set + and its data objects updated to new IDs. + """ + logger.info(f"Updating read_based_taxonomy_analysis_activity_set for " + f"{db_record[OMICS_PROCESSING_SET][0]['id']}") + new_omics_processing = new_db.omics_processing_set[0] + for readbased_rec in db_record["read_based_taxonomy_analysis_activity_set"]: + pass + + return new_db def _make_new_activity_set_object(self, omics_processing_id: str, activity_set_rec: Dict, has_input: List, diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index 80692407..524ca2e2 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -8,7 +8,7 @@ "biosample_set": [], "data_object_set": [ { - "id": "nmdc:dobj-11-6bpfeq94", + "id": "nmdc:dobj-11-4vnpfh80", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "alternative_identifiers": [], @@ -21,7 +21,7 @@ "type": "nmdc:DataObject" }, 
{ - "id": "nmdc:dobj-11-dqtqva56", + "id": "nmdc:dobj-11-jt200525", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -30,11 +30,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-dqtqva56/nmdc_dobj-11-dqtqva56_filtered.fastq.gz", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-jt200525/nmdc_dobj-11-jt200525_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-450z1443", + "id": "nmdc:dobj-11-qvxhra11", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -43,11 +43,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-450z1443/nmdc_dobj-11-450z1443_filterStats.txt", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-qvxhra11/nmdc_dobj-11-qvxhra11_filterStats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-g1z1r257", + "id": "nmdc:dobj-11-9b3c8a90", "name": "Final assembly contigs fasta", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -56,11 +56,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-g1z1r257/nmdc_dobj-11-g1z1r257_contigs.fna", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-9b3c8a90/nmdc_dobj-11-9b3c8a90_contigs.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-m0s9ek67", + "id": "nmdc:dobj-11-302hqy22", "name": "Final assembly scaffolds fasta", "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -69,11 +69,11 @@ 
"data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-m0s9ek67/nmdc_dobj-11-m0s9ek67_scaffolds.fna", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-302hqy22/nmdc_dobj-11-302hqy22_scaffolds.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-3degsk51", + "id": "nmdc:dobj-11-8c4dad48", "name": "An AGP format file that describes the assembly", "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -82,11 +82,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-3degsk51/nmdc_dobj-11-3degsk51_assembly.agp", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-8c4dad48/nmdc_dobj-11-8c4dad48_assembly.agp", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-zfemcr89", + "id": "nmdc:dobj-11-sqshnm40", "name": "Sorted bam file of reads mapping back to the final assembly", "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -95,7 +95,7 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-zfemcr89/nmdc_dobj-11-zfemcr89_pairedMapped_sorted.bam", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-sqshnm40/nmdc_dobj-11-sqshnm40_pairedMapped_sorted.bam", "type": "nmdc:Data_Object" } ], @@ -127,7 +127,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-6bpfeq94" + "nmdc:dobj-11-4vnpfh80" ], "insdc_experiment_identifiers": [], "instrument_name": null, @@ -211,7 +211,7 @@ "reaction_activity_set": [], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-rssxx935", + "id": "nmdc:wfrqc-11-5d5fmd43", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": 
"2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -220,11 +220,11 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-6bpfeq94" + "nmdc:dobj-11-4vnpfh80" ], "has_output": [ - "nmdc:dobj-11-dqtqva56", - "nmdc:dobj-11-450z1443" + "nmdc:dobj-11-jt200525", + "nmdc:dobj-11-qvxhra11" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 1da55380..2ec0ce77 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -203,6 +203,8 @@ def process_records(ctx, dryrun, study_id, data_dir): new_db = reid_tool.update_reads_qc_analysis_activity_set(db_record, new_db) # update Metagenome Assembly new_db = reid_tool.update_metagenome_assembly_set(db_record, new_db) + # update Read Based Taxonomy Analysis + new_db = reid_tool.update_read_based_taxonomy_analysis_activity_set(db_record, new_db) re_ided_db_records.append(new_db) From fe3f3d92004feded9f57f0bd10c39e3ed61017d0 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 12:07:50 -0800 Subject: [PATCH 51/91] Handle missing data_object_type --- nmdc_automation/re_iding/base.py | 19 +++---- nmdc_automation/re_iding/db_utils.py | 2 +- nmdc_automation/re_iding/file_utils.py | 38 ++++++++++++++ .../data/dryrun_re_ided_record_dump.json | 49 ++++++++++++------- .../re_iding/scripts/re_id_tool.py | 5 +- 5 files changed, 84 insertions(+), 29 deletions(-) create mode 100644 nmdc_automation/re_iding/file_utils.py diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 9f4756a1..0c23272a 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -20,6 +20,7 @@ check_for_single_omics_processing_record, get_data_object_record_by_id, get_omics_processing_id) +from 
nmdc_automation.re_iding.file_utils import find_data_object_type NAPA_TEMPLATE = "../../../configs/re_iding_worklfows.yaml" BASE_DIR = "/global/cfs/cdirs/m3408/results" @@ -142,8 +143,10 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, for old_do_id in reads_qc_rec["has_output"]: logger.info(f"old_do_id: {old_do_id}") old_do_rec = get_data_object_record_by_id(db_record, old_do_id) + data_object_type = find_data_object_type(old_do_rec) new_do = self._make_new_data_object( - omics_processing_id, activity_type, old_do_rec + omics_processing_id, activity_type, old_do_rec, + data_object_type, ) # add new data object to new database and update has_output new_db.data_object_set.append(new_do) @@ -184,14 +187,11 @@ def update_metagenome_assembly_set(self, db_record: Dict, for old_do_id in assembly_rec["has_output"]: logger.info(f"old_do_id: {old_do_id}") old_do_rec = get_data_object_record_by_id(db_record, old_do_id) - # TODO we need to handle missing data_object_type - until - # then though.. - if not old_do_rec.get("data_object_type"): - logger.warning(f"Skipping {old_do_id} - no " - f"data_object_type") + data_object_type = find_data_object_type(old_do_rec) + if not data_object_type: continue new_do = self._make_new_data_object( - omics_processing_id, activity_type, old_do_rec + omics_processing_id, activity_type, old_do_rec, data_object_type ) # add new data object to new database and update has_output new_db.data_object_set.append(new_do) @@ -249,11 +249,12 @@ def _make_new_activity_set_object(self, omics_processing_id: str, return activity def _make_new_data_object(self, omics_processing_id: str, - activity_type: str, data_object_rec: Dict) -> NmdcDataObject: + activity_type: str, + data_object_rec: Dict, + data_object_type: str) -> NmdcDataObject: """ Return a new data object with updated IDs. 
""" - data_object_type = data_object_rec.get("data_object_type") template = self.data_object_template( activity_type, data_object_type ) diff --git a/nmdc_automation/re_iding/db_utils.py b/nmdc_automation/re_iding/db_utils.py index 6f99fb4b..dee607eb 100644 --- a/nmdc_automation/re_iding/db_utils.py +++ b/nmdc_automation/re_iding/db_utils.py @@ -1,6 +1,6 @@ # nmdc_automation/nmdc_automation/re_iding/db_utils.py """ -nmdc_db_utils.py: Provides utility functions for working with NMDC Database +db_utils.py: Provides utility functions for working with NMDC Database records and data objects as dicts. """ from dataclasses import dataclass diff --git a/nmdc_automation/re_iding/file_utils.py b/nmdc_automation/re_iding/file_utils.py new file mode 100644 index 00000000..b1ad8342 --- /dev/null +++ b/nmdc_automation/re_iding/file_utils.py @@ -0,0 +1,38 @@ +# nmdc_automation/re_iding/file_utils.py +""" +file_utils.py: Provides utility functions for working with files. +""" +import logging +from typing import Dict, Optional + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[logging.StreamHandler()] +) + +logger = logging.getLogger(__name__) + +def find_data_object_type(data_object_rec: Dict)-> Optional[str]: + """ + Determine the data_object_type for a DO record based on its URL extension. + + Args: + - data_object_record (dict): Dictionary containing the 'url' key which + will be inspected to determine the data type. + + Returns: + - str: The determined data type or None if the type could not be determined. 
+ """ + if "data_object_type" in data_object_rec: + return data_object_rec["data_object_type"] + url = data_object_rec["url"] + if url.endswith("_covstats.txt"): + return "Assembly Coverage Stats" + elif url.endswith("_gottcha2_report.tsv"): + return "GOTTCHA2 Classification Report" + elif url.endswith("_gottcha2_report_full.tsv"): + return "GOTTCHA2 Report Full" + else: + logger.error(f"Missing type: {url}") + return None \ No newline at end of file diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index 524ca2e2..99614bfd 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -8,7 +8,7 @@ "biosample_set": [], "data_object_set": [ { - "id": "nmdc:dobj-11-4vnpfh80", + "id": "nmdc:dobj-11-7ytdgk21", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "alternative_identifiers": [], @@ -21,7 +21,7 @@ "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-jt200525", + "id": "nmdc:dobj-11-xt71bd89", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -30,11 +30,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-jt200525/nmdc_dobj-11-jt200525_filtered.fastq.gz", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-xt71bd89/nmdc_dobj-11-xt71bd89_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-qvxhra11", + "id": "nmdc:dobj-11-3q6mm589", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -43,11 +43,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": 
"/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-qvxhra11/nmdc_dobj-11-qvxhra11_filterStats.txt", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-3q6mm589/nmdc_dobj-11-3q6mm589_filterStats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-9b3c8a90", + "id": "nmdc:dobj-11-1g293v37", "name": "Final assembly contigs fasta", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -56,11 +56,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-9b3c8a90/nmdc_dobj-11-9b3c8a90_contigs.fna", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-1g293v37/nmdc_dobj-11-1g293v37_contigs.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-302hqy22", + "id": "nmdc:dobj-11-9a01ta97", "name": "Final assembly scaffolds fasta", "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -69,11 +69,24 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-302hqy22/nmdc_dobj-11-302hqy22_scaffolds.fna", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-9a01ta97/nmdc_dobj-11-9a01ta97_scaffolds.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-8c4dad48", + "id": "nmdc:dobj-11-33scvx87", + "name": "Assembled contigs coverage information", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 13412363, + "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "data_object_type": null, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-33scvx87/nmdc_dobj-11-33scvx87_covstats.txt", + "type": 
"nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-dnhkse46", "name": "An AGP format file that describes the assembly", "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -82,11 +95,11 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-8c4dad48/nmdc_dobj-11-8c4dad48_assembly.agp", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-dnhkse46/nmdc_dobj-11-dnhkse46_assembly.agp", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-sqshnm40", + "id": "nmdc:dobj-11-v9rgvc54", "name": "Sorted bam file of reads mapping back to the final assembly", "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -95,7 +108,7 @@ "data_object_type": null, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-sqshnm40/nmdc_dobj-11-sqshnm40_pairedMapped_sorted.bam", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-v9rgvc54/nmdc_dobj-11-v9rgvc54_pairedMapped_sorted.bam", "type": "nmdc:Data_Object" } ], @@ -127,7 +140,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-4vnpfh80" + "nmdc:dobj-11-7ytdgk21" ], "insdc_experiment_identifiers": [], "instrument_name": null, @@ -211,7 +224,7 @@ "reaction_activity_set": [], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-5d5fmd43", + "id": "nmdc:wfrqc-11-jm2edq55", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -220,11 +233,11 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-4vnpfh80" + "nmdc:dobj-11-7ytdgk21" ], "has_output": [ - "nmdc:dobj-11-jt200525", - "nmdc:dobj-11-qvxhra11" + "nmdc:dobj-11-xt71bd89", + 
"nmdc:dobj-11-3q6mm589" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 2ec0ce77..26df0e81 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -199,8 +199,11 @@ def process_records(ctx, dryrun, study_id, data_dir): new_db = nmdc.Database() # update OmicsProcessing has_output and related DataObject records new_db = reid_tool.update_omics_processing_has_output(db_record, new_db) - # update ReadsQC + # update ReadsQC: + # db records new_db = reid_tool.update_reads_qc_analysis_activity_set(db_record, new_db) + # files + # TODO - update reads qc files # update Metagenome Assembly new_db = reid_tool.update_metagenome_assembly_set(db_record, new_db) # update Read Based Taxonomy Analysis From c78dcb28031180ba993b61b1f788efe6e7fe29f7 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 13 Nov 2023 12:49:59 -0800 Subject: [PATCH 52/91] add data_object_type to new data objects --- nmdc_automation/re_iding/base.py | 77 +- .../data/dryrun_re_ided_record_dump.json | 984 +++++++++++++++++- 2 files changed, 1005 insertions(+), 56 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 0c23272a..f9ea0831 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -49,7 +49,12 @@ def _workflow_template_for_type(self, workflow_type: str) -> Dict: Return the workflow template for the given workflow name. 
""" templates = [] + # There are some inconsistencies in the workflow names between + # template and object records workflow_type = workflow_type.replace("QC", "Qc") + if workflow_type == "nmdc:ReadbasedAnalysis": + workflow_type = "nmdc:ReadBasedTaxonomyAnalysisActivity" + for t in self.workflow_template: type = t["Type"] if type == workflow_type: @@ -157,13 +162,13 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, omics_processing_id, reads_qc_rec, has_input, updated_has_output ) # update activity-specific properties - new_reads_qc.input_read_count = reads_qc_rec.get("input_read_count") - new_reads_qc.input_base_count = reads_qc_rec.get("input_base_count") - new_reads_qc.output_read_count = reads_qc_rec.get("output_read_count") - new_reads_qc.output_base_count = reads_qc_rec.get("output_base_count") - new_reads_qc.input_read_bases = reads_qc_rec.get("input_read_bases") - new_reads_qc.output_read_bases = reads_qc_rec.get("output_read_bases") - + unset_properties = [ + p for p in new_reads_qc.__dict__ if not new_reads_qc.__dict__[p] + ] + # check for that value in old record + for p in unset_properties: + if p in reads_qc_rec: + setattr(new_reads_qc, p, reads_qc_rec[p]) new_db.read_qc_analysis_activity_set.append(new_reads_qc) return new_db @@ -198,11 +203,21 @@ def update_metagenome_assembly_set(self, db_record: Dict, updated_has_output.append(new_do.id) # Get new Metagenome Assembly activity set - new_reads_qc = self._make_new_activity_set_object( + new_assembly = self._make_new_activity_set_object( omics_processing_id, assembly_rec, has_input, updated_has_output ) # update activity-specific properties + # get new_assembly properties with no set value + unset_properties = [ + p for p in new_assembly.__dict__ if not new_assembly.__dict__[p] + ] + # check for that value in old record + for p in unset_properties: + if p in assembly_rec: + setattr(new_assembly, p, assembly_rec[p]) + + new_db.metagenome_assembly_set.append(new_assembly) return new_db 
def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, @@ -215,8 +230,42 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, f"{db_record[OMICS_PROCESSING_SET][0]['id']}") new_omics_processing = new_db.omics_processing_set[0] - for readbased_rec in db_record["read_based_taxonomy_analysis_activity_set"]: - pass + for read_based_rec in db_record[ + "read_based_taxonomy_analysis_activity_set"]: + activity_type = "nmdc:ReadBasedTaxonomyAnalysisActivity" + omics_processing_id = new_omics_processing.id + new_assembly = new_db.metagenome_assembly_set[0] + has_input = new_assembly.has_output + updated_has_output = [] + for old_do_id in read_based_rec["has_output"]: + logger.info(f"old_do_id: {old_do_id}") + old_do_rec = get_data_object_record_by_id(db_record, old_do_id) + data_object_type = find_data_object_type(old_do_rec) + if not data_object_type: + continue + new_do = self._make_new_data_object( + omics_processing_id, activity_type, old_do_rec, data_object_type + ) + # add new data object to new database and update has_output + new_db.data_object_set.append(new_do) + updated_has_output.append(new_do.id) + + # Get new ReadBasedTaxonomyAnalysisActivity activity set + new_read_based = self._make_new_activity_set_object( + omics_processing_id, read_based_rec, has_input, + updated_has_output + ) + # update activity-specific properties + # get new_read_based properties with no set value + unset_properties = [ + p for p in new_read_based.__dict__ if not new_read_based.__dict__[p] + ] + # check for that value in old record + for p in unset_properties: + if p in read_based_rec: + setattr(new_read_based, p, read_based_rec[p]) + + new_db.read_based_taxonomy_analysis_activity_set.append(new_read_based) return new_db @@ -224,9 +273,14 @@ def _make_new_activity_set_object(self, omics_processing_id: str, activity_set_rec: Dict, has_input: List, has_output: List) -> WorkflowExecutionActivity: """ - Return a new activity set object with 
updated IDs. + Return a new activity set object with updated IDs and common properties: + - id, name, git_url, version, part_of, execution_resource, + started_at_time, ended_at_time, was_informed_by, type, has_input, + has_output """ activity_type = activity_set_rec["type"].replace("QC", "Qc") + if activity_type == "nmdc:ReadbasedAnalysis": + activity_type = "nmdc:ReadBasedTaxonomyAnalysisActivity" template = self._workflow_template_for_type(activity_type) activity_class = getattr(nmdc, template["ActivityRange"]) new_activity_id = self.api_client.minter(activity_type) @@ -276,6 +330,7 @@ def _make_new_data_object(self, omics_processing_id: str, file_size_bytes=data_object_rec["file_size_bytes"], md5_checksum=data_object_rec["md5_checksum"], url=new_url, + data_object_type=data_object_type, ) return data_object diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index 99614bfd..d1e04d60 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -8,7 +8,7 @@ "biosample_set": [], "data_object_set": [ { - "id": "nmdc:dobj-11-7ytdgk21", + "id": "nmdc:dobj-11-q4pmsv93", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "alternative_identifiers": [], @@ -21,94 +21,899 @@ "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-xt71bd89", + "id": "nmdc:dobj-11-hfr31e20", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], "file_size_bytes": 2571324879, "md5_checksum": "7bf778baef033d36f118f8591256d6ef", - "data_object_type": null, + "data_object_type": { + "_code": { + "text": "Filtered Sequencing Reads", + "description": "Reads QC result fastq (clean data)", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + 
"annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-xt71bd89/nmdc_dobj-11-xt71bd89_filtered.fastq.gz", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-hfr31e20/nmdc_dobj-11-hfr31e20_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-3q6mm589", + "id": "nmdc:dobj-11-xc5jhn80", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], "file_size_bytes": 290, "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", - "data_object_type": null, + "data_object_type": { + "_code": { + "text": "QC Statistics", + "description": "Reads QC summary statistics", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + 
"aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-3q6mm589/nmdc_dobj-11-3q6mm589_filterStats.txt", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-xc5jhn80/nmdc_dobj-11-xc5jhn80_filterStats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-1g293v37", + "id": "nmdc:dobj-11-wz0zg037", "name": "Final assembly contigs fasta", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], "file_size_bytes": 90115831, "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", - "data_object_type": null, + "data_object_type": { + "_code": { + "text": "Assembly Contigs", + "description": "Final assembly contigs fasta", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + 
}, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-wz0zg037/nmdc_dobj-11-wz0zg037_contigs.fna", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-dc20ch29", + "name": "Final assembly scaffolds fasta", + "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 89604715, + "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "data_object_type": { + "_code": { + "text": "Assembly Scaffolds", + "description": "Final assembly scaffolds fasta", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-dc20ch29/nmdc_dobj-11-dc20ch29_scaffolds.fna", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-zr4b4z40", + "name": "Assembled contigs coverage information", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 13412363, + "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "data_object_type": { + "_code": { 
+ "text": "Assembly Coverage Stats", + "description": "Assembled contigs coverage information", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-zr4b4z40/nmdc_dobj-11-zr4b4z40_covstats.txt", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-fe6kea76", + "name": "An AGP format file that describes the assembly", + "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 12542171, + "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "data_object_type": { + "_code": { + "text": "Assembly AGP", + "description": "An AGP format file that describes the assembly", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + 
"deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-fe6kea76/nmdc_dobj-11-fe6kea76_assembly.agp", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-f8bj1b18", + "name": "Sorted bam file of reads mapping back to the final assembly", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 2773429299, + "md5_checksum": "31dc958d116d02122509e90b0883954f", + "data_object_type": { + "_code": { + "text": "Assembly Coverage BAM", + "description": "Sorted bam file of reads mapping back to the final assembly", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "compression_type": null, + 
"was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-f8bj1b18/nmdc_dobj-11-f8bj1b18_pairedMapped_sorted.bam", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-rw9dtn21", + "name": "GOTTCHA2 classification report file", + "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 13174, + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "data_object_type": { + "_code": { + "text": "GOTTCHA2 Classification Report", + "description": "GOTTCHA2 classification report file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-rw9dtn21/nmdc_dobj-11-rw9dtn21_gottcha2_report.tsv", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-0wqjwr10", + "name": "GOTTCHA2 report file", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 1035818, + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "data_object_type": { + "_code": { + "text": "GOTTCHA2 Report 
Full", + "description": "GOTTCHA2 report file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-0wqjwr10/nmdc_dobj-11-0wqjwr10_gottcha2_report_full.tsv", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-405tgq07", + "name": "GOTTCHA2 krona plot HTML file", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 262669, + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "data_object_type": { + "_code": { + "text": "GOTTCHA2 Krona Plot", + "description": "GOTTCHA2 krona plot HTML file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + 
"aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "compression_type": null, + "was_generated_by": null, + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-405tgq07/nmdc_dobj-11-405tgq07_gottcha2_krona.html", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-h7mbw370", + "name": "Centrifuge output read classification file", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 2189843623, + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "data_object_type": { + "_code": { + "text": "Centrifuge Taxonomic Classification", + "description": "Centrifuge output read classification file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, + "compression_type": null, + "was_generated_by": null, + "url": 
"/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-h7mbw370/nmdc_dobj-11-h7mbw370_centrifuge_classification.tsv", + "type": "nmdc:Data_Object" + }, + { + "id": "nmdc:dobj-11-8a9rmt95", + "name": "Centrifuge output report file", + "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", + "alternative_identifiers": [], + "file_size_bytes": 260134, + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "data_object_type": { + "_code": { + "text": "Centrifuge Classification Report", + "description": "Centrifuge output report file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-1g293v37/nmdc_dobj-11-1g293v37_contigs.fna", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-8a9rmt95/nmdc_dobj-11-8a9rmt95_centrifuge_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-9a01ta97", - "name": "Final assembly scaffolds fasta", - "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", + "id": "nmdc:dobj-11-wbynk051", + "name": "Centrifug krona plot HTML file", + 
"description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - "file_size_bytes": 89604715, - "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", - "data_object_type": null, + "file_size_bytes": 2343980, + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "data_object_type": { + "_code": { + "text": "Centrifuge Krona Plot", + "description": "Centrifuge krona plot HTML file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-9a01ta97/nmdc_dobj-11-9a01ta97_scaffolds.fna", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-wbynk051/nmdc_dobj-11-wbynk051_centrifuge_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-33scvx87", - "name": "Assembled contigs coverage information", - "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", + "id": "nmdc:dobj-11-wpw7gs92", + "name": "Kraken2 output read classification file", + "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - 
"file_size_bytes": 13412363, - "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", - "data_object_type": null, + "file_size_bytes": 1785563917, + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "data_object_type": { + "_code": { + "text": "Kraken2 Taxonomic Classification", + "description": "Kraken2 output read classification file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-33scvx87/nmdc_dobj-11-33scvx87_covstats.txt", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-wpw7gs92/nmdc_dobj-11-wpw7gs92_kraken2_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-dnhkse46", - "name": "An AGP format file that describes the assembly", - "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", + "id": "nmdc:dobj-11-s4wxq602", + "name": "Kraken2 output report file", + "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - "file_size_bytes": 12542171, - "md5_checksum": "f450e3800e17691d5874c89fc46c186a", - "data_object_type": null, + 
"file_size_bytes": 699896, + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "data_object_type": { + "_code": { + "text": "Kraken2 Classification Report", + "description": "Kraken2 output report file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-dnhkse46/nmdc_dobj-11-dnhkse46_assembly.agp", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-s4wxq602/nmdc_dobj-11-s4wxq602_kraken2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-v9rgvc54", - "name": "Sorted bam file of reads mapping back to the final assembly", - "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", + "id": "nmdc:dobj-11-vf207x18", + "name": "Kraken2 Krona plot HTML file", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - "file_size_bytes": 2773429299, - "md5_checksum": "31dc958d116d02122509e90b0883954f", - "data_object_type": null, + "file_size_bytes": 4221977, + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "data_object_type": { + "_code": { + 
"text": "Kraken2 Krona Plot", + "description": "Kraken2 krona plot HTML file", + "meaning": null, + "unit": null, + "is_a": null, + "mixins": [], + "extensions": {}, + "annotations": {}, + "alt_descriptions": {}, + "title": null, + "deprecated": null, + "todos": [], + "notes": [], + "comments": [], + "examples": [], + "in_subset": [], + "from_schema": null, + "imported_from": null, + "source": null, + "in_language": null, + "see_also": [], + "deprecated_element_has_exact_replacement": null, + "deprecated_element_has_possible_replacement": null, + "aliases": [], + "structured_aliases": {}, + "mappings": [], + "exact_mappings": [], + "close_mappings": [], + "related_mappings": [], + "narrow_mappings": [], + "broad_mappings": [], + "created_by": null, + "contributors": [], + "created_on": null, + "last_updated_on": null, + "modified_by": null, + "status": null, + "rank": null, + "categories": [], + "keywords": [] + } + }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-v9rgvc54/nmdc_dobj-11-v9rgvc54_pairedMapped_sorted.bam", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-vf207x18/nmdc_dobj-11-vf207x18_kraken2_krona.html", "type": "nmdc:Data_Object" } ], @@ -120,7 +925,62 @@ "material_sampling_activity_set": [], "metabolomics_analysis_activity_set": [], "metagenome_annotation_activity_set": [], - "metagenome_assembly_set": [], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-wa0zvp69", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "used": null, + "execution_resource": "NERSC - Perlmutter", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-hfr31e20", + "nmdc:dobj-11-xc5jhn80" + ], + "has_output": [ + "nmdc:dobj-11-wz0zg037", + 
"nmdc:dobj-11-dc20ch29", + "nmdc:dobj-11-zr4b4z40", + "nmdc:dobj-11-fe6kea76", + "nmdc:dobj-11-f8bj1b18" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.3", + "asm_score": 6.577, + "scaffolds": 169645, + "scaf_logsum": 215363, + "scaf_powsum": 24422, + "scaf_max": 68135, + "scaf_bp": 83496490, + "scaf_n50": 45550, + "scaf_n90": 141870, + "scaf_l50": 470, + "scaf_l90": 290, + "scaf_n_gt50k": 1, + "scaf_l_gt50k": 68135, + "scaf_pct_gt50k": 0.08160224, + "contigs": 169784, + "contig_bp": 83494920, + "ctg_n50": 45584, + "ctg_l50": 470, + "ctg_n90": 141996, + "ctg_l90": 290, + "ctg_logsum": 214373, + "ctg_powsum": 24284, + "ctg_max": 68135, + "gap_pct": 0.00188, + "gc_std": 0.11726, + "gc_avg": 0.46001, + "num_input_reads": null, + "num_aligned_reads": null, + "insdc_assembly_identifiers": null + } + ], "metagenome_sequencing_activity_set": [], "metaproteomics_analysis_activity_set": [], "metatranscriptome_activity_set": [], @@ -140,7 +1000,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-7ytdgk21" + "nmdc:dobj-11-q4pmsv93" ], "insdc_experiment_identifiers": [], "instrument_name": null, @@ -224,7 +1084,7 @@ "reaction_activity_set": [], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-jm2edq55", + "id": "nmdc:wfrqc-11-wq4r6842", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -233,11 +1093,11 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-7ytdgk21" + "nmdc:dobj-11-q4pmsv93" ], "has_output": [ - "nmdc:dobj-11-xt71bd89", - "nmdc:dobj-11-3q6mm589" + "nmdc:dobj-11-hfr31e20", + "nmdc:dobj-11-xc5jhn80" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ @@ -252,7 +1112,41 @@ "output_read_bases": 4608772924 } ], - "read_based_taxonomy_analysis_activity_set": [], + "read_based_taxonomy_analysis_activity_set": [ + { 
+ "id": "nmdc:wfrbt-11-psqmn810", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "used": null, + "execution_resource": "NERSC - Perlmutter", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-wz0zg037", + "nmdc:dobj-11-dc20ch29", + "nmdc:dobj-11-zr4b4z40", + "nmdc:dobj-11-fe6kea76", + "nmdc:dobj-11-f8bj1b18" + ], + "has_output": [ + "nmdc:dobj-11-rw9dtn21", + "nmdc:dobj-11-0wqjwr10", + "nmdc:dobj-11-405tgq07", + "nmdc:dobj-11-h7mbw370", + "nmdc:dobj-11-8a9rmt95", + "nmdc:dobj-11-wbynk051", + "nmdc:dobj-11-wpw7gs92", + "nmdc:dobj-11-s4wxq602", + "nmdc:dobj-11-vf207x18" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.5" + } + ], "study_set": [], "field_research_site_set": [], "collecting_biosamples_from_site_set": [], From 38c2dd7c0dfb4dc8b8454f6725753b2f07a75a9d Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 13 Nov 2023 14:19:42 -0800 Subject: [PATCH 53/91] exteded file operations for computing new paths and assembly operations --- nmdc_automation/re_iding/file_utils.py | 184 ++++++++++++++++++++++++- 1 file changed, 183 insertions(+), 1 deletion(-) diff --git a/nmdc_automation/re_iding/file_utils.py b/nmdc_automation/re_iding/file_utils.py index b1ad8342..69b14f36 100644 --- a/nmdc_automation/re_iding/file_utils.py +++ b/nmdc_automation/re_iding/file_utils.py @@ -3,8 +3,18 @@ file_utils.py: Provides utility functions for working with files. 
""" import logging +import os +import hashlib +import json +import gzip +from subprocess import check_output from typing import Dict, Optional + +base_dir = "/global/cfs/cdirs/m3408/results" +bam_script = os.path.abspath("scripts/rewrite_bam.sh") +base = "https://data.microbiomedata.org/data" + logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', @@ -35,4 +45,176 @@ def find_data_object_type(data_object_rec: Dict)-> Optional[str]: return "GOTTCHA2 Report Full" else: logger.error(f"Missing type: {url}") - return None \ No newline at end of file + return None + +def md5_sum(fn): + """ + Calculate the MD5 hash of a file. + + Args: + - fn (str): Path to the file for which the MD5 hash is to be computed. + + Returns: + - str: The MD5 hash of the file. + """ + with open(fn, "rb") as f: + file_hash = hashlib.md5() + while chunk := f.read(8192): + file_hash.update(chunk) + return file_hash.hexdigest() + + +def read_json_file(filename): + """ + Read a JSON file and return its content as a dictionary. + + Parameters: + - filename (str): The path to the JSON file. + + Returns: + - dict: The content of the JSON file. + """ + with open(filename, "r") as json_file: + data = json.load(json_file) + return data + + +def rewrite_id(src, dst, old_id, new_id, prefix=None): + """ + Rewrite lines in a file, replacing occurrences of an old ID with a new ID. + An optional prefix can be specified to limit which lines are modified. + + Args: + - src (str): Source file path. + - dst (str): Destination file path. + - old_id (str): ID to be replaced. + - new_id (str): Replacement ID. + - prefix (str, optional): Prefix character to determine which lines to modify. Defaults to None. + + Returns: + - tuple: MD5 checksum and size (in bytes) of the modified file. 
+ """ + fsrc = open(src) + fdst = open(dst, "w") + for line in fsrc: + if not prefix or (prefix and line[0] == prefix): + line = line.replace(old_id, new_id) + fdst.write(line) + fsrc.close() + fdst.close() + md5 = md5_sum(dst) + size = os.stat(dst).st_size + return md5, size + + +def find_assembly_id(src): + fsrc = open(src) + line = fsrc.readline() + return "_".join(line[1:].split("_")[0:-1]) + + +def assembly_contigs(src, dst, act_id): + scaf = src.replace("_contigs", "_scaffolds") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id, prefix=">") + + +def assembly_scaffolds(src, dst, act_id): + old_id = find_assembly_id(src) + return rewrite_id(src, dst, old_id, act_id, prefix=">") + + +def assembly_coverage_stats(src, dst, act_id): + scaf = src.replace("_covstats.txt", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id) + + +def assembly_agp(src, dst, act_id): + scaf = src.replace("_assembly.agp", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return rewrite_id(src, dst, old_id, act_id) + + +def convert_script(script, src, dst, old_id, act_id): + cmd = [script, src, dst, old_id, act_id] + results = check_output(cmd) + md5 = md5_sum(dst) + size = os.stat(dst).st_size + return md5, size + + +def assembly_coverage_bam(script, src, dst, act_id): + scaf = src.replace("_pairedMapped_sorted.bam", "_scaffolds.fna") + old_id = find_assembly_id(scaf) + return convert_script(script, src, dst, old_id, act_id) + + +def rewrite_sam(input_sam, output_sam, old_id, new_id): + with gzip.open(input_sam, "rt") as f_in, gzip.open(output_sam, "wt") as f_out: + for line in f_in: + f_out.write(line.replace(old_id, new_id)) + + +def get_old_file_path(data_object_record): + old_url = data_object_record["url"] + suffix = old_url.split("https://data.microbiomedata.org/data/")[1] + old_file_path = base_dir + "/" + suffix + + return old_file_path + + +def assembly_file_operations(data_object_record, destination, 
act_id): + # get old file path upfront + old_file_path = get_old_file_path(data_object_record) + + if data_object_record["data_object_type"] == "Assembly Coverage Stats": + md5, size = assembly_coverage_stats(old_file_path, destination, act_id) + elif data_object_record["data_object_type"] == "Assembly Contigs": + md5, size = assembly_contigs(old_file_path, destination, act_id) + elif data_object_record["data_object_type"] == "Assembly Scaffolds": + md5, size = assembly_scaffolds(old_file_path, destination, act_id) + elif data_object_record["data_object_type"] == "Assembly AGP": + md5, size = assembly_agp(old_file_path, destination, act_id) + elif data_object_record["data_object_type"] == "Assembly Coverage BAM": + md5, size = assembly_coverage_bam( + bam_script, old_file_path, destination, act_id + ) + + return md5, size + +def get_new_paths(old_url, new_base_dir, act_id): + """ + Use the url to return the string value of name path and url + """ + file_name = old_url.split("/")[-1] + file_extenstion = file_name.lstrip("nmdc_").split("_", maxsplit=1)[-1] + new_file_name = f"{act_id}_{file_extenstion}" + modified_new_file_name = new_file_name.replace(":", "_") + destination = os.path.join(new_base_dir, modified_new_file_name) + + return destination + + +def compute_new_paths(old_url, new_base_dir, act_id): + """ + Use the url to compute the new file name path and url + """ + file_name = old_url.split("/")[-1] + suffix = old_url.split("https://data.microbiomedata.org/data/")[1] + old_file_path = base_dir + "/" + suffix + file_extenstion = file_name.lstrip("nmdc_").split("_", maxsplit=1)[-1] + new_file_name = f"{act_id}_{file_extenstion}" + modified_new_file_name = new_file_name.replace(":", "_") + destination = os.path.join(new_base_dir, modified_new_file_name) + + try: + os.link(old_file_path, destination) + logging.info(f"Successfully created link between {old_file_path} and {destination}") + except OSError as e: + logging.error(f"An error occurred while linking 
the file: {e}") + except Exception as e: + logging.error(f"Unexpected error: {e}") + + + return destination \ No newline at end of file From e46924dddde0052d11be0e3b6b44bc5c614a0fde Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 13 Nov 2023 14:20:22 -0800 Subject: [PATCH 54/91] added file operations and fixed url names --- nmdc_automation/re_iding/base.py | 74 ++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index f9ea0831..47d8bbd5 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -6,6 +6,7 @@ import copy import logging from typing import Dict, List +import os import re import yaml @@ -20,7 +21,10 @@ check_for_single_omics_processing_record, get_data_object_record_by_id, get_omics_processing_id) -from nmdc_automation.re_iding.file_utils import find_data_object_type +from nmdc_automation.re_iding.file_utils import (find_data_object_type, + compute_new_paths, + get_new_paths, + assembly_file_operations) NAPA_TEMPLATE = "../../../configs/re_iding_worklfows.yaml" BASE_DIR = "/global/cfs/cdirs/m3408/results" @@ -142,6 +146,12 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, activity_type = "nmdc:ReadQcAnalysisActivity" omics_processing_id = new_omics_processing.id has_input = new_omics_processing.has_output + + new_activity_id = self.api_client.minter(activity_type) + logging.info("New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") + + new_readsqc_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) + os.makedirs(new_readsqc_base_dir, exist_ok=True) updated_has_output = [] # Get ReadQC data objects and update IDs @@ -149,8 +159,12 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, logger.info(f"old_do_id: {old_do_id}") old_do_rec = get_data_object_record_by_id(db_record, old_do_id) data_object_type = 
find_data_object_type(old_do_rec) + new_file_path = compute_new_paths( + old_do_rec["url"], new_readsqc_base_dir, omics_processing_id, new_activity_id + ) + logging.info(f"New file path computed for {data_object_type}: {new_file_path}") new_do = self._make_new_data_object( - omics_processing_id, activity_type, old_do_rec, + omics_processing_id, activity_type, new_activity_id, old_do_rec, data_object_type, ) # add new data object to new database and update has_output @@ -159,7 +173,7 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, # Get new ReadQC activity set new_reads_qc = self._make_new_activity_set_object( - omics_processing_id, reads_qc_rec, has_input, updated_has_output + omics_processing_id, new_activity_id, reads_qc_rec, has_input, updated_has_output ) # update activity-specific properties unset_properties = [ @@ -189,14 +203,28 @@ def update_metagenome_assembly_set(self, db_record: Dict, new_read_qc = new_db.read_qc_analysis_activity_set[0] has_input = new_read_qc.has_output updated_has_output = [] + + new_activity_id = self.api_client.minter(activity_type) + logging.info("New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") + + new_assembly_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) + os.makedirs(new_assembly_base_dir, exist_ok=True) + for old_do_id in assembly_rec["has_output"]: logger.info(f"old_do_id: {old_do_id}") old_do_rec = get_data_object_record_by_id(db_record, old_do_id) data_object_type = find_data_object_type(old_do_rec) if not data_object_type: continue + new_file_path = get_new_paths(old_do_rec["url"],new_assembly_base_dir, new_activity_id) + updated_md5, updated_file_size = assembly_file_operations( + old_do_rec, new_file_path, new_activity_id) + logging.info(f"New file path computed for {data_object_type}: {new_file_path}") + #update md5 and file byte size in place to use _make_new_data_object function without functions + 
old_do_rec["file_size_bytes"] = updated_file_size + old_do_rec["md5_checksum"] = updated_md5 new_do = self._make_new_data_object( - omics_processing_id, activity_type, old_do_rec, data_object_type + omics_processing_id, activity_type, new_activity_id, old_do_rec, data_object_type ) # add new data object to new database and update has_output new_db.data_object_set.append(new_do) @@ -204,7 +232,7 @@ def update_metagenome_assembly_set(self, db_record: Dict, # Get new Metagenome Assembly activity set new_assembly = self._make_new_activity_set_object( - omics_processing_id, assembly_rec, has_input, + omics_processing_id, new_activity_id, assembly_rec, has_input, updated_has_output ) # update activity-specific properties @@ -234,8 +262,15 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, "read_based_taxonomy_analysis_activity_set"]: activity_type = "nmdc:ReadBasedTaxonomyAnalysisActivity" omics_processing_id = new_omics_processing.id - new_assembly = new_db.metagenome_assembly_set[0] - has_input = new_assembly.has_output + new_read_qc = new_db.read_qc_analysis_activity_set[0] + has_input = new_read_qc.has_output + + new_activity_id = self.api_client.minter(activity_type) + logging.info("New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") + + new_readbased_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) + os.makedirs(new_readbased_base_dir, exist_ok=True) + updated_has_output = [] for old_do_id in read_based_rec["has_output"]: logger.info(f"old_do_id: {old_do_id}") @@ -243,8 +278,12 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, data_object_type = find_data_object_type(old_do_rec) if not data_object_type: continue + new_file_path = compute_new_paths( + old_do_rec["url"], new_readbased_base_dir, omics_processing_id, new_activity_id + ) + logging.info(f"New file path computed for {data_object_type}: {new_file_path}") new_do = 
self._make_new_data_object( - omics_processing_id, activity_type, old_do_rec, data_object_type + omics_processing_id, activity_type, new_activity_id, old_do_rec, data_object_type ) # add new data object to new database and update has_output new_db.data_object_set.append(new_do) @@ -252,7 +291,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, # Get new ReadBasedTaxonomyAnalysisActivity activity set new_read_based = self._make_new_activity_set_object( - omics_processing_id, read_based_rec, has_input, + omics_processing_id, new_activity_id,read_based_rec, has_input, updated_has_output ) # update activity-specific properties @@ -269,7 +308,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, return new_db - def _make_new_activity_set_object(self, omics_processing_id: str, + def _make_new_activity_set_object(self, omics_processing_id: str, new_activity_id: str, activity_set_rec: Dict, has_input: List, has_output: List) -> WorkflowExecutionActivity: """ @@ -283,7 +322,7 @@ def _make_new_activity_set_object(self, omics_processing_id: str, activity_type = "nmdc:ReadBasedTaxonomyAnalysisActivity" template = self._workflow_template_for_type(activity_type) activity_class = getattr(nmdc, template["ActivityRange"]) - new_activity_id = self.api_client.minter(activity_type) + logger.info( f"{activity_type}\t{activity_set_rec['id']}\t{new_activity_id}" ) @@ -304,6 +343,7 @@ def _make_new_activity_set_object(self, omics_processing_id: str, def _make_new_data_object(self, omics_processing_id: str, activity_type: str, + new_activity_id: str, data_object_rec: Dict, data_object_type: str) -> NmdcDataObject: """ @@ -318,9 +358,9 @@ def _make_new_data_object(self, omics_processing_id: str, "[^ ]+$", f"{omics_processing_id}", data_object_rec["description"] ) logger.info(f"new_description: {new_description}") - new_filename = self._make_new_filename(new_data_object_id, data_object_rec) + new_filename = 
self._make_new_filename(new_activity_id, data_object_rec) logger.info(f"new_filename: {new_filename}") - new_url = f"{BASE_DIR}/{omics_processing_id}/{new_data_object_id}/{new_filename}" + new_url = f"{BASE_DIR}/{omics_processing_id}/{new_activity_id}/{new_filename}" data_object = NmdcDataObject( id=new_data_object_id, @@ -334,18 +374,14 @@ def _make_new_data_object(self, omics_processing_id: str, ) return data_object - def _make_new_filename(self, new_data_object_id: str, + def _make_new_filename(self, new_activity_id: str, data_object_record: Dict) -> str: """ Return the updated filename. """ filename = data_object_record["url"].split("/")[-1] file_extenstion = filename.lstrip("nmdc_").split("_", maxsplit=1)[-1] - new_filename = f"{new_data_object_id}_{file_extenstion}".replace(":", + new_filename = f"{new_activity_id}_{file_extenstion}".replace(":", "_") return new_filename - - - - From 0412ed0586eb1f0e56bf24d29a55307d19b06059 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 13 Nov 2023 14:36:40 -0800 Subject: [PATCH 55/91] pass data_object_type into assembly_file_operations to avoid records missing field --- nmdc_automation/re_iding/file_utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nmdc_automation/re_iding/file_utils.py b/nmdc_automation/re_iding/file_utils.py index 69b14f36..59b877c7 100644 --- a/nmdc_automation/re_iding/file_utils.py +++ b/nmdc_automation/re_iding/file_utils.py @@ -12,7 +12,7 @@ base_dir = "/global/cfs/cdirs/m3408/results" -bam_script = os.path.abspath("scripts/rewrite_bam.sh") +bam_script = os.path.abspath("rewrite_bam.sh") base = "https://data.microbiomedata.org/data" logging.basicConfig( @@ -164,19 +164,19 @@ def get_old_file_path(data_object_record): return old_file_path -def assembly_file_operations(data_object_record, destination, act_id): +def assembly_file_operations(data_object_record, data_object_type, destination, act_id): # get old file path upfront old_file_path = 
get_old_file_path(data_object_record) - if data_object_record["data_object_type"] == "Assembly Coverage Stats": + if data_object_type == "Assembly Coverage Stats": md5, size = assembly_coverage_stats(old_file_path, destination, act_id) - elif data_object_record["data_object_type"] == "Assembly Contigs": + elif data_object_type == "Assembly Contigs": md5, size = assembly_contigs(old_file_path, destination, act_id) - elif data_object_record["data_object_type"] == "Assembly Scaffolds": + elif data_object_type == "Assembly Scaffolds": md5, size = assembly_scaffolds(old_file_path, destination, act_id) - elif data_object_record["data_object_type"] == "Assembly AGP": + elif data_object_type == "Assembly AGP": md5, size = assembly_agp(old_file_path, destination, act_id) - elif data_object_record["data_object_type"] == "Assembly Coverage BAM": + elif data_object_type == "Assembly Coverage BAM": md5, size = assembly_coverage_bam( bam_script, old_file_path, destination, act_id ) From 74973531f14a191327c002aeee931a7cb89e7d3b Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 13 Nov 2023 14:37:38 -0800 Subject: [PATCH 56/91] pass data_object_type directly to assembly_file_operations --- nmdc_automation/re_iding/base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 47d8bbd5..66ebab03 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -148,7 +148,7 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, has_input = new_omics_processing.has_output new_activity_id = self.api_client.minter(activity_type) - logging.info("New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") + logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") new_readsqc_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) 
os.makedirs(new_readsqc_base_dir, exist_ok=True) @@ -160,7 +160,7 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, old_do_rec = get_data_object_record_by_id(db_record, old_do_id) data_object_type = find_data_object_type(old_do_rec) new_file_path = compute_new_paths( - old_do_rec["url"], new_readsqc_base_dir, omics_processing_id, new_activity_id + old_do_rec["url"], new_readsqc_base_dir, new_activity_id ) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") new_do = self._make_new_data_object( @@ -205,7 +205,7 @@ def update_metagenome_assembly_set(self, db_record: Dict, updated_has_output = [] new_activity_id = self.api_client.minter(activity_type) - logging.info("New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") + logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") new_assembly_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) os.makedirs(new_assembly_base_dir, exist_ok=True) @@ -218,7 +218,7 @@ def update_metagenome_assembly_set(self, db_record: Dict, continue new_file_path = get_new_paths(old_do_rec["url"],new_assembly_base_dir, new_activity_id) updated_md5, updated_file_size = assembly_file_operations( - old_do_rec, new_file_path, new_activity_id) + old_do_rec, data_object_type, new_file_path, new_activity_id) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") #update md5 and file byte size in place to use _make_new_data_object function without functions old_do_rec["file_size_bytes"] = updated_file_size @@ -266,7 +266,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, has_input = new_read_qc.has_output new_activity_id = self.api_client.minter(activity_type) - logging.info("New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") + logging.info(f"New activity id created for 
{omics_processing_id} activity type {activity_type}: {new_activity_id}") new_readbased_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) os.makedirs(new_readbased_base_dir, exist_ok=True) @@ -279,7 +279,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, if not data_object_type: continue new_file_path = compute_new_paths( - old_do_rec["url"], new_readbased_base_dir, omics_processing_id, new_activity_id + old_do_rec["url"], new_readbased_base_dir, new_activity_id ) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") new_do = self._make_new_data_object( From 5c8a4f0d9cf268307910639034a6f03919752ad1 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 13 Nov 2023 14:39:12 -0800 Subject: [PATCH 57/91] Added with file operations --- .../data/dryrun_re_ided_record_dump.json | 139 +++++++++--------- 1 file changed, 68 insertions(+), 71 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index d1e04d60..74e29a57 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -8,7 +8,7 @@ "biosample_set": [], "data_object_set": [ { - "id": "nmdc:dobj-11-q4pmsv93", + "id": "nmdc:dobj-11-bgp6z123", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "alternative_identifiers": [], @@ -21,7 +21,7 @@ "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-hfr31e20", + "id": "nmdc:dobj-11-b20kzg40", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -73,11 +73,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-hfr31e20/nmdc_dobj-11-hfr31e20_filtered.fastq.gz", + "url": 
"/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-5sfwhy50/nmdc_wfrqc-11-5sfwhy50_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-xc5jhn80", + "id": "nmdc:dobj-11-mhccey92", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -129,16 +129,16 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-xc5jhn80/nmdc_dobj-11-xc5jhn80_filterStats.txt", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-5sfwhy50/nmdc_wfrqc-11-5sfwhy50_filterStats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-wz0zg037", + "id": "nmdc:dobj-11-b27rkd90", "name": "Final assembly contigs fasta", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - "file_size_bytes": 90115831, - "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", + "file_size_bytes": 90794959, + "md5_checksum": "1a2ade31cf1edad5430ef0939d94ac43", "data_object_type": { "_code": { "text": "Assembly Contigs", @@ -185,16 +185,16 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-wz0zg037/nmdc_dobj-11-wz0zg037_contigs.fna", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_contigs.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-dc20ch29", + "id": "nmdc:dobj-11-tcg9he98", "name": "Final assembly scaffolds fasta", "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - "file_size_bytes": 89604715, - "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "file_size_bytes": 90283295, + "md5_checksum": "642e3bae5809fa7a2f0be592e0693a10", "data_object_type": { "_code": { "text": "Assembly Scaffolds", @@ -241,16 +241,16 @@ }, "compression_type": 
null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-dc20ch29/nmdc_dobj-11-dc20ch29_scaffolds.fna", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_scaffolds.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-zr4b4z40", + "id": "nmdc:dobj-11-z5f6jq78", "name": "Assembled contigs coverage information", "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - "file_size_bytes": 13412363, - "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "file_size_bytes": 14091491, + "md5_checksum": "298156a0eddb0aa59ea3e312406fb56f", "data_object_type": { "_code": { "text": "Assembly Coverage Stats", @@ -297,16 +297,16 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-zr4b4z40/nmdc_dobj-11-zr4b4z40_covstats.txt", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_covstats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-fe6kea76", + "id": "nmdc:dobj-11-kv8gp304", "name": "An AGP format file that describes the assembly", "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - "file_size_bytes": 12542171, - "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "file_size_bytes": 13901555, + "md5_checksum": "c319c47a6c2f2469ecdcaf972abd8f2b", "data_object_type": { "_code": { "text": "Assembly AGP", @@ -353,16 +353,16 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-fe6kea76/nmdc_dobj-11-fe6kea76_assembly.agp", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_assembly.agp", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-f8bj1b18", + "id": 
"nmdc:dobj-11-xkzgxw24", "name": "Sorted bam file of reads mapping back to the final assembly", "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], - "file_size_bytes": 2773429299, - "md5_checksum": "31dc958d116d02122509e90b0883954f", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": { "_code": { "text": "Assembly Coverage BAM", @@ -409,11 +409,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-f8bj1b18/nmdc_dobj-11-f8bj1b18_pairedMapped_sorted.bam", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_pairedMapped_sorted.bam", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-rw9dtn21", + "id": "nmdc:dobj-11-vw7fz645", "name": "GOTTCHA2 classification report file", "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -465,11 +465,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-rw9dtn21/nmdc_dobj-11-rw9dtn21_gottcha2_report.tsv", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_gottcha2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-0wqjwr10", + "id": "nmdc:dobj-11-bhnxqw16", "name": "GOTTCHA2 report file", "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -521,11 +521,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-0wqjwr10/nmdc_dobj-11-0wqjwr10_gottcha2_report_full.tsv", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_gottcha2_report_full.tsv", "type": "nmdc:Data_Object" }, { - "id": 
"nmdc:dobj-11-405tgq07", + "id": "nmdc:dobj-11-hycy9b80", "name": "GOTTCHA2 krona plot HTML file", "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -577,11 +577,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-405tgq07/nmdc_dobj-11-405tgq07_gottcha2_krona.html", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_gottcha2_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-h7mbw370", + "id": "nmdc:dobj-11-8prvd856", "name": "Centrifuge output read classification file", "description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -633,11 +633,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-h7mbw370/nmdc_dobj-11-h7mbw370_centrifuge_classification.tsv", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_centrifuge_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-8a9rmt95", + "id": "nmdc:dobj-11-acryw383", "name": "Centrifuge output report file", "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -689,11 +689,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-8a9rmt95/nmdc_dobj-11-8a9rmt95_centrifuge_report.tsv", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_centrifuge_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-wbynk051", + "id": "nmdc:dobj-11-f6j3wm49", "name": "Centrifug krona plot HTML file", "description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ 
-745,11 +745,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-wbynk051/nmdc_dobj-11-wbynk051_centrifuge_krona.html", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_centrifuge_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-wpw7gs92", + "id": "nmdc:dobj-11-xbn3d347", "name": "Kraken2 output read classification file", "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -801,11 +801,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-wpw7gs92/nmdc_dobj-11-wpw7gs92_kraken2_classification.tsv", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_kraken2_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-s4wxq602", + "id": "nmdc:dobj-11-sf23g974", "name": "Kraken2 output report file", "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -857,11 +857,11 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-s4wxq602/nmdc_dobj-11-s4wxq602_kraken2_report.tsv", + "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_kraken2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-vf207x18", + "id": "nmdc:dobj-11-cn18ys57", "name": "Kraken2 Krona plot HTML file", "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "alternative_identifiers": [], @@ -913,7 +913,7 @@ }, "compression_type": null, "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:dobj-11-vf207x18/nmdc_dobj-11-vf207x18_kraken2_krona.html", + "url": 
"/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_kraken2_krona.html", "type": "nmdc:Data_Object" } ], @@ -927,7 +927,7 @@ "metagenome_annotation_activity_set": [], "metagenome_assembly_set": [ { - "id": "nmdc:wfmgas-11-wa0zvp69", + "id": "nmdc:wfmgas-11-xc17b248", "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -936,15 +936,15 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/metaAssembly", "has_input": [ - "nmdc:dobj-11-hfr31e20", - "nmdc:dobj-11-xc5jhn80" + "nmdc:dobj-11-b20kzg40", + "nmdc:dobj-11-mhccey92" ], "has_output": [ - "nmdc:dobj-11-wz0zg037", - "nmdc:dobj-11-dc20ch29", - "nmdc:dobj-11-zr4b4z40", - "nmdc:dobj-11-fe6kea76", - "nmdc:dobj-11-f8bj1b18" + "nmdc:dobj-11-b27rkd90", + "nmdc:dobj-11-tcg9he98", + "nmdc:dobj-11-z5f6jq78", + "nmdc:dobj-11-kv8gp304", + "nmdc:dobj-11-xkzgxw24" ], "type": "nmdc:MetagenomeAssembly", "part_of": [ @@ -1000,7 +1000,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-q4pmsv93" + "nmdc:dobj-11-bgp6z123" ], "insdc_experiment_identifiers": [], "instrument_name": null, @@ -1084,7 +1084,7 @@ "reaction_activity_set": [], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-wq4r6842", + "id": "nmdc:wfrqc-11-5sfwhy50", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -1093,11 +1093,11 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-q4pmsv93" + "nmdc:dobj-11-bgp6z123" ], "has_output": [ - "nmdc:dobj-11-hfr31e20", - "nmdc:dobj-11-xc5jhn80" + "nmdc:dobj-11-b20kzg40", + "nmdc:dobj-11-mhccey92" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ @@ -1114,7 +1114,7 @@ ], "read_based_taxonomy_analysis_activity_set": [ { - "id": 
"nmdc:wfrbt-11-psqmn810", + "id": "nmdc:wfrbt-11-wy6fa249", "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -1123,22 +1123,19 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", "has_input": [ - "nmdc:dobj-11-wz0zg037", - "nmdc:dobj-11-dc20ch29", - "nmdc:dobj-11-zr4b4z40", - "nmdc:dobj-11-fe6kea76", - "nmdc:dobj-11-f8bj1b18" + "nmdc:dobj-11-b20kzg40", + "nmdc:dobj-11-mhccey92" ], "has_output": [ - "nmdc:dobj-11-rw9dtn21", - "nmdc:dobj-11-0wqjwr10", - "nmdc:dobj-11-405tgq07", - "nmdc:dobj-11-h7mbw370", - "nmdc:dobj-11-8a9rmt95", - "nmdc:dobj-11-wbynk051", - "nmdc:dobj-11-wpw7gs92", - "nmdc:dobj-11-s4wxq602", - "nmdc:dobj-11-vf207x18" + "nmdc:dobj-11-vw7fz645", + "nmdc:dobj-11-bhnxqw16", + "nmdc:dobj-11-hycy9b80", + "nmdc:dobj-11-8prvd856", + "nmdc:dobj-11-acryw383", + "nmdc:dobj-11-f6j3wm49", + "nmdc:dobj-11-xbn3d347", + "nmdc:dobj-11-sf23g974", + "nmdc:dobj-11-cn18ys57" ], "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", "part_of": [ From 62987ae4d189a98849473c08d078adae734e76c7 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 15 Nov 2023 09:51:16 -0800 Subject: [PATCH 58/91] localize data file ops and fix output serialization --- nmdc_automation/re_iding/base.py | 21 +++++++++++++-------- nmdc_automation/re_iding/file_utils.py | 15 ++++++++------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 66ebab03..eff1de17 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -27,7 +27,7 @@ assembly_file_operations) NAPA_TEMPLATE = "../../../configs/re_iding_worklfows.yaml" -BASE_DIR = "/global/cfs/cdirs/m3408/results" +# BASE_DIR = "/global/cfs/cdirs/m3408/results" logging.basicConfig( level=logging.INFO, @@ -150,7 +150,8 @@ def 
update_reads_qc_analysis_activity_set(self, db_record: Dict, new_activity_id = self.api_client.minter(activity_type) logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") - new_readsqc_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) + new_readsqc_base_dir = os.path.join(self.data_dir, omics_processing_id, + new_activity_id) os.makedirs(new_readsqc_base_dir, exist_ok=True) updated_has_output = [] @@ -160,7 +161,7 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, old_do_rec = get_data_object_record_by_id(db_record, old_do_id) data_object_type = find_data_object_type(old_do_rec) new_file_path = compute_new_paths( - old_do_rec["url"], new_readsqc_base_dir, new_activity_id + old_do_rec["url"], new_readsqc_base_dir, new_activity_id, self.data_dir ) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") new_do = self._make_new_data_object( @@ -207,7 +208,8 @@ def update_metagenome_assembly_set(self, db_record: Dict, new_activity_id = self.api_client.minter(activity_type) logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") - new_assembly_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) + new_assembly_base_dir = os.path.join(self.data_dir, omics_processing_id, + new_activity_id) os.makedirs(new_assembly_base_dir, exist_ok=True) for old_do_id in assembly_rec["has_output"]: @@ -218,7 +220,8 @@ def update_metagenome_assembly_set(self, db_record: Dict, continue new_file_path = get_new_paths(old_do_rec["url"],new_assembly_base_dir, new_activity_id) updated_md5, updated_file_size = assembly_file_operations( - old_do_rec, data_object_type, new_file_path, new_activity_id) + old_do_rec, data_object_type, new_file_path, new_activity_id, + self.data_dir) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") #update md5 and file byte size in place 
to use _make_new_data_object function without functions old_do_rec["file_size_bytes"] = updated_file_size @@ -268,7 +271,8 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, new_activity_id = self.api_client.minter(activity_type) logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") - new_readbased_base_dir = os.path.join(BASE_DIR, omics_processing_id, new_activity_id) + new_readbased_base_dir = os.path.join(self.data_dir, omics_processing_id, + new_activity_id) os.makedirs(new_readbased_base_dir, exist_ok=True) updated_has_output = [] @@ -279,7 +283,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, if not data_object_type: continue new_file_path = compute_new_paths( - old_do_rec["url"], new_readbased_base_dir, new_activity_id + old_do_rec["url"], new_readbased_base_dir, new_activity_id, self.data_dir ) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") new_do = self._make_new_data_object( @@ -360,7 +364,8 @@ def _make_new_data_object(self, omics_processing_id: str, logger.info(f"new_description: {new_description}") new_filename = self._make_new_filename(new_activity_id, data_object_rec) logger.info(f"new_filename: {new_filename}") - new_url = f"{BASE_DIR}/{omics_processing_id}/{new_activity_id}/{new_filename}" + new_url = (f"{self.data_dir}/{omics_processing_id}/{new_activity_id}" + f"/{new_filename}") data_object = NmdcDataObject( id=new_data_object_id, diff --git a/nmdc_automation/re_iding/file_utils.py b/nmdc_automation/re_iding/file_utils.py index 59b877c7..b5bbae2e 100644 --- a/nmdc_automation/re_iding/file_utils.py +++ b/nmdc_automation/re_iding/file_utils.py @@ -11,7 +11,7 @@ from typing import Dict, Optional -base_dir = "/global/cfs/cdirs/m3408/results" +# BASE_DIR = "/global/cfs/cdirs/m3408/results" bam_script = os.path.abspath("rewrite_bam.sh") base = "https://data.microbiomedata.org/data" @@ -156,17 
+156,18 @@ def rewrite_sam(input_sam, output_sam, old_id, new_id): f_out.write(line.replace(old_id, new_id)) -def get_old_file_path(data_object_record): +def get_old_file_path(data_object_record, old_base_dir): old_url = data_object_record["url"] suffix = old_url.split("https://data.microbiomedata.org/data/")[1] - old_file_path = base_dir + "/" + suffix + old_file_path = old_base_dir + "/" + suffix return old_file_path -def assembly_file_operations(data_object_record, data_object_type, destination, act_id): +def assembly_file_operations(data_object_record, data_object_type, + destination, act_id, old_base_dir): # get old file path upfront - old_file_path = get_old_file_path(data_object_record) + old_file_path = get_old_file_path(data_object_record, old_base_dir) if data_object_type == "Assembly Coverage Stats": md5, size = assembly_coverage_stats(old_file_path, destination, act_id) @@ -196,13 +197,13 @@ def get_new_paths(old_url, new_base_dir, act_id): return destination -def compute_new_paths(old_url, new_base_dir, act_id): +def compute_new_paths(old_url, new_base_dir, act_id, old_base_dir): """ Use the url to compute the new file name path and url """ file_name = old_url.split("/")[-1] suffix = old_url.split("https://data.microbiomedata.org/data/")[1] - old_file_path = base_dir + "/" + suffix + old_file_path = old_base_dir + "/" + suffix file_extenstion = file_name.lstrip("nmdc_").split("_", maxsplit=1)[-1] new_file_name = f"{act_id}_{file_extenstion}" modified_new_file_name = new_file_name.replace(":", "_") From 40e5ce586c4b4cd2c9171e9fe045a87cd3a001d0 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 15 Nov 2023 09:53:16 -0800 Subject: [PATCH 59/91] updated output dump --- .../data/dryrun_associated_record_dump.json | 640 - ...sty-11-aygzgv51_assocated_record_dump.json | 40673 ---------------- 2 files changed, 41313 deletions(-) delete mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json delete mode 100644 
nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_assocated_record_dump.json diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json deleted file mode 100644 index 638c9f4d..00000000 --- a/nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json +++ /dev/null @@ -1,640 +0,0 @@ -[ - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2861414297, - "type": "nmdc:DataObject", - "id": "jgi:55d740280d8785342fcf7e39", - "name": "9422.8.132674.GTTTCG.fastq.gz" - }, - { - "name": "Gp0115663_Filtered Reads", - "description": "Filtered Reads for Gp0115663", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", - "md5_checksum": "7bf778baef033d36f118f8591256d6ef", - "id": "nmdc:7bf778baef033d36f118f8591256d6ef", - "file_size_bytes": 2571324879 - }, - { - "name": "Gp0115663_Filtered Stats", - "description": "Filtered Stats for Gp0115663", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", - "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", - "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", - "file_size_bytes": 290 - }, - { - "name": "Gp0115663_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", - "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", - "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "file_size_bytes": 13174 - }, - { - "name": "Gp0115663_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report 
for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", - "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", - "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "file_size_bytes": 1035818 - }, - { - "name": "Gp0115663_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115663", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", - "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", - "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "file_size_bytes": 262669 - }, - { - "name": "Gp0115663_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115663", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", - "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", - "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "file_size_bytes": 2189843623 - }, - { - "name": "Gp0115663_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115663", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", - "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", - "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "file_size_bytes": 260134 - }, - { - "name": "Gp0115663_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115663", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", - "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", - "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "file_size_bytes": 2343980 - 
}, - { - "name": "Gp0115663_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115663", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", - "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", - "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", - "file_size_bytes": 1785563917 - }, - { - "name": "Gp0115663_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115663", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", - "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", - "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "file_size_bytes": 699896 - }, - { - "name": "Gp0115663_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115663", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", - "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", - "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", - "file_size_bytes": 4221977 - }, - { - "name": "Gp0115663_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115663", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", - "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", - "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", - "file_size_bytes": 90115831 - }, - { - "name": "Gp0115663_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115663", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", - "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", - "id": 
"nmdc:b3573e3cda5a06611de71ca04c5c14cc", - "file_size_bytes": 89604715 - }, - { - "name": "Gp0115663_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", - "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", - "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", - "file_size_bytes": 13412363 - }, - { - "name": "Gp0115663_Assembled AGP file", - "description": "Assembled AGP file for Gp0115663", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", - "md5_checksum": "f450e3800e17691d5874c89fc46c186a", - "id": "nmdc:f450e3800e17691d5874c89fc46c186a", - "file_size_bytes": 12542171 - }, - { - "name": "Gp0115663_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115663", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", - "md5_checksum": "31dc958d116d02122509e90b0883954f", - "id": "nmdc:31dc958d116d02122509e90b0883954f", - "file_size_bytes": 2773429299 - }, - { - "name": "Gp0115663_Protein FAA", - "description": "Protein FAA for Gp0115663", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", - "md5_checksum": "879988d212ecec46928b8598e2f8391f", - "id": "nmdc:879988d212ecec46928b8598e2f8391f", - "file_size_bytes": 50165060 - }, - { - "name": "Gp0115663_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115663", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", - "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", - "id": 
"nmdc:884b95102f5965cc0ee2d9b7f198e5a4", - "file_size_bytes": 2767 - }, - { - "name": "Gp0115663_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115663", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", - "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", - "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", - "file_size_bytes": 55139586 - }, - { - "name": "Gp0115663_KO TSV file", - "description": "KO TSV file for Gp0115663", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", - "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", - "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", - "file_size_bytes": 6023696 - }, - { - "name": "Gp0115663_EC TSV file", - "description": "EC TSV file for Gp0115663", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", - "md5_checksum": "4f88c89459f36655eb7c1eceec19602a", - "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", - "file_size_bytes": 3982918 - }, - { - "name": "Gp0115663_COG GFF file", - "description": "COG GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", - "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", - "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", - "file_size_bytes": 27362917 - }, - { - "name": "Gp0115663_PFAM GFF file", - "description": "PFAM GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", - "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", - "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", - "file_size_bytes": 21572048 - }, - { - "name": "Gp0115663_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115663", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", - "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", - "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", - "file_size_bytes": 2900068 - }, - { - "name": "Gp0115663_SMART GFF file", - "description": "SMART GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", - "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", - "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", - "file_size_bytes": 6905519 - }, - { - "name": "Gp0115663_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", - "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", - "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", - "file_size_bytes": 38787856 - }, - { - "name": "Gp0115663_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", - "md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", - "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", - "file_size_bytes": 30134783 - }, - { - "name": "Gp0115663_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", - "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", - "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", - "file_size_bytes": 19194308 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - 
"file_size_bytes": 0 - }, - { - "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", - "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", - "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", - "file_size_bytes": 70411007 - }, - { - "name": "Gp0115663_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", - "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", - "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", - "file_size_bytes": 15998690 - }, - { - "name": "Gp0115663_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115663", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", - "md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", - "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", - "file_size_bytes": 1530 - }, - { - "name": "Gp0115663_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115663", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", - "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", - "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", - "file_size_bytes": 182 - }, - { - "name": "Gp0115663_metabat2 bins", - "description": "metabat2 bins for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", - "md5_checksum": "27c07072f175571200b5931550adb8aa", - "id": "nmdc:27c07072f175571200b5931550adb8aa", - "file_size_bytes": 1114314 - } - ], - 
"dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34734" - }, - "has_input": [ - "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", - "nmdc:31dc958d116d02122509e90b0883954f", - "nmdc:002e4ebc728f8b91cb5f298d340ab013" - ], - "too_short_contig_num": 159810, - "part_of": [ - "nmdc:mga0h9dt75" - ], - "binned_contig_num": 684, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:c092b018cb4652c4ca0620b37a4b3fad", - "nmdc:70d7c8a307f47adb05056bee1b01f9d4", - "nmdc:4545ab2039ae70f4439a93316f4fb7bc", - "nmdc:280b63ae1cc1fa8d6154a0681d47c399", - "nmdc:27c07072f175571200b5931550adb8aa" - ], - "was_informed_by": "gold:Gp0115663", - "input_contig_num": 169782, - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", - "mags_list": [ - { - "number_of_contig": 61, - "completeness": 13.82, - "bin_name": "bins.1", - "gene_count": 294, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.62, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 0 - }, - { - "number_of_contig": 485, - "completeness": 66.03, - "bin_name": "bins.2", - "gene_count": 2871, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 10.87, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 32 - }, - { - "number_of_contig": 56, - "completeness": 34.23, - "bin_name": "bins.3", - "gene_count": 337, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - 
"contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 9 - }, - { - "number_of_contig": 63, - "completeness": 6.9, - "bin_name": "bins.4", - "gene_count": 276, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 19, - "completeness": 4.45, - "bin_name": "bins.5", - "gene_count": 463, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - } - ], - "unbinned_contig_num": 9288, - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9d6" - }, - "has_input": [ - "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" - ], - "part_of": [ - "nmdc:mga0h9dt75" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:879988d212ecec46928b8598e2f8391f", - "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", - "nmdc:002e4ebc728f8b91cb5f298d340ab013", - "nmdc:6851078f29716d89e3f41f0969ae7bf0", - "nmdc:4f88c89459f36655eb7c1eceec19602a", - "nmdc:a068b9ce6ebb7deb15ff932b513817a9", - "nmdc:618b18fa8635c80cc0091371f451a6f0", - "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", - "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", - "nmdc:fdd2e8741ffef40db383674a10bb4d11", - "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", - "nmdc:75f481e0d98793cfb4f9508cb3e31622" - ], - 
"was_informed_by": "gold:Gp0115663", - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0h9dt75", - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb9" - }, - "has_input": [ - "nmdc:7bf778baef033d36f118f8591256d6ef" - ], - "part_of": [ - "nmdc:mga0h9dt75" - ], - "ctg_logsum": 214373, - "scaf_logsum": 215363, - "gap_pct": 0.00188, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", - "nmdc:b3573e3cda5a06611de71ca04c5c14cc", - "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", - "nmdc:f450e3800e17691d5874c89fc46c186a", - "nmdc:31dc958d116d02122509e90b0883954f" - ], - "asm_score": 6.577, - "was_informed_by": "gold:Gp0115663", - "ctg_powsum": 24284, - "scaf_max": 68135, - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "scaf_powsum": 24422, - "execution_resource": "NERSC-Cori", - "contigs": 169784, - "name": "Assembly Activity for nmdc:mga0h9dt75", - "ctg_max": 68135, - "gc_std": 0.11726, - "contig_bp": 83494920, - "gc_avg": 0.46001, - "started_at_time": "2021-10-11T02:28:26Z", - "scaf_bp": 83496490, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 169645, - "ended_at_time": "2021-10-11T04:56:04+00:00", - "ctg_l50": 470, - "ctg_l90": 290, - "ctg_n50": 45584, - "ctg_n90": 141996, - "scaf_l50": 470, - "scaf_l90": 290, - "scaf_n50": 45550, - "scaf_n90": 141870, - "scaf_l_gt50k": 68135, - "scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.08160224 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b33" - }, - "id": "nmdc:omprc-11-bn8jcq58", - "name": "Sand 
microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", - "has_input": [ - "nmdc:bsm-11-qq8s6x03" - ], - "has_output": [ - "jgi:55d740280d8785342fcf7e39" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115663" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c88b" - }, - "has_input": [ - "nmdc:30a06664f29cffbbbc49abad86eae6fc" - ], - "part_of": [ - "nmdc:mga0h9dt75" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7bf778baef033d36f118f8591256d6ef", - "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" - ], - "was_informed_by": "gold:Gp0115663", - "input_read_count": 32238374, - "output_read_bases": 4608772924, - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4867994474, - "name": "Read QC Activity for nmdc:mga0h9dt75", - "output_read_count": 30774080, - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf4f" - }, - "has_input": [ - "nmdc:7bf778baef033d36f118f8591256d6ef" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - 
"nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "nmdc:7ca01ea379f0baed96f87d1435925f95", - "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" - ], - "was_informed_by": "gold:Gp0115663", - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - } -] \ No newline at end of file diff --git a/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_assocated_record_dump.json b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_assocated_record_dump.json deleted file mode 100644 index d3156d14..00000000 --- a/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_assocated_record_dump.json +++ /dev/null @@ -1,40673 +0,0 @@ -[ - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2861414297, - "type": "nmdc:DataObject", - "id": "jgi:55d740280d8785342fcf7e39", - "name": "9422.8.132674.GTTTCG.fastq.gz" - }, - { - "name": "Gp0115663_Filtered Reads", - "description": "Filtered Reads for Gp0115663", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", - "md5_checksum": "7bf778baef033d36f118f8591256d6ef", - "id": 
"nmdc:7bf778baef033d36f118f8591256d6ef", - "file_size_bytes": 2571324879 - }, - { - "name": "Gp0115663_Filtered Stats", - "description": "Filtered Stats for Gp0115663", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", - "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", - "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", - "file_size_bytes": 290 - }, - { - "name": "Gp0115663_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", - "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", - "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "file_size_bytes": 13174 - }, - { - "name": "Gp0115663_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", - "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", - "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "file_size_bytes": 1035818 - }, - { - "name": "Gp0115663_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115663", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", - "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", - "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "file_size_bytes": 262669 - }, - { - "name": "Gp0115663_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115663", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", - "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", - "id": 
"nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "file_size_bytes": 2189843623 - }, - { - "name": "Gp0115663_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115663", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", - "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", - "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "file_size_bytes": 260134 - }, - { - "name": "Gp0115663_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115663", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", - "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", - "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "file_size_bytes": 2343980 - }, - { - "name": "Gp0115663_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115663", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", - "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", - "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", - "file_size_bytes": 1785563917 - }, - { - "name": "Gp0115663_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115663", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", - "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", - "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "file_size_bytes": 699896 - }, - { - "name": "Gp0115663_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115663", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", - "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", - "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", - "file_size_bytes": 4221977 - }, - { - "name": "Gp0115663_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115663", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", - "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", - "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", - "file_size_bytes": 90115831 - }, - { - "name": "Gp0115663_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115663", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", - "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", - "id": "nmdc:b3573e3cda5a06611de71ca04c5c14cc", - "file_size_bytes": 89604715 - }, - { - "name": "Gp0115663_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", - "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", - "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", - "file_size_bytes": 13412363 - }, - { - "name": "Gp0115663_Assembled AGP file", - "description": "Assembled AGP file for Gp0115663", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", - "md5_checksum": "f450e3800e17691d5874c89fc46c186a", - "id": "nmdc:f450e3800e17691d5874c89fc46c186a", - "file_size_bytes": 12542171 - }, - { - "name": "Gp0115663_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115663", - "data_object_type": "Assembly Coverage BAM", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", - "md5_checksum": "31dc958d116d02122509e90b0883954f", - "id": "nmdc:31dc958d116d02122509e90b0883954f", - "file_size_bytes": 2773429299 - }, - { - "name": "Gp0115663_Protein FAA", - "description": "Protein FAA for Gp0115663", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", - "md5_checksum": "879988d212ecec46928b8598e2f8391f", - "id": "nmdc:879988d212ecec46928b8598e2f8391f", - "file_size_bytes": 50165060 - }, - { - "name": "Gp0115663_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115663", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", - "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", - "id": "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", - "file_size_bytes": 2767 - }, - { - "name": "Gp0115663_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115663", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", - "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", - "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", - "file_size_bytes": 55139586 - }, - { - "name": "Gp0115663_KO TSV file", - "description": "KO TSV file for Gp0115663", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", - "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", - "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", - "file_size_bytes": 6023696 - }, - { - "name": "Gp0115663_EC TSV file", - "description": "EC TSV file for Gp0115663", - "data_object_type": "Annotation Enzyme Commission", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", - "md5_checksum": "4f88c89459f36655eb7c1eceec19602a", - "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", - "file_size_bytes": 3982918 - }, - { - "name": "Gp0115663_COG GFF file", - "description": "COG GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", - "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", - "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", - "file_size_bytes": 27362917 - }, - { - "name": "Gp0115663_PFAM GFF file", - "description": "PFAM GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", - "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", - "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", - "file_size_bytes": 21572048 - }, - { - "name": "Gp0115663_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", - "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", - "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", - "file_size_bytes": 2900068 - }, - { - "name": "Gp0115663_SMART GFF file", - "description": "SMART GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", - "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", - "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", - "file_size_bytes": 6905519 - }, - { - "name": "Gp0115663_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", - "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", - "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", - "file_size_bytes": 38787856 - }, - { - "name": "Gp0115663_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115663", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", - "md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", - "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", - "file_size_bytes": 30134783 - }, - { - "name": "Gp0115663_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", - "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", - "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", - "file_size_bytes": 19194308 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", - "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", - "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", - "file_size_bytes": 70411007 - }, - { - "name": "Gp0115663_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", - "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", - "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", - "file_size_bytes": 15998690 - }, - { - "name": "Gp0115663_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115663", - "data_object_type": "CheckM Statistics", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", - "md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", - "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", - "file_size_bytes": 1530 - }, - { - "name": "Gp0115663_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115663", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", - "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", - "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", - "file_size_bytes": 182 - }, - { - "name": "Gp0115663_metabat2 bins", - "description": "metabat2 bins for Gp0115663", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", - "md5_checksum": "27c07072f175571200b5931550adb8aa", - "id": "nmdc:27c07072f175571200b5931550adb8aa", - "file_size_bytes": 1114314 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d5a" - }, - "description": "Assembled scaffold fasta for gold:Gp0115663", - "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly_scaffolds.fna", - "file_size_bytes": 88756490, - "type": "nmdc:DataObject", - "id": "nmdc:321a497bc1c3cf25affc8e659b746ba5", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d5d" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115663", - "url": "https://data.microbiomedata.org/data/1781_86101/assembly/mapping_stats.txt", - "file_size_bytes": 12563453, - "type": "nmdc:DataObject", - "id": "nmdc:ad47215b9b079c1d94a8fc56385dee36", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d5f" - }, - "description": "Assembled contigs fasta for gold:Gp0115663", - "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly_contigs.fna", - 
"file_size_bytes": 89266921, - "type": "nmdc:DataObject", - "id": "nmdc:0a3d00715d01ad7b8f3aee59b674dfe9", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d61" - }, - "description": "Assembled AGP file for gold:Gp0115663", - "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly.agp", - "file_size_bytes": 10842941, - "type": "nmdc:DataObject", - "id": "nmdc:bc01f0f507c9dac65d8a8e40e41a8c48", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d63" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115663", - "url": "https://data.microbiomedata.org/data/1781_86101/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2729039400, - "type": "nmdc:DataObject", - "id": "nmdc:668d207be5ea844f988fbfb2813564cc", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15979" - }, - "id": "nmdc:9509adb5a013006dfda9754429cfc968", - "name": "1781_86101.krona.html", - "description": "Gold:Gp0115663 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86101/ReadbasedAnalysis/centrifuge/1781_86101.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15980" - }, - "id": "nmdc:6dea4c58f402b5c3935e8f1a545bec47", - "name": "1781_86101.json", - "description": "Gold:Gp0115663 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86101/ReadbasedAnalysis/1781_86101.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16253" - }, - "id": "nmdc:0c4c875e5b10c6b742c14c22e2926751", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115663", - "file_size_bytes": 
68423774, - "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16254" - }, - "id": "nmdc:c55d6b00aa5d4af8cd46d349e17d4127", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115663", - "file_size_bytes": 16857267, - "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16256" - }, - "id": "nmdc:1346fe25b6ff22180eb3a51204e0b1fc", - "name": "gold:Gp0115663.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115663", - "file_size_bytes": 224772, - "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16258" - }, - "id": "nmdc:818f5a47d1371295f9313909ea12eb50", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115663", - "file_size_bytes": 1141, - "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16259" - }, - "id": "nmdc:a755bb87aded36aefbd8022506a793c7", - "name": "gold:Gp0115663.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115663", - "file_size_bytes": 2225340, - "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1625a" - }, - "id": "nmdc:e0b7421514f976cb7ad8c343cf3077a9", - "name": "gold:Gp0115663.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115663", - "file_size_bytes": 288873, - "url": 
"https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d91" - }, - "description": "KO TSV File for gold:Gp0115663", - "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_ko.tsv", - "md5_checksum": "8d250650c90956edff8bafccc56fd630", - "file_size_bytes": 3385, - "id": "nmdc:8d250650c90956edff8bafccc56fd630", - "name": "gold:Gp0115663_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d94" - }, - "description": "Functional annotation GFF file for gold:Gp0115663", - "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_functional_annotation.gff", - "md5_checksum": "b7e9c8d0bffdd13ace6f862a61fa87d2", - "file_size_bytes": 3385, - "id": "nmdc:b7e9c8d0bffdd13ace6f862a61fa87d2", - "name": "gold:Gp0115663_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d95" - }, - "description": "Protein FAA for gold:Gp0115663", - "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_proteins.faa", - "md5_checksum": "754074d3bcade65aba2a6f8236619ab7", - "file_size_bytes": 3385, - "id": "nmdc:754074d3bcade65aba2a6f8236619ab7", - "name": "gold:Gp0115663_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d97" - }, - "description": "Structural annotation GFF file for gold:Gp0115663", - "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_structural_annotation.gff", - "md5_checksum": "a4b4c623457aa10161d88a9ac4eef522", - "file_size_bytes": 3385, - "id": "nmdc:a4b4c623457aa10161d88a9ac4eef522", - "name": "gold:Gp0115663_Structural annotation GFF file", - 
"data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da8" - }, - "description": "EC TSV File for gold:Gp0115663", - "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_ec.tsv", - "md5_checksum": "27319f58c616a07159e1fac12635bd4b", - "file_size_bytes": 3385, - "id": "nmdc:27319f58c616a07159e1fac12635bd4b", - "name": "gold:Gp0115663_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34734" - }, - "has_input": [ - "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", - "nmdc:31dc958d116d02122509e90b0883954f", - "nmdc:002e4ebc728f8b91cb5f298d340ab013" - ], - "too_short_contig_num": 159810, - "part_of": [ - "nmdc:mga0h9dt75" - ], - "binned_contig_num": 684, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:c092b018cb4652c4ca0620b37a4b3fad", - "nmdc:70d7c8a307f47adb05056bee1b01f9d4", - "nmdc:4545ab2039ae70f4439a93316f4fb7bc", - "nmdc:280b63ae1cc1fa8d6154a0681d47c399", - "nmdc:27c07072f175571200b5931550adb8aa" - ], - "was_informed_by": "gold:Gp0115663", - "input_contig_num": 169782, - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", - "mags_list": [ - { - "number_of_contig": 61, - "completeness": 13.82, - "bin_name": "bins.1", - "gene_count": 294, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.62, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 0 - }, - { - "number_of_contig": 485, - "completeness": 66.03, - "bin_name": 
"bins.2", - "gene_count": 2871, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 10.87, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 32 - }, - { - "number_of_contig": 56, - "completeness": 34.23, - "bin_name": "bins.3", - "gene_count": 337, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 9 - }, - { - "number_of_contig": 63, - "completeness": 6.9, - "bin_name": "bins.4", - "gene_count": 276, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 19, - "completeness": 4.45, - "bin_name": "bins.5", - "gene_count": 463, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - } - ], - "unbinned_contig_num": 9288, - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9d6" - }, - "has_input": [ - "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" - ], - "part_of": [ - "nmdc:mga0h9dt75" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - 
"has_output": [ - "nmdc:879988d212ecec46928b8598e2f8391f", - "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", - "nmdc:002e4ebc728f8b91cb5f298d340ab013", - "nmdc:6851078f29716d89e3f41f0969ae7bf0", - "nmdc:4f88c89459f36655eb7c1eceec19602a", - "nmdc:a068b9ce6ebb7deb15ff932b513817a9", - "nmdc:618b18fa8635c80cc0091371f451a6f0", - "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", - "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", - "nmdc:fdd2e8741ffef40db383674a10bb4d11", - "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", - "nmdc:75f481e0d98793cfb4f9508cb3e31622" - ], - "was_informed_by": "gold:Gp0115663", - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0h9dt75", - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb9" - }, - "has_input": [ - "nmdc:7bf778baef033d36f118f8591256d6ef" - ], - "part_of": [ - "nmdc:mga0h9dt75" - ], - "ctg_logsum": 214373, - "scaf_logsum": 215363, - "gap_pct": 0.00188, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", - "nmdc:b3573e3cda5a06611de71ca04c5c14cc", - "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", - "nmdc:f450e3800e17691d5874c89fc46c186a", - "nmdc:31dc958d116d02122509e90b0883954f" - ], - "asm_score": 6.577, - "was_informed_by": "gold:Gp0115663", - "ctg_powsum": 24284, - "scaf_max": 68135, - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "scaf_powsum": 24422, - "execution_resource": "NERSC-Cori", - "contigs": 169784, - "name": "Assembly Activity for nmdc:mga0h9dt75", - "ctg_max": 68135, - "gc_std": 0.11726, - "contig_bp": 83494920, - "gc_avg": 0.46001, - "started_at_time": "2021-10-11T02:28:26Z", - "scaf_bp": 83496490, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 169645, - "ended_at_time": "2021-10-11T04:56:04+00:00", - "ctg_l50": 
470, - "ctg_l90": 290, - "ctg_n50": 45584, - "ctg_n90": 141996, - "scaf_l50": 470, - "scaf_l90": 290, - "scaf_n50": 45550, - "scaf_n90": 141870, - "scaf_l_gt50k": 68135, - "scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.08160224 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b33" - }, - "id": "nmdc:omprc-11-bn8jcq58", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", - "has_input": [ - "nmdc:bsm-11-qq8s6x03" - ], - "has_output": [ - "jgi:55d740280d8785342fcf7e39" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115663" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c88b" - }, - "has_input": [ - "nmdc:30a06664f29cffbbbc49abad86eae6fc" - ], - "part_of": [ - "nmdc:mga0h9dt75" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7bf778baef033d36f118f8591256d6ef", - "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" - ], - "was_informed_by": "gold:Gp0115663", - "input_read_count": 32238374, - "output_read_bases": 4608772924, - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "input_read_bases": 
4867994474, - "name": "Read QC Activity for nmdc:mga0h9dt75", - "output_read_count": 30774080, - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf4f" - }, - "has_input": [ - "nmdc:7bf778baef033d36f118f8591256d6ef" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", - "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", - "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", - "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", - "nmdc:1a208e2519770ef50740ac39f1b9ba9a", - "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", - "nmdc:7ca01ea379f0baed96f87d1435925f95", - "nmdc:c85f2f2b4a518c4adb23970448a5cb45", - "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" - ], - "was_informed_by": "gold:Gp0115663", - "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", - "started_at_time": "2021-10-11T02:28:26Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:56:04+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2080914094, - "type": "nmdc:DataObject", - "id": "jgi:55d817fc0d8785342fcf8274", - "name": "9387.2.132031.CCGTCC.fastq.gz" - }, - { - "name": "Gp0115666_Filtered Reads", - "description": "Filtered Reads for Gp0115666", - "data_object_type": "Filtered Sequencing Reads", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filtered.fastq.gz", - "md5_checksum": "0b301d2dd917c2be31422dd0e986dd5e", - "id": "nmdc:0b301d2dd917c2be31422dd0e986dd5e", - "file_size_bytes": 1806510860 - }, - { - "name": "Gp0115666_Filtered Stats", - "description": "Filtered Stats for Gp0115666", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filterStats.txt", - "md5_checksum": "0634e8261ce976d167457993d7f7a4ec", - "id": "nmdc:0634e8261ce976d167457993d7f7a4ec", - "file_size_bytes": 289 - }, - { - "name": "Gp0115666_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", - "md5_checksum": "17454627f873cc37e80700c4751c81d6", - "id": "nmdc:17454627f873cc37e80700c4751c81d6", - "file_size_bytes": 10721 - }, - { - "name": "Gp0115666_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", - "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", - "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "file_size_bytes": 920924 - }, - { - "name": "Gp0115666_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115666", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", - "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", - "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "file_size_bytes": 257441 - }, - { - "name": "Gp0115666_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115666", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", - "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", - "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "file_size_bytes": 1468295025 - }, - { - "name": "Gp0115666_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115666", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", - "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", - "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "file_size_bytes": 257081 - }, - { - "name": "Gp0115666_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115666", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", - "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", - "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "file_size_bytes": 2331968 - }, - { - "name": "Gp0115666_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115666", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", - "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", - "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "file_size_bytes": 1204548180 - }, - { - "name": "Gp0115666_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115666", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", - "md5_checksum": "368cf81424348cdf46d17c13908280e7", - "id": "nmdc:368cf81424348cdf46d17c13908280e7", - "file_size_bytes": 653697 - }, - { - "name": "Gp0115666_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115666", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", - "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", - "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", - "file_size_bytes": 3983935 - }, - { - "name": "Gp0115666_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115666", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_contigs.fna", - "md5_checksum": "e557facdf4c3066ba4b5ba168995ba85", - "id": "nmdc:e557facdf4c3066ba4b5ba168995ba85", - "file_size_bytes": 63269472 - }, - { - "name": "Gp0115666_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115666", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_scaffolds.fna", - "md5_checksum": "92cc678ca9e54cb92118b9ae746fb996", - "id": "nmdc:92cc678ca9e54cb92118b9ae746fb996", - "file_size_bytes": 62917914 - }, - { - "name": "Gp0115666_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_covstats.txt", - "md5_checksum": "7082b41c627571a03466f94ba80c15b8", - "id": "nmdc:7082b41c627571a03466f94ba80c15b8", - "file_size_bytes": 9179769 - }, - { - "name": "Gp0115666_Assembled AGP file", - "description": "Assembled AGP file for Gp0115666", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_assembly.agp", - "md5_checksum": "c5ccd39d97d652d5ec8804202a324b0e", - "id": "nmdc:c5ccd39d97d652d5ec8804202a324b0e", - "file_size_bytes": 8550216 - }, - { - "name": "Gp0115666_Metagenome Alignment BAM file", - "description": "Metagenome 
Alignment BAM file for Gp0115666", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_pairedMapped_sorted.bam", - "md5_checksum": "3ece2c377622cebdddfb9322047cb115", - "id": "nmdc:3ece2c377622cebdddfb9322047cb115", - "file_size_bytes": 1940309089 - }, - { - "name": "Gp0115666_Protein FAA", - "description": "Protein FAA for Gp0115666", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_proteins.faa", - "md5_checksum": "4d509c29cad07f0b18d3f7e0e724c493", - "id": "nmdc:4d509c29cad07f0b18d3f7e0e724c493", - "file_size_bytes": 35706777 - }, - { - "name": "Gp0115666_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115666", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_structural_annotation.gff", - "md5_checksum": "60d04bb0a2d1a1d593bd849a2a13e405", - "id": "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", - "file_size_bytes": 2520 - }, - { - "name": "Gp0115666_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115666", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_functional_annotation.gff", - "md5_checksum": "91cd273ea95a29b2c4e326c56eafe08a", - "id": "nmdc:91cd273ea95a29b2c4e326c56eafe08a", - "file_size_bytes": 40030386 - }, - { - "name": "Gp0115666_KO TSV file", - "description": "KO TSV file for Gp0115666", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko.tsv", - "md5_checksum": "e08c6253ec5a15eb43d8cb4d69d09d4c", - "id": "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", - "file_size_bytes": 5584125 - }, - { - "name": "Gp0115666_EC TSV file", - 
"description": "EC TSV file for Gp0115666", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ec.tsv", - "md5_checksum": "9edfc4fee191b722148af1e2648f787f", - "id": "nmdc:9edfc4fee191b722148af1e2648f787f", - "file_size_bytes": 3575242 - }, - { - "name": "Gp0115666_COG GFF file", - "description": "COG GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cog.gff", - "md5_checksum": "886402044865256b80bfaf42ca148a61", - "id": "nmdc:886402044865256b80bfaf42ca148a61", - "file_size_bytes": 23390091 - }, - { - "name": "Gp0115666_PFAM GFF file", - "description": "PFAM GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_pfam.gff", - "md5_checksum": "1b2bc9b96a15ebdfe3ff1e30027544af", - "id": "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", - "file_size_bytes": 18444613 - }, - { - "name": "Gp0115666_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_tigrfam.gff", - "md5_checksum": "2d730834b8841b7a7ad30786bff382fa", - "id": "nmdc:2d730834b8841b7a7ad30786bff382fa", - "file_size_bytes": 2596225 - }, - { - "name": "Gp0115666_SMART GFF file", - "description": "SMART GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_smart.gff", - "md5_checksum": "46d62d69e48d7aeecb87106e02102753", - "id": "nmdc:46d62d69e48d7aeecb87106e02102753", - "file_size_bytes": 4932262 - }, - { - "name": "Gp0115666_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_supfam.gff", - "md5_checksum": "1896e41000aa9e4acc98cc7702e42304", - "id": "nmdc:1896e41000aa9e4acc98cc7702e42304", - "file_size_bytes": 28911479 - }, - { - 
"name": "Gp0115666_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cath_funfam.gff", - "md5_checksum": "f40bfd77fb3f24be2529fdafc01104c7", - "id": "nmdc:f40bfd77fb3f24be2529fdafc01104c7", - "file_size_bytes": 22881869 - }, - { - "name": "Gp0115666_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko_ec.gff", - "md5_checksum": "920be8f090654360619fbb16163b8513", - "id": "nmdc:920be8f090654360619fbb16163b8513", - "file_size_bytes": 17844749 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115666_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.tooShort.fa", - "md5_checksum": "9944a9020ce981a2423ca81424998e66", - "id": "nmdc:9944a9020ce981a2423ca81424998e66", - "file_size_bytes": 46766610 - }, - { - "name": "Gp0115666_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.unbinned.fa", - "md5_checksum": "d2a24728b9006fd4fb4bf4f326138dc2", - "id": "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", - "file_size_bytes": 11382048 - }, - { - "name": "Gp0115666_metabat2 bin checkm quality assessment result", - "description": "metabat2 
bin checkm quality assessment result for Gp0115666", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_checkm_qa.out", - "md5_checksum": "415dfed655f9c4673f2cce4f9947c2e4", - "id": "nmdc:415dfed655f9c4673f2cce4f9947c2e4", - "file_size_bytes": 1020 - }, - { - "name": "Gp0115666_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115666", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_hqmq_bin.zip", - "md5_checksum": "be6482b534716166ce5daea5a07cba06", - "id": "nmdc:be6482b534716166ce5daea5a07cba06", - "file_size_bytes": 182 - }, - { - "name": "Gp0115666_metabat2 bins", - "description": "metabat2 bins for Gp0115666", - "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_metabat_bin.zip", - "md5_checksum": "df08913532a84681996a29d1a1c127b3", - "id": "nmdc:df08913532a84681996a29d1a1c127b3", - "file_size_bytes": 1559491 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d6c" - }, - "description": "Assembled contigs fasta for gold:Gp0115666", - "url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly_contigs.fna", - "file_size_bytes": 62686167, - "type": "nmdc:DataObject", - "id": "nmdc:aa60f90793266081a0ba6d125fb06e55", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d6d" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115666", - "url": "https://data.microbiomedata.org/data/1781_86096/assembly/mapping_stats.txt", - "file_size_bytes": 8596464, - "type": "nmdc:DataObject", - "id": "nmdc:0157a89bc2e56ad99bae7289b9df7fb6", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d6e" - }, - "description": "Assembled scaffold fasta for gold:Gp0115666", - 
"url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly_scaffolds.fna", - "file_size_bytes": 62335089, - "type": "nmdc:DataObject", - "id": "nmdc:2dd25f896d7b6100a24987d1496e2646", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d6f" - }, - "description": "Assembled AGP file for gold:Gp0115666", - "url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly.agp", - "file_size_bytes": 7382646, - "type": "nmdc:DataObject", - "id": "nmdc:c29f2a80b289985b57570884a5c92548", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d73" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115666", - "url": "https://data.microbiomedata.org/data/1781_86096/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1914805023, - "type": "nmdc:DataObject", - "id": "nmdc:1abeac4b3490b9baf8206f8df963a646", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159a5" - }, - "id": "nmdc:2e76bec484e1a06b2f84b1c230cd97b4", - "name": "1781_86096.json", - "description": "Gold:Gp0115666 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86096/ReadbasedAnalysis/1781_86096.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159a9" - }, - "id": "nmdc:d5ab2504c9505d6cb96b348b71f2efc5", - "name": "1781_86096.krona.html", - "description": "Gold:Gp0115666 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86096/ReadbasedAnalysis/centrifuge/1781_86096.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16267" - }, - "id": "nmdc:5b80fde8feaaab94a1774e7d61863048", - "name": "bins.tooShort.fa", - "description": 
"tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115666", - "file_size_bytes": 45445156, - "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1626b" - }, - "id": "nmdc:5c7b37ac5d11fd3ffcbcc63b2e15f627", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115666", - "file_size_bytes": 1190, - "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1626d" - }, - "id": "nmdc:a4fab93f1102baf069e09b65cb13e87a", - "name": "gold:Gp0115666.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115666", - "file_size_bytes": 3971570, - "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1626e" - }, - "id": "nmdc:88ef6e640707d816e9df8d751c31e71b", - "name": "gold:Gp0115666.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115666", - "file_size_bytes": 559859, - "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16271" - }, - "id": "nmdc:ce2c968f1093b25da9ac4399291eede6", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115666", - "file_size_bytes": 11541386, - "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16281" - }, - "id": "nmdc:ffdbeb92d859d6b7e828f3d6f8219e0b", - "name": "gold:Gp0115666.bins.1.fa", - "description": "metabat2 binned contig file for 
gold:Gp0115666", - "file_size_bytes": 346195, - "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d7d" - }, - "description": "Functional annotation GFF file for gold:Gp0115666", - "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_functional_annotation.gff", - "md5_checksum": "a1e8795537eca0522357d60045780ab3", - "file_size_bytes": 3385, - "id": "nmdc:a1e8795537eca0522357d60045780ab3", - "name": "gold:Gp0115666_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d81" - }, - "description": "Structural annotation GFF file for gold:Gp0115666", - "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_structural_annotation.gff", - "md5_checksum": "654201c4699079bdd923dcff52881c07", - "file_size_bytes": 3385, - "id": "nmdc:654201c4699079bdd923dcff52881c07", - "name": "gold:Gp0115666_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d84" - }, - "description": "EC TSV File for gold:Gp0115666", - "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_ec.tsv", - "md5_checksum": "4e3f389524497182aa3e8832aa7b373b", - "file_size_bytes": 3385, - "id": "nmdc:4e3f389524497182aa3e8832aa7b373b", - "name": "gold:Gp0115666_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d8a" - }, - "description": "KO TSV File for gold:Gp0115666", - "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_ko.tsv", - "md5_checksum": "ab262feeaf856be190b60ea7c0a4c030", - "file_size_bytes": 3385, - "id": 
"nmdc:ab262feeaf856be190b60ea7c0a4c030", - "name": "gold:Gp0115666_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d8d" - }, - "description": "Protein FAA for gold:Gp0115666", - "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_proteins.faa", - "md5_checksum": "70c8e0fc6e64b20e99a4c0f783014142", - "file_size_bytes": 3385, - "id": "nmdc:70c8e0fc6e64b20e99a4c0f783014142", - "name": "gold:Gp0115666_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34730" - }, - "has_input": [ - "nmdc:e557facdf4c3066ba4b5ba168995ba85", - "nmdc:3ece2c377622cebdddfb9322047cb115", - "nmdc:91cd273ea95a29b2c4e326c56eafe08a" - ], - "too_short_contig_num": 108937, - "part_of": [ - "nmdc:mga0eehe16" - ], - "binned_contig_num": 899, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:9944a9020ce981a2423ca81424998e66", - "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", - "nmdc:415dfed655f9c4673f2cce4f9947c2e4", - "nmdc:be6482b534716166ce5daea5a07cba06", - "nmdc:df08913532a84681996a29d1a1c127b3" - ], - "was_informed_by": "gold:Gp0115666", - "input_contig_num": 116661, - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0eehe16", - "mags_list": [ - { - "number_of_contig": 216, - "completeness": 41.57, - "bin_name": "bins.1", - "gene_count": 1176, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.93, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - 
"num_t_rna": 11 - }, - { - "number_of_contig": 683, - "completeness": 87.59, - "bin_name": "bins.2", - "gene_count": 4526, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 33.23, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 56 - } - ], - "unbinned_contig_num": 6825, - "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:06:19+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9ce" - }, - "has_input": [ - "nmdc:e557facdf4c3066ba4b5ba168995ba85" - ], - "part_of": [ - "nmdc:mga0eehe16" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:4d509c29cad07f0b18d3f7e0e724c493", - "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", - "nmdc:91cd273ea95a29b2c4e326c56eafe08a", - "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", - "nmdc:9edfc4fee191b722148af1e2648f787f", - "nmdc:886402044865256b80bfaf42ca148a61", - "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", - "nmdc:2d730834b8841b7a7ad30786bff382fa", - "nmdc:46d62d69e48d7aeecb87106e02102753", - "nmdc:1896e41000aa9e4acc98cc7702e42304", - "nmdc:f40bfd77fb3f24be2529fdafc01104c7", - "nmdc:920be8f090654360619fbb16163b8513" - ], - "was_informed_by": "gold:Gp0115666", - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0eehe16", - "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:06:19+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fbb" - }, - "has_input": [ - "nmdc:0b301d2dd917c2be31422dd0e986dd5e" - ], - "part_of": [ - 
"nmdc:mga0eehe16" - ], - "ctg_logsum": 181484, - "scaf_logsum": 182081, - "gap_pct": 0.00163, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e557facdf4c3066ba4b5ba168995ba85", - "nmdc:92cc678ca9e54cb92118b9ae746fb996", - "nmdc:7082b41c627571a03466f94ba80c15b8", - "nmdc:c5ccd39d97d652d5ec8804202a324b0e", - "nmdc:3ece2c377622cebdddfb9322047cb115" - ], - "asm_score": 5.224, - "was_informed_by": "gold:Gp0115666", - "ctg_powsum": 20653, - "scaf_max": 25973, - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "scaf_powsum": 20721, - "execution_resource": "NERSC-Cori", - "contigs": 116661, - "name": "Assembly Activity for nmdc:mga0eehe16", - "ctg_max": 25973, - "gc_std": 0.10759, - "contig_bp": 58735100, - "gc_avg": 0.57262, - "started_at_time": "2021-10-11T02:28:09Z", - "scaf_bp": 58736060, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 116565, - "ended_at_time": "2021-10-11T04:06:19+00:00", - "ctg_l50": 493, - "ctg_l90": 286, - "ctg_n50": 27791, - "ctg_n90": 95962, - "scaf_l50": 493, - "scaf_l90": 286, - "scaf_n50": 27775, - "scaf_n90": 95875 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b34" - }, - "id": "nmdc:omprc-11-zp2ar437", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", - "has_input": [ - "nmdc:bsm-11-4qsqg549" - ], - "has_output": [ - "jgi:55d817fc0d8785342fcf8274" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", - 
"omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115666" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c887" - }, - "has_input": [ - "nmdc:76893480c05758ad2977df78a5b050e5" - ], - "part_of": [ - "nmdc:mga0eehe16" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0b301d2dd917c2be31422dd0e986dd5e", - "nmdc:0634e8261ce976d167457993d7f7a4ec" - ], - "was_informed_by": "gold:Gp0115666", - "input_read_count": 22183982, - "output_read_bases": 3025260554, - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3349781282, - "name": "Read QC Activity for nmdc:mga0eehe16", - "output_read_count": 20195754, - "started_at_time": "2021-10-11T02:28:09Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:06:19+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf55" - }, - "has_input": [ - "nmdc:0b301d2dd917c2be31422dd0e986dd5e" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:17454627f873cc37e80700c4751c81d6", - "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", - "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", - "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", - "nmdc:1d46eebd0f194f57dd9e92c9bc992891", - "nmdc:e5227b1cfdbc266c44d23028c92150a9", - "nmdc:05f7680c6646904cfb16fc146c0fed4a", - "nmdc:368cf81424348cdf46d17c13908280e7", - "nmdc:b5091cfeed4fbea8316e50fbceea89bc" - ], - "was_informed_by": "gold:Gp0115666", - "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", - "started_at_time": 
"2021-10-11T02:28:09Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:06:19+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 3050291373, - "type": "nmdc:DataObject", - "id": "jgi:55d740240d8785342fcf7e37", - "name": "9422.8.132674.GTGGCC.fastq.gz" - }, - { - "name": "Gp0115668_Filtered Reads", - "description": "Filtered Reads for Gp0115668", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filtered.fastq.gz", - "md5_checksum": "121b1c25e803f2a010ae5a2206a8d1d2", - "id": "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", - "file_size_bytes": 2665008319 - }, - { - "name": "Gp0115668_Filtered Stats", - "description": "Filtered Stats for Gp0115668", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filterStats.txt", - "md5_checksum": "63fb5949ebafd1846ba60f2ce033191c", - "id": "nmdc:63fb5949ebafd1846ba60f2ce033191c", - "file_size_bytes": 289 - }, - { - "name": "Gp0115668_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", - "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", - "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "file_size_bytes": 13875 - }, - { - "name": "Gp0115668_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115668", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", - "md5_checksum": "2529ede10eb159148711d016ec022af3", - "id": "nmdc:2529ede10eb159148711d016ec022af3", - "file_size_bytes": 956974 - }, - { - "name": "Gp0115668_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115668", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", - "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", - "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "file_size_bytes": 265076 - }, - { - "name": "Gp0115668_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115668", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", - "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", - "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "file_size_bytes": 2377445510 - }, - { - "name": "Gp0115668_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115668", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", - "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", - "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "file_size_bytes": 258291 - }, - { - "name": "Gp0115668_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115668", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", - "md5_checksum": "890f9f52d828e1ea8277b52566763069", - "id": "nmdc:890f9f52d828e1ea8277b52566763069", - "file_size_bytes": 2333775 - }, - { - "name": 
"Gp0115668_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115668", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", - "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", - "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "file_size_bytes": 1966520263 - }, - { - "name": "Gp0115668_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115668", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", - "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", - "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", - "file_size_bytes": 707661 - }, - { - "name": "Gp0115668_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115668", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", - "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", - "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", - "file_size_bytes": 4276256 - }, - { - "name": "Gp0115668_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115668", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", - "md5_checksum": "b2b862aede4f333acec79aac3afc7254", - "id": "nmdc:b2b862aede4f333acec79aac3afc7254", - "file_size_bytes": 182488593 - }, - { - "name": "Gp0115668_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115668", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", - "md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", - "id": 
"nmdc:15d4494dad1e12523aa9afb56b1e7cdb", - "file_size_bytes": 181514952 - }, - { - "name": "Gp0115668_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_covstats.txt", - "md5_checksum": "6ccb798d615b67dfb9c64ff32d6586c4", - "id": "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", - "file_size_bytes": 25682298 - }, - { - "name": "Gp0115668_Assembled AGP file", - "description": "Assembled AGP file for Gp0115668", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_assembly.agp", - "md5_checksum": "da27801a4e0ab450485f5a3aeb75a7d6", - "id": "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", - "file_size_bytes": 24103161 - }, - { - "name": "Gp0115668_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115668", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_pairedMapped_sorted.bam", - "md5_checksum": "f7a4bb0be4599b544360617190b45681", - "id": "nmdc:f7a4bb0be4599b544360617190b45681", - "file_size_bytes": 2958311801 - }, - { - "name": "Gp0115668_Protein FAA", - "description": "Protein FAA for Gp0115668", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_proteins.faa", - "md5_checksum": "5cb6273cd171d1ae5a8d77c8f131517f", - "id": "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", - "file_size_bytes": 88016165 - }, - { - "name": "Gp0115668_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115668", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_structural_annotation.gff", - "md5_checksum": "d49149a48134c1091c001448cc91f8e2", - "id": 
"nmdc:d49149a48134c1091c001448cc91f8e2", - "file_size_bytes": 2527 - }, - { - "name": "Gp0115668_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115668", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_functional_annotation.gff", - "md5_checksum": "7a861805138d425525f298c1790b58ed", - "id": "nmdc:7a861805138d425525f298c1790b58ed", - "file_size_bytes": 91926507 - }, - { - "name": "Gp0115668_KO TSV file", - "description": "KO TSV file for Gp0115668", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko.tsv", - "md5_checksum": "0d0a80f2dafb68f4659709dd2ebd2f28", - "id": "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", - "file_size_bytes": 6651856 - }, - { - "name": "Gp0115668_EC TSV file", - "description": "EC TSV file for Gp0115668", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ec.tsv", - "md5_checksum": "a8b689fdef54bf7235532de634cf553e", - "id": "nmdc:a8b689fdef54bf7235532de634cf553e", - "file_size_bytes": 4156019 - }, - { - "name": "Gp0115668_COG GFF file", - "description": "COG GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cog.gff", - "md5_checksum": "017daaa53039bc1135ca8f013596eb14", - "id": "nmdc:017daaa53039bc1135ca8f013596eb14", - "file_size_bytes": 33686729 - }, - { - "name": "Gp0115668_PFAM GFF file", - "description": "PFAM GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_pfam.gff", - "md5_checksum": "e3eb963d76dc6bdc54756cfa80977611", - "id": "nmdc:e3eb963d76dc6bdc54756cfa80977611", - "file_size_bytes": 29534588 - }, - { - "name": "Gp0115668_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115668", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_tigrfam.gff", - "md5_checksum": "ab1d561046fbe146ac418e4ed822e861", - "id": "nmdc:ab1d561046fbe146ac418e4ed822e861", - "file_size_bytes": 2596288 - }, - { - "name": "Gp0115668_SMART GFF file", - "description": "SMART GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_smart.gff", - "md5_checksum": "51054c4da9edc391b03418b5f9327815", - "id": "nmdc:51054c4da9edc391b03418b5f9327815", - "file_size_bytes": 18133874 - }, - { - "name": "Gp0115668_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_supfam.gff", - "md5_checksum": "335576d20d4f5c061a875529cbe9572c", - "id": "nmdc:335576d20d4f5c061a875529cbe9572c", - "file_size_bytes": 61337132 - }, - { - "name": "Gp0115668_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cath_funfam.gff", - "md5_checksum": "6c5387ac5acb8b340a2c2a9e17e62bae", - "id": "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", - "file_size_bytes": 52005922 - }, - { - "name": "Gp0115668_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko_ec.gff", - "md5_checksum": "eea36326caba5baa0536ac2f5e36d497", - "id": "nmdc:eea36326caba5baa0536ac2f5e36d497", - "file_size_bytes": 21150415 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - 
"file_size_bytes": 0 - }, - { - "name": "Gp0115668_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.tooShort.fa", - "md5_checksum": "8c05fc754583d51714bc1aa81396e59d", - "id": "nmdc:8c05fc754583d51714bc1aa81396e59d", - "file_size_bytes": 136315210 - }, - { - "name": "Gp0115668_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.unbinned.fa", - "md5_checksum": "9ef1be5df79aee7c64f2addc4bda6afa", - "id": "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", - "file_size_bytes": 39131745 - }, - { - "name": "Gp0115668_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115668", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_checkm_qa.out", - "md5_checksum": "60db1474ee6a099c10e4fdc728420cf8", - "id": "nmdc:60db1474ee6a099c10e4fdc728420cf8", - "file_size_bytes": 1176 - }, - { - "name": "Gp0115668_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115668", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_hqmq_bin.zip", - "md5_checksum": "5a36d8ba758ee510ab2be3e01fda3e0f", - "id": "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", - "file_size_bytes": 182 - }, - { - "name": "Gp0115668_metabat2 bins", - "description": "metabat2 bins for Gp0115668", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_metabat_bin.zip", - "md5_checksum": "3f4c7c98bb94687eb96382799c8626fe", - "id": "nmdc:3f4c7c98bb94687eb96382799c8626fe", - "file_size_bytes": 2145953 - }, - { - "_id": { - 
"$oid": "649b003c1ae706d7b5b14d77" - }, - "description": "Assembled contigs fasta for gold:Gp0115668", - "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly_contigs.fna", - "file_size_bytes": 180872288, - "type": "nmdc:DataObject", - "id": "nmdc:0ce94528dc5ad4d5b62293d4d95c1e9e", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d79" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115668", - "url": "https://data.microbiomedata.org/data/1781_86100/assembly/mapping_stats.txt", - "file_size_bytes": 24065993, - "type": "nmdc:DataObject", - "id": "nmdc:3d9a9633303a795133a0afbbe7541354", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d7b" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115668", - "url": "https://data.microbiomedata.org/data/1781_86100/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2912328623, - "type": "nmdc:DataObject", - "id": "nmdc:0b3a3146c8e3d01fe0cbda4de3a58ff1", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d7d" - }, - "description": "Assembled scaffold fasta for gold:Gp0115668", - "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly_scaffolds.fna", - "file_size_bytes": 179900502, - "type": "nmdc:DataObject", - "id": "nmdc:aaab457bbc67e3a755340b9c94d15286", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d82" - }, - "description": "Assembled AGP file for gold:Gp0115668", - "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly.agp", - "file_size_bytes": 20866681, - "type": "nmdc:DataObject", - "id": "nmdc:327d188b5936d3c95c61bc9f2131da76", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - 
"_id": { - "$oid": "649b003d1ae706d7b5b159a0" - }, - "id": "nmdc:b582f88ff691a520217093bc43cf2cbf", - "name": "1781_86100.krona.html", - "description": "Gold:Gp0115668 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86100/ReadbasedAnalysis/centrifuge/1781_86100.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159ab" - }, - "id": "nmdc:34e913d729110bd83d9e44e130550f83", - "name": "1781_86100.json", - "description": "Gold:Gp0115668 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86100/ReadbasedAnalysis/1781_86100.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16272" - }, - "id": "nmdc:328968741e1f9405e81f711e4e419c60", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115668", - "file_size_bytes": 39575271, - "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16273" - }, - "id": "nmdc:af5ce540e803059bb726b9d73a794dc2", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115668", - "file_size_bytes": 132519280, - "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16275" - }, - "id": "nmdc:f61532f16df6a916b9ecc308a8d555a2", - "name": "gold:Gp0115668.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0115668", - "file_size_bytes": 957388, - "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16276" - }, - "id": 
"nmdc:40b27b33a28b3b16da74479fb8516aff", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115668", - "file_size_bytes": 1404, - "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16277" - }, - "id": "nmdc:e0d56b325b27af3c1fff5d603e5c5db1", - "name": "gold:Gp0115668.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115668", - "file_size_bytes": 246415, - "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16279" - }, - "id": "nmdc:5dc85b63c568dfee4fabacf43b5ec75c", - "name": "gold:Gp0115668.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115668", - "file_size_bytes": 1076024, - "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1627a" - }, - "id": "nmdc:39ac1fcf35046edc399b1b64faa56ca0", - "name": "gold:Gp0115668.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115668", - "file_size_bytes": 1385677, - "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16280" - }, - "id": "nmdc:81a69601c9bf2a04762f30b38fd796ea", - "name": "gold:Gp0115668.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115668", - "file_size_bytes": 2654069, - "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": 
"649b00401ae706d7b5b16d8e" - }, - "description": "Protein FAA for gold:Gp0115668", - "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_proteins.faa", - "md5_checksum": "f97c44951275f8b68fa94ded40fda756", - "file_size_bytes": 3385, - "id": "nmdc:f97c44951275f8b68fa94ded40fda756", - "name": "gold:Gp0115668_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d8f" - }, - "description": "Structural annotation GFF file for gold:Gp0115668", - "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_structural_annotation.gff", - "md5_checksum": "b4764f173896dcb134d7c94c1ee13ca3", - "file_size_bytes": 3385, - "id": "nmdc:b4764f173896dcb134d7c94c1ee13ca3", - "name": "gold:Gp0115668_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d90" - }, - "description": "Functional annotation GFF file for gold:Gp0115668", - "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_functional_annotation.gff", - "md5_checksum": "c57d28f7dd791aab5c4caee00b247ef9", - "file_size_bytes": 3385, - "id": "nmdc:c57d28f7dd791aab5c4caee00b247ef9", - "name": "gold:Gp0115668_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da6" - }, - "description": "KO TSV File for gold:Gp0115668", - "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_ko.tsv", - "md5_checksum": "dbd78725415f5f8e80f590c3588a1c60", - "file_size_bytes": 3385, - "id": "nmdc:dbd78725415f5f8e80f590c3588a1c60", - "name": "gold:Gp0115668_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16daa" - }, - "description": "EC TSV File for 
gold:Gp0115668", - "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_ec.tsv", - "md5_checksum": "bcbae14f9733da2b512b5f5b6c8fcb98", - "file_size_bytes": 3385, - "id": "nmdc:bcbae14f9733da2b512b5f5b6c8fcb98", - "name": "gold:Gp0115668_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34736" - }, - "has_input": [ - "nmdc:b2b862aede4f333acec79aac3afc7254", - "nmdc:f7a4bb0be4599b544360617190b45681", - "nmdc:7a861805138d425525f298c1790b58ed" - ], - "too_short_contig_num": 297764, - "part_of": [ - "nmdc:mga0n66h21" - ], - "binned_contig_num": 1669, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:8c05fc754583d51714bc1aa81396e59d", - "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", - "nmdc:60db1474ee6a099c10e4fdc728420cf8", - "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", - "nmdc:3f4c7c98bb94687eb96382799c8626fe" - ], - "was_informed_by": "gold:Gp0115668", - "input_contig_num": 323261, - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0n66h21", - "mags_list": [ - { - "number_of_contig": 1013, - "completeness": 12.29, - "bin_name": "bins.1", - "gene_count": 4188, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 2.32, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 20 - }, - { - "number_of_contig": 599, - "completeness": 58.72, - "bin_name": "bins.2", - "gene_count": 2940, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - 
"contamination": 12.95, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 25 - }, - { - "number_of_contig": 57, - "completeness": 4.0, - "bin_name": "bins.3", - "gene_count": 258, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.03, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 1 - } - ], - "unbinned_contig_num": 23828, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:19:17+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9d8" - }, - "has_input": [ - "nmdc:b2b862aede4f333acec79aac3afc7254" - ], - "part_of": [ - "nmdc:mga0n66h21" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", - "nmdc:d49149a48134c1091c001448cc91f8e2", - "nmdc:7a861805138d425525f298c1790b58ed", - "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", - "nmdc:a8b689fdef54bf7235532de634cf553e", - "nmdc:017daaa53039bc1135ca8f013596eb14", - "nmdc:e3eb963d76dc6bdc54756cfa80977611", - "nmdc:ab1d561046fbe146ac418e4ed822e861", - "nmdc:51054c4da9edc391b03418b5f9327815", - "nmdc:335576d20d4f5c061a875529cbe9572c", - "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", - "nmdc:eea36326caba5baa0536ac2f5e36d497" - ], - "was_informed_by": "gold:Gp0115668", - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0n66h21", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:19:17+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - 
"$oid": "649b005f2ca5ee4adb139fc3" - }, - "has_input": [ - "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" - ], - "part_of": [ - "nmdc:mga0n66h21" - ], - "ctg_logsum": 489108, - "scaf_logsum": 491574, - "gap_pct": 0.00308, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b2b862aede4f333acec79aac3afc7254", - "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", - "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", - "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", - "nmdc:f7a4bb0be4599b544360617190b45681" - ], - "asm_score": 4.087, - "was_informed_by": "gold:Gp0115668", - "ctg_powsum": 53542, - "scaf_max": 53286, - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "scaf_powsum": 53839, - "execution_resource": "NERSC-Cori", - "contigs": 323269, - "name": "Assembly Activity for nmdc:mga0n66h21", - "ctg_max": 53286, - "gc_std": 0.10793, - "contig_bp": 169601906, - "gc_avg": 0.39548, - "started_at_time": "2021-10-11T02:28:43Z", - "scaf_bp": 169607136, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 322890, - "ended_at_time": "2021-10-11T05:19:17+00:00", - "ctg_l50": 525, - "ctg_l90": 299, - "ctg_n50": 83667, - "ctg_n90": 263711, - "scaf_l50": 526, - "scaf_l90": 299, - "scaf_n50": 83307, - "scaf_n90": 263381, - "scaf_l_gt50k": 53286, - "scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.03141731 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b35" - }, - "id": "nmdc:omprc-11-wepaa271", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", - "has_input": [ - "nmdc:bsm-11-srz83p34" - ], - "has_output": [ - "jgi:55d740240d8785342fcf7e37" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - 
"add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115668" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c88c" - }, - "has_input": [ - "nmdc:0967bbbe5ee2737f66bc6ee7bf366bbb" - ], - "part_of": [ - "nmdc:mga0n66h21" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", - "nmdc:63fb5949ebafd1846ba60f2ce033191c" - ], - "was_informed_by": "gold:Gp0115668", - "input_read_count": 35064492, - "output_read_bases": 5069132469, - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "execution_resource": "NERSC-Cori", - "input_read_bases": 5294738292, - "name": "Read QC Activity for nmdc:mga0n66h21", - "output_read_count": 33873238, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:19:17+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf53" - }, - "has_input": [ - "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8bdf8bbee24242aaaee763c1d851c05e", - "nmdc:2529ede10eb159148711d016ec022af3", - "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", - "nmdc:93d26b69073bd4d6283aee3c7e5997d4", - "nmdc:d7a49bf0d9797a2b603643a2de896b5c", - "nmdc:890f9f52d828e1ea8277b52566763069", - "nmdc:371b7fabbcbc2d22c3ca84b422a88863", - "nmdc:8677985c5e8ad92dd6d051f85950a636", - "nmdc:9b2f355a4c2ff3651a3d1179212e2914" - ], - "was_informed_by": 
"gold:Gp0115668", - "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:19:17+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 6863035214, - "type": "nmdc:DataObject", - "id": "jgi:55a9cb010d87852b21508920", - "name": "9289.1.128215.GGACTCC-AGAGTAG.fastq.gz" - }, - { - "name": "Gp0115679_Filtered Reads", - "description": "Filtered Reads for Gp0115679", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filtered.fastq.gz", - "md5_checksum": "7e294ff66cb7ddf84edf9c8bed576bcd", - "id": "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", - "file_size_bytes": 5673282665 - }, - { - "name": "Gp0115679_Filtered Stats", - "description": "Filtered Stats for Gp0115679", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filterStats.txt", - "md5_checksum": "08e2a96f7aaaff5ff6f747cfe6f49e49", - "id": "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49", - "file_size_bytes": 276 - }, - { - "name": "Gp0115679_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", - "md5_checksum": "e20f8c00473472fa073adde871860801", - "id": "nmdc:e20f8c00473472fa073adde871860801", - "file_size_bytes": 18551 - }, - { - "name": 
"Gp0115679_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", - "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", - "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "file_size_bytes": 1200541 - }, - { - "name": "Gp0115679_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115679", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", - "md5_checksum": "f721d9dd168b0dea080b191a4396167e", - "id": "nmdc:f721d9dd168b0dea080b191a4396167e", - "file_size_bytes": 278990 - }, - { - "name": "Gp0115679_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115679", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", - "md5_checksum": "ab77e396ec643b58b54da92848b88a96", - "id": "nmdc:ab77e396ec643b58b54da92848b88a96", - "file_size_bytes": 4742886512 - }, - { - "name": "Gp0115679_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115679", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", - "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", - "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", - "file_size_bytes": 266907 - }, - { - "name": "Gp0115679_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115679", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", - "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", 
- "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "file_size_bytes": 2359747 - }, - { - "name": "Gp0115679_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115679", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", - "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", - "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "file_size_bytes": 3859620862 - }, - { - "name": "Gp0115679_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115679", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", - "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", - "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "file_size_bytes": 729541 - }, - { - "name": "Gp0115679_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115679", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", - "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", - "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", - "file_size_bytes": 4358324 - }, - { - "name": "Gp0115679_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115679", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_contigs.fna", - "md5_checksum": "e4314c3743795e0be8beda8b7f806557", - "id": "nmdc:e4314c3743795e0be8beda8b7f806557", - "file_size_bytes": 275030840 - }, - { - "name": "Gp0115679_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115679", - "data_object_type": "Assembly Scaffolds", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_scaffolds.fna", - "md5_checksum": "2a288a5827b66c88f8abf202bbe37aab", - "id": "nmdc:2a288a5827b66c88f8abf202bbe37aab", - "file_size_bytes": 273327529 - }, - { - "name": "Gp0115679_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_covstats.txt", - "md5_checksum": "a51c7b3a70601a885594936fd6c753bc", - "id": "nmdc:a51c7b3a70601a885594936fd6c753bc", - "file_size_bytes": 42368790 - }, - { - "name": "Gp0115679_Assembled AGP file", - "description": "Assembled AGP file for Gp0115679", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_assembly.agp", - "md5_checksum": "8851d6fed8e5bbee88aeb7af77bbcfe3", - "id": "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", - "file_size_bytes": 40232148 - }, - { - "name": "Gp0115679_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115679", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_pairedMapped_sorted.bam", - "md5_checksum": "002ed5f389b8a13735d27a8741290f6b", - "id": "nmdc:002ed5f389b8a13735d27a8741290f6b", - "file_size_bytes": 6236105158 - }, - { - "name": "Gp0115679_Protein FAA", - "description": "Protein FAA for Gp0115679", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_proteins.faa", - "md5_checksum": "ac3faa8ad0e8e7827fcf6b882ec90706", - "id": "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", - "file_size_bytes": 151048115 - }, - { - "name": "Gp0115679_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115679", - "data_object_type": "Structural Annotation GFF", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_structural_annotation.gff", - "md5_checksum": "e3712dbbf0d0bfa14b9b340e73ebf4d0", - "id": "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", - "file_size_bytes": 2549 - }, - { - "name": "Gp0115679_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115679", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_functional_annotation.gff", - "md5_checksum": "8aed63ca1302c874040e74aceb54ff05", - "id": "nmdc:8aed63ca1302c874040e74aceb54ff05", - "file_size_bytes": 166415068 - }, - { - "name": "Gp0115679_KO TSV file", - "description": "KO TSV file for Gp0115679", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko.tsv", - "md5_checksum": "6361a06de62d93909abfb565a47fd5f0", - "id": "nmdc:6361a06de62d93909abfb565a47fd5f0", - "file_size_bytes": 18038415 - }, - { - "name": "Gp0115679_EC TSV file", - "description": "EC TSV file for Gp0115679", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ec.tsv", - "md5_checksum": "bd9d330d1d6a925066003d653a171ca5", - "id": "nmdc:bd9d330d1d6a925066003d653a171ca5", - "file_size_bytes": 11896121 - }, - { - "name": "Gp0115679_COG GFF file", - "description": "COG GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cog.gff", - "md5_checksum": "c497ffc128d6738bf3868529eb7ff899", - "id": "nmdc:c497ffc128d6738bf3868529eb7ff899", - "file_size_bytes": 81943107 - }, - { - "name": "Gp0115679_PFAM GFF file", - "description": "PFAM GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_pfam.gff", - "md5_checksum": "b67886515193abbd1eec79de067b3196", - "id": 
"nmdc:b67886515193abbd1eec79de067b3196", - "file_size_bytes": 65136506 - }, - { - "name": "Gp0115679_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_tigrfam.gff", - "md5_checksum": "05e7a016dddba90801c29de448c43c3c", - "id": "nmdc:05e7a016dddba90801c29de448c43c3c", - "file_size_bytes": 8536835 - }, - { - "name": "Gp0115679_SMART GFF file", - "description": "SMART GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_smart.gff", - "md5_checksum": "7effd4db11316ff95f6a8303807d530f", - "id": "nmdc:7effd4db11316ff95f6a8303807d530f", - "file_size_bytes": 19907975 - }, - { - "name": "Gp0115679_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_supfam.gff", - "md5_checksum": "503770f008dd2cf04d73821412dcf23a", - "id": "nmdc:503770f008dd2cf04d73821412dcf23a", - "file_size_bytes": 107636995 - }, - { - "name": "Gp0115679_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cath_funfam.gff", - "md5_checksum": "c33049c64af55f8ac54d52c861b0a221", - "id": "nmdc:c33049c64af55f8ac54d52c861b0a221", - "file_size_bytes": 89046662 - }, - { - "name": "Gp0115679_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko_ec.gff", - "md5_checksum": "b162efd63f79bc34de66f61348471b74", - "id": "nmdc:b162efd63f79bc34de66f61348471b74", - "file_size_bytes": 57348606 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115679_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.tooShort.fa", - "md5_checksum": "d830e60f4fb30ecb0610f991dcc70e47", - "id": "nmdc:d830e60f4fb30ecb0610f991dcc70e47", - "file_size_bytes": 215033122 - }, - { - "name": "Gp0115679_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115679", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.unbinned.fa", - "md5_checksum": "d33af65556b85b1aaf3a5c48b6e294de", - "id": "nmdc:d33af65556b85b1aaf3a5c48b6e294de", - "file_size_bytes": 44057142 - }, - { - "name": "Gp0115679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_checkm_qa.out", - "md5_checksum": "d2d655091735e6308aafca1e1633aad9", - "id": "nmdc:d2d655091735e6308aafca1e1633aad9", - "file_size_bytes": 2394 - }, - { - "name": "Gp0115679_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115679", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_hqmq_bin.zip", - "md5_checksum": "17c6259329da1bbe6da5a18274452a8d", - "id": "nmdc:17c6259329da1bbe6da5a18274452a8d", - "file_size_bytes": 3215059 - }, - { - "name": "Gp0115679_metabat2 bins", - "description": "metabat2 bins for Gp0115679", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_metabat_bin.zip", - "md5_checksum": "9250ad41cb19e04a6002e62bda38bbfb", - "id": "nmdc:9250ad41cb19e04a6002e62bda38bbfb", - "file_size_bytes": 1649649 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da7" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115679", - "url": "https://data.microbiomedata.org/data/1781_86105/assembly/mapping_stats.txt", - "file_size_bytes": 39709915, - "type": "nmdc:DataObject", - "id": "nmdc:eb7b565580c8a81f8c674ce87a7c07c3", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da9" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115679", - "url": "https://data.microbiomedata.org/data/1781_86105/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 6165329815, - "type": "nmdc:DataObject", - "id": "nmdc:11956fa2a6c996aedac70f779222570f", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14dab" - }, - "description": "Assembled scaffold fasta for gold:Gp0115679", - "url": "https://data.microbiomedata.org/data/1781_86105/assembly/assembly_scaffolds.fna", - "file_size_bytes": 270701949, - "type": "nmdc:DataObject", - "id": "nmdc:c8a6971a9982af6e8a054dee6d1cb78d", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14db3" - }, - "description": "Assembled contigs fasta for gold:Gp0115679", - "url": "https://data.microbiomedata.org/data/1781_86105/assembly/assembly_contigs.fna", - "file_size_bytes": 272371965, - "type": "nmdc:DataObject", - "id": "nmdc:4fb2f3e8ebd99cea1e797e248b2e5c1d", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14dbd" - }, - "description": "Assembled AGP file for gold:Gp0115679", - "url": 
"https://data.microbiomedata.org/data/1781_86105/assembly/assembly.agp", - "file_size_bytes": 34847488, - "type": "nmdc:DataObject", - "id": "nmdc:a85f992644d46cb23475ac9850f4e864", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15a0e" - }, - "id": "nmdc:ed54f8af3521aae47d2757852695a188", - "name": "1781_86105.json", - "description": "Gold:Gp0115679 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86105/ReadbasedAnalysis/1781_86105.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15a10" - }, - "id": "nmdc:7ca0d8ecd80292bc9bea1862c755a2f8", - "name": "1781_86105.krona.html", - "description": "Gold:Gp0115679 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86105/ReadbasedAnalysis/centrifuge/1781_86105.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ed" - }, - "id": "nmdc:6ef4477214fc698cd494f3e516cdda23", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115679", - "file_size_bytes": 208557497, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ef" - }, - "id": "nmdc:f81a3c77048a277c28b110d4797f48d5", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115679", - "file_size_bytes": 3400, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f0" - }, - "id": "nmdc:e3396f34bc4bcc83c4b43bbd1f698450", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for 
gold:Gp0115679", - "file_size_bytes": 51045792, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f1" - }, - "id": "nmdc:84cac08cd46c06525b4001424027fd60", - "name": "gtdbtk.bac120.summary.tsv", - "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115679", - "file_size_bytes": 830, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f3" - }, - "id": "nmdc:45cce58fd37ad46815381000dd21470d", - "name": "gold:Gp0115679.bins.9.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 657176, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.9.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f4" - }, - "id": "nmdc:db508aa3c84853ff9e2c156d7afcbd7b", - "name": "gold:Gp0115679.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 358418, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f5" - }, - "id": "nmdc:219d9761a4dbe9e7374c659a03e8ecf0", - "name": "gold:Gp0115679.bins.6.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 921488, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f6" - }, - "id": "nmdc:d0582754ac551686e46730419ec9d047", - "name": "gold:Gp0115679.bins.5.fa", - "description": "metabat2 binned contig file for 
gold:Gp0115679", - "file_size_bytes": 374792, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f7" - }, - "id": "nmdc:40152478dc669bc63bbbd4bda0d0c5df", - "name": "gold:Gp0115679.bins.11.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 332863, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.11.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f8" - }, - "id": "nmdc:4d9e7c6ffa3fa24c9b9fdeb0e722c57f", - "name": "gold:Gp0115679.bins.12.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 658843, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.12.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162f9" - }, - "id": "nmdc:df7fd9d79f734d8e02589a0bae44a810", - "name": "gold:Gp0115679.bins.10.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 325713, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.10.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162fa" - }, - "id": "nmdc:abe52bd40df93d12453b1f543a782b2a", - "name": "gold:Gp0115679.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 324504, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162fb" - }, - "id": "nmdc:8e65f7abf4dfe56dce5ea4d42dc83b12", - "name": "gold:Gp0115679.bins.7.fa", - 
"description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 430938, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162fc" - }, - "id": "nmdc:aaac58966e74cbc7e8d0ba7048a8691f", - "name": "gold:Gp0115679.bins.8.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 378596, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.8.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162fd" - }, - "id": "nmdc:d4646cc3223b1d9d13c012a6f0dd98e3", - "name": "gold:Gp0115679.bins.13.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 209262, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.13.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162fe" - }, - "id": "nmdc:4a6a269ffe428009827fe87d5fc82555", - "name": "gold:Gp0115679.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 238341, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ff" - }, - "id": "nmdc:5f136581b45cbac4ffcd9bd61c3b2c0c", - "name": "gold:Gp0115679.bins.16.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 645317, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.16.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16300" - }, - "id": "nmdc:c42cd75d1b3e947bb6d82fcca7d4d1a2", - 
"name": "gold:Gp0115679.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 2098951, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16301" - }, - "id": "nmdc:0922541390d01fa0e51bd93665be8913", - "name": "gold:Gp0115679.bins.14.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 380540, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.14.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16306" - }, - "id": "nmdc:c5df3f066326803dbeafb15a36aa0d93", - "name": "gold:Gp0115679.bins.15.fa", - "description": "metabat2 binned contig file for gold:Gp0115679", - "file_size_bytes": 299202, - "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.15.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dad" - }, - "description": "Functional annotation GFF file for gold:Gp0115679", - "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_functional_annotation.gff", - "md5_checksum": "b8c895face8e8e77bbfc7163c7eb7850", - "file_size_bytes": 3385, - "id": "nmdc:b8c895face8e8e77bbfc7163c7eb7850", - "name": "gold:Gp0115679_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dae" - }, - "description": "KO TSV File for gold:Gp0115679", - "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_ko.tsv", - "md5_checksum": "ee75eaed19b9a259e0e70e20a53f7fba", - "file_size_bytes": 3385, - "id": "nmdc:ee75eaed19b9a259e0e70e20a53f7fba", - "name": "gold:Gp0115679_KO TSV File", - 
"data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16daf" - }, - "description": "EC TSV File for gold:Gp0115679", - "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_ec.tsv", - "md5_checksum": "aa5fa1b83592459bd3e742be4949d0b1", - "file_size_bytes": 3385, - "id": "nmdc:aa5fa1b83592459bd3e742be4949d0b1", - "name": "gold:Gp0115679_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16db1" - }, - "description": "Protein FAA for gold:Gp0115679", - "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_proteins.faa", - "md5_checksum": "21f3d777493f87403b60a4a1b3dd2f1b", - "file_size_bytes": 3385, - "id": "nmdc:21f3d777493f87403b60a4a1b3dd2f1b", - "name": "gold:Gp0115679_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16db6" - }, - "description": "Structural annotation GFF file for gold:Gp0115679", - "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_structural_annotation.gff", - "md5_checksum": "b63b42c7892b4a14e5661bca5bfa2419", - "file_size_bytes": 3385, - "id": "nmdc:b63b42c7892b4a14e5661bca5bfa2419", - "name": "gold:Gp0115679_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3472e" - }, - "has_input": [ - "nmdc:e4314c3743795e0be8beda8b7f806557", - "nmdc:002ed5f389b8a13735d27a8741290f6b", - "nmdc:8aed63ca1302c874040e74aceb54ff05" - ], - "too_short_contig_num": 504368, - "part_of": [ - "nmdc:mga0gg1q48" - ], - "binned_contig_num": 1887, - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:d830e60f4fb30ecb0610f991dcc70e47", - "nmdc:d33af65556b85b1aaf3a5c48b6e294de", - "nmdc:d2d655091735e6308aafca1e1633aad9", - "nmdc:17c6259329da1bbe6da5a18274452a8d", - "nmdc:9250ad41cb19e04a6002e62bda38bbfb" - ], - "was_informed_by": "gold:Gp0115679", - "input_contig_num": 531775, - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0gg1q48", - "mags_list": [ - { - "number_of_contig": 73, - "completeness": 95.65, - "bin_name": "bins.1", - "gene_count": 2974, - "bin_quality": "HQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 1, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 0.22, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 45 - }, - { - "number_of_contig": 253, - "completeness": 39.12, - "bin_name": "bins.10", - "gene_count": 1586, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.79, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 20 - }, - { - "number_of_contig": 135, - "completeness": 16.83, - "bin_name": "bins.2", - "gene_count": 706, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 11 - }, - { - "number_of_contig": 144, - "completeness": 22.53, - "bin_name": "bins.3", - "gene_count": 731, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": 
"", - "contamination": 0.14, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 16 - }, - { - "number_of_contig": 273, - "completeness": 68.97, - "bin_name": "bins.4", - "gene_count": 2023, - "bin_quality": "MQ", - "gtdbtk_species": "UBA5335 sp002862435", - "gtdbtk_order": "UBA5335", - "num_16s": 0, - "gtdbtk_family": "UBA5335", - "gtdbtk_domain": "Bacteria", - "contamination": 1.72, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA5335", - "num_t_rna": 33 - }, - { - "number_of_contig": 3, - "completeness": 8.33, - "bin_name": "bins.5", - "gene_count": 306, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 16.67, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 15 - }, - { - "number_of_contig": 4, - "completeness": 77.01, - "bin_name": "bins.6", - "gene_count": 976, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA9983_A", - "num_16s": 1, - "gtdbtk_family": "UBA2163", - "gtdbtk_domain": "Bacteria", - "contamination": 1.72, - "gtdbtk_class": "Paceibacteria", - "gtdbtk_phylum": "Patescibacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "1-14-0-10-47-16", - "num_t_rna": 34 - }, - { - "number_of_contig": 309, - "completeness": 74.97, - "bin_name": "bins.7", - "gene_count": 2072, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Pseudomonadales", - "num_16s": 0, - "gtdbtk_family": "UBA3067", - "gtdbtk_domain": "Bacteria", - "contamination": 2.13, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "UBA3067", - "num_t_rna": 28 - }, - { - "number_of_contig": 182, - "completeness": 92.61, - "bin_name": "bins.8", - "gene_count": 3044, - "bin_quality": 
"MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 0, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 5.0, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 41 - }, - { - "number_of_contig": 511, - "completeness": 48.64, - "bin_name": "bins.9", - "gene_count": 2267, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 5.03, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 15 - } - ], - "unbinned_contig_num": 25520, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T06:30:42+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c9" - }, - "has_input": [ - "nmdc:e4314c3743795e0be8beda8b7f806557" - ], - "part_of": [ - "nmdc:mga0gg1q48" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", - "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", - "nmdc:8aed63ca1302c874040e74aceb54ff05", - "nmdc:6361a06de62d93909abfb565a47fd5f0", - "nmdc:bd9d330d1d6a925066003d653a171ca5", - "nmdc:c497ffc128d6738bf3868529eb7ff899", - "nmdc:b67886515193abbd1eec79de067b3196", - "nmdc:05e7a016dddba90801c29de448c43c3c", - "nmdc:7effd4db11316ff95f6a8303807d530f", - "nmdc:503770f008dd2cf04d73821412dcf23a", - "nmdc:c33049c64af55f8ac54d52c861b0a221", - "nmdc:b162efd63f79bc34de66f61348471b74" - ], - "was_informed_by": "gold:Gp0115679", - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0gg1q48", - 
"started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T06:30:42+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fbd" - }, - "has_input": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" - ], - "part_of": [ - "nmdc:mga0gg1q48" - ], - "ctg_logsum": 682158, - "scaf_logsum": 725191, - "gap_pct": 0.02692, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e4314c3743795e0be8beda8b7f806557", - "nmdc:2a288a5827b66c88f8abf202bbe37aab", - "nmdc:a51c7b3a70601a885594936fd6c753bc", - "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", - "nmdc:002ed5f389b8a13735d27a8741290f6b" - ], - "asm_score": 12.582, - "was_informed_by": "gold:Gp0115679", - "ctg_powsum": 84136, - "scaf_max": 884972, - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "scaf_powsum": 89882, - "execution_resource": "NERSC-Cori", - "contigs": 531791, - "name": "Assembly Activity for nmdc:mga0gg1q48", - "ctg_max": 719201, - "gc_std": 0.09689, - "contig_bp": 254202396, - "gc_avg": 0.48697, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 254270837, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 525116, - "ended_at_time": "2021-10-11T06:30:42+00:00", - "ctg_l50": 449, - "ctg_l90": 285, - "ctg_n50": 139317, - "ctg_n90": 451813, - "scaf_l50": 455, - "scaf_l90": 285, - "scaf_n50": 133535, - "scaf_n90": 445430, - "scaf_l_gt50k": 3540548, - "scaf_n_gt50k": 34, - "scaf_pct_gt50k": 1.3924317 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b36" - }, - "id": "nmdc:omprc-11-hymrq852", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", - "description": "Sterilized sand packs were incubated back in 
the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-11219w54" - ], - "has_output": [ - "jgi:55a9cb010d87852b21508920" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115679" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c884" - }, - "has_input": [ - "nmdc:3bf389b767cf8a49224dc0028e55eeb7" - ], - "part_of": [ - "nmdc:mga0gg1q48" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", - "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49" - ], - "was_informed_by": "gold:Gp0115679", - "input_read_count": 67696542, - "output_read_bases": 9825387057, - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "execution_resource": "NERSC-Cori", - "input_read_bases": 10222177842, - "name": "Read QC Activity for nmdc:mga0gg1q48", - "output_read_count": 67147510, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T06:30:42+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf4b" - }, - "has_input": [ - "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e20f8c00473472fa073adde871860801", - "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", - "nmdc:f721d9dd168b0dea080b191a4396167e", - "nmdc:ab77e396ec643b58b54da92848b88a96", - "nmdc:f2514844e47a9e3d268671f80f152bc1", - 
"nmdc:a3e49f39f33c54bc8d9430a947cd4b16", - "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", - "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", - "nmdc:77860ee043ae9738e7702a3f665b15fa" - ], - "was_informed_by": "gold:Gp0115679", - "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:30:42+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1840708400, - "type": "nmdc:DataObject", - "id": "jgi:55d818010d8785342fcf8278", - "name": "9387.2.132031.GTCCGC.fastq.gz" - }, - { - "name": "Gp0115667_Filtered Reads", - "description": "Filtered Reads for Gp0115667", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filtered.fastq.gz", - "md5_checksum": "7d4057e3a44a05171c13fb0ed3e2294a", - "id": "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", - "file_size_bytes": 1599931347 - }, - { - "name": "Gp0115667_Filtered Stats", - "description": "Filtered Stats for Gp0115667", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filterStats.txt", - "md5_checksum": "dae7c6e067f69ef6db39b4240cc450ba", - "id": "nmdc:dae7c6e067f69ef6db39b4240cc450ba", - "file_size_bytes": 286 - }, - { - "name": "Gp0115667_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115667", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", - "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", - "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "file_size_bytes": 10576 - }, - { - "name": "Gp0115667_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", - "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", - "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "file_size_bytes": 792905 - }, - { - "name": "Gp0115667_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115667", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", - "md5_checksum": "2afff209a40ca4895307f3a47080c534", - "id": "nmdc:2afff209a40ca4895307f3a47080c534", - "file_size_bytes": 254763 - }, - { - "name": "Gp0115667_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115667", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", - "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", - "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "file_size_bytes": 1336111813 - }, - { - "name": "Gp0115667_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115667", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", - "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", - "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "file_size_bytes": 254506 - }, - { - "name": "Gp0115667_Centrifuge Krona HTML report", - "description": 
"Centrifuge Krona HTML report for Gp0115667", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", - "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", - "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "file_size_bytes": 2323153 - }, - { - "name": "Gp0115667_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115667", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", - "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", - "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", - "file_size_bytes": 1097852664 - }, - { - "name": "Gp0115667_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115667", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", - "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", - "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "file_size_bytes": 639213 - }, - { - "name": "Gp0115667_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115667", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", - "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", - "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", - "file_size_bytes": 3979807 - }, - { - "name": "Gp0115667_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115667", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", - "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", - "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", - "file_size_bytes": 
62926054 - }, - { - "name": "Gp0115667_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115667", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", - "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", - "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", - "file_size_bytes": 62577490 - }, - { - "name": "Gp0115667_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", - "md5_checksum": "2e4532cb03bb1e9201976b9d65893788", - "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", - "file_size_bytes": 9189143 - }, - { - "name": "Gp0115667_Assembled AGP file", - "description": "Assembled AGP file for Gp0115667", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_assembly.agp", - "md5_checksum": "e49f8a26a9cd0420b688c967bbacb4c6", - "id": "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", - "file_size_bytes": 8508903 - }, - { - "name": "Gp0115667_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115667", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_pairedMapped_sorted.bam", - "md5_checksum": "d9b957c7efe7f753fe67441d0be605c6", - "id": "nmdc:d9b957c7efe7f753fe67441d0be605c6", - "file_size_bytes": 1771039554 - }, - { - "name": "Gp0115667_Protein FAA", - "description": "Protein FAA for Gp0115667", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_proteins.faa", - "md5_checksum": "45e8b887fc06ddbf2af3ecf9c91a7bf7", - "id": "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", - "file_size_bytes": 31564336 - }, - { - "name": "Gp0115667_Structural 
annotation GFF file", - "description": "Structural annotation GFF file for Gp0115667", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_structural_annotation.gff", - "md5_checksum": "26ab4381753f685c44091e1f17d8bab5", - "id": "nmdc:26ab4381753f685c44091e1f17d8bab5", - "file_size_bytes": 2760 - }, - { - "name": "Gp0115667_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115667", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_functional_annotation.gff", - "md5_checksum": "5a378f3975ab6c2cf2a36b0b007ea3f8", - "id": "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", - "file_size_bytes": 34525554 - }, - { - "name": "Gp0115667_KO TSV file", - "description": "KO TSV file for Gp0115667", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko.tsv", - "md5_checksum": "6df49253fee066c699d6a5191a0efaed", - "id": "nmdc:6df49253fee066c699d6a5191a0efaed", - "file_size_bytes": 3439857 - }, - { - "name": "Gp0115667_EC TSV file", - "description": "EC TSV file for Gp0115667", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ec.tsv", - "md5_checksum": "5e35e51a595f892968e57681ee448e5f", - "id": "nmdc:5e35e51a595f892968e57681ee448e5f", - "file_size_bytes": 2203532 - }, - { - "name": "Gp0115667_COG GFF file", - "description": "COG GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cog.gff", - "md5_checksum": "ae1bc890152d28387f65c65d434b97ea", - "id": "nmdc:ae1bc890152d28387f65c65d434b97ea", - "file_size_bytes": 15384958 - }, - { - "name": "Gp0115667_PFAM GFF file", - "description": "PFAM GFF file for Gp0115667", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_pfam.gff", - "md5_checksum": "fb736eaba77cbd99135ddbc32168db94", - "id": "nmdc:fb736eaba77cbd99135ddbc32168db94", - "file_size_bytes": 12472999 - }, - { - "name": "Gp0115667_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_tigrfam.gff", - "md5_checksum": "3b00892f95bc4dedaf4384685a75d52f", - "id": "nmdc:3b00892f95bc4dedaf4384685a75d52f", - "file_size_bytes": 1755779 - }, - { - "name": "Gp0115667_SMART GFF file", - "description": "SMART GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_smart.gff", - "md5_checksum": "b8c0d7c187169f34aafc17308aeea2ed", - "id": "nmdc:b8c0d7c187169f34aafc17308aeea2ed", - "file_size_bytes": 3937293 - }, - { - "name": "Gp0115667_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_supfam.gff", - "md5_checksum": "2a8e4bb3922ec664bbb5ce49a30cc87e", - "id": "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", - "file_size_bytes": 22725250 - }, - { - "name": "Gp0115667_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cath_funfam.gff", - "md5_checksum": "34eddc2289f3e3b4707a6c8060f6dd99", - "id": "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", - "file_size_bytes": 17788890 - }, - { - "name": "Gp0115667_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko_ec.gff", - "md5_checksum": "0a51a22e2cf94c853657381549aa8f04", - "id": "nmdc:0a51a22e2cf94c853657381549aa8f04", - "file_size_bytes": 11004264 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - 
"description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115667_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.tooShort.fa", - "md5_checksum": "1277a6924ab380e001a7208e7ebbb0e3", - "id": "nmdc:1277a6924ab380e001a7208e7ebbb0e3", - "file_size_bytes": 46335107 - }, - { - "name": "Gp0115667_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.unbinned.fa", - "md5_checksum": "48772112891988a2ef3f0c40786c11fd", - "id": "nmdc:48772112891988a2ef3f0c40786c11fd", - "file_size_bytes": 10701981 - }, - { - "name": "Gp0115667_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115667", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_checkm_qa.out", - "md5_checksum": "527e2c19607c225a707db67b5be01b6f", - "id": "nmdc:527e2c19607c225a707db67b5be01b6f", - "file_size_bytes": 1360 - }, - { - "name": "Gp0115667_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115667", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_hqmq_bin.zip", - "md5_checksum": "027626ff998bf1e495e32d09cab4bb08", - "id": "nmdc:027626ff998bf1e495e32d09cab4bb08", - "file_size_bytes": 1462611 - }, - { - "name": 
"Gp0115667_metabat2 bins", - "description": "metabat2 bins for Gp0115667", - "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_metabat_bin.zip", - "md5_checksum": "733e798989606c802b3bbfc952a38841", - "id": "nmdc:733e798989606c802b3bbfc952a38841", - "file_size_bytes": 334014 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d71" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115667", - "url": "https://data.microbiomedata.org/data/1781_86098/assembly/mapping_stats.txt", - "file_size_bytes": 8608508, - "type": "nmdc:DataObject", - "id": "nmdc:65454371fa1809684cc19c5c1cb49c4c", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d72" - }, - "description": "Assembled contigs fasta for gold:Gp0115667", - "url": "https://data.microbiomedata.org/data/1781_86098/assembly/assembly_contigs.fna", - "file_size_bytes": 62345419, - "type": "nmdc:DataObject", - "id": "nmdc:28a8512eff8b81cebce0614fe5ed18a0", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d74" - }, - "description": "Assembled AGP file for gold:Gp0115667", - "url": "https://data.microbiomedata.org/data/1781_86098/assembly/assembly.agp", - "file_size_bytes": 7346593, - "type": "nmdc:DataObject", - "id": "nmdc:7ce2c4d4d2cbf019fd43453b6fb54fac", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d75" - }, - "description": "Assembled scaffold fasta for gold:Gp0115667", - "url": "https://data.microbiomedata.org/data/1781_86098/assembly/assembly_scaffolds.fna", - "file_size_bytes": 61997325, - "type": "nmdc:DataObject", - "id": "nmdc:5c6200f0a56a1ec503ac0245b1d2cbdf", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d76" - }, - "description": "Metagenome Alignment BAM 
file for gold:Gp0115667", - "url": "https://data.microbiomedata.org/data/1781_86098/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1747730642, - "type": "nmdc:DataObject", - "id": "nmdc:d07858a6b7932797c1e4e8b019f82131", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15992" - }, - "id": "nmdc:257cca2e47a0917e48596800a3f9f161", - "name": "1781_86098.krona.html", - "description": "Gold:Gp0115667 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86098/ReadbasedAnalysis/centrifuge/1781_86098.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15999" - }, - "id": "nmdc:734cf235a0ede4b50b75488ee5fe893a", - "name": "1781_86098.json", - "description": "Gold:Gp0115667 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86098/ReadbasedAnalysis/1781_86098.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16270" - }, - "id": "nmdc:1e34d5f7bf6a095e74dc5b0ba743c6c4", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115667", - "file_size_bytes": 11386423, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16274" - }, - "id": "nmdc:12c891d4c33195700fbf605402639c77", - "name": "gold:Gp0115667.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115667", - "file_size_bytes": 211939, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16278" - }, - "id": "nmdc:6ac802499984a5da4fc7aa2cd17af998", - "name": "checkm_qa.out", - "description": 
"metabat2 bin checkm quality assessment result for gold:Gp0115667", - "file_size_bytes": 2040, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1628a" - }, - "id": "nmdc:3fc44ed589c47ca3a915e818dc9ef957", - "name": "gold:Gp0115667.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115667", - "file_size_bytes": 229517, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1628c" - }, - "id": "nmdc:ecf40d29af87b508f4128c7520dbddff", - "name": "gold:Gp0115667.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115667", - "file_size_bytes": 288428, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1628f" - }, - "id": "nmdc:3d69f4d7f7bdcabac2d974bf0436cba0", - "name": "gold:Gp0115667.bins.7.fa", - "description": "metabat2 binned contig file for gold:Gp0115667", - "file_size_bytes": 332716, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16293" - }, - "id": "nmdc:5977ef8709f03d6b5dd25112cf45dd6a", - "name": "gold:Gp0115667.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0115667", - "file_size_bytes": 770132, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16294" - }, - "id": "nmdc:411197955ce463b3374262983e6e6c12", - "name": 
"gold:Gp0115667.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115667", - "file_size_bytes": 1544238, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16299" - }, - "id": "nmdc:e4e449bf8b38e28b1c585494dd53f83a", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115667", - "file_size_bytes": 45010132, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1629a" - }, - "id": "nmdc:9e21ea6dec1b46e65841a271b2bbe8fe", - "name": "gtdbtk.bac120.summary.tsv", - "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115667", - "file_size_bytes": 846, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a0" - }, - "id": "nmdc:a2fecaa8e738191ae8a4934f235ad934", - "name": "gold:Gp0115667.bins.6.fa", - "description": "metabat2 binned contig file for gold:Gp0115667", - "file_size_bytes": 305691, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a1" - }, - "id": "nmdc:843c9624d7bb8bdbcfe26fdde4117f0d", - "name": "gold:Gp0115667.bins.8.fa", - "description": "metabat2 binned contig file for gold:Gp0115667", - "file_size_bytes": 1448474, - "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.8.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d85" - }, - "description": "EC TSV File for 
gold:Gp0115667", - "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_ec.tsv", - "md5_checksum": "babc9f95621eed35bc7975dee8b417b9", - "file_size_bytes": 3385, - "id": "nmdc:babc9f95621eed35bc7975dee8b417b9", - "name": "gold:Gp0115667_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d86" - }, - "description": "KO TSV File for gold:Gp0115667", - "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_ko.tsv", - "md5_checksum": "bc5043b689463c3651c15ad4ba1aa9a4", - "file_size_bytes": 3385, - "id": "nmdc:bc5043b689463c3651c15ad4ba1aa9a4", - "name": "gold:Gp0115667_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d88" - }, - "description": "Functional annotation GFF file for gold:Gp0115667", - "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_functional_annotation.gff", - "md5_checksum": "c47020ef7958f3a4c4458e0797fc2400", - "file_size_bytes": 3385, - "id": "nmdc:c47020ef7958f3a4c4458e0797fc2400", - "name": "gold:Gp0115667_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d89" - }, - "description": "Protein FAA for gold:Gp0115667", - "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_proteins.faa", - "md5_checksum": "acdedd1c48e28e4f4e0d0679cae417f9", - "file_size_bytes": 3385, - "id": "nmdc:acdedd1c48e28e4f4e0d0679cae417f9", - "name": "gold:Gp0115667_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d8b" - }, - "description": "Structural annotation GFF file for gold:Gp0115667", - "url": 
"https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_structural_annotation.gff", - "md5_checksum": "6f236cc8b728333fcf85e4f27873a500", - "file_size_bytes": 3385, - "id": "nmdc:6f236cc8b728333fcf85e4f27873a500", - "name": "gold:Gp0115667_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3472a" - }, - "has_input": [ - "nmdc:b3cefc5a9599a4fb9432132baf7f5565", - "nmdc:d9b957c7efe7f753fe67441d0be605c6", - "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8" - ], - "too_short_contig_num": 109354, - "part_of": [ - "nmdc:mga0n0je44" - ], - "binned_contig_num": 596, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:1277a6924ab380e001a7208e7ebbb0e3", - "nmdc:48772112891988a2ef3f0c40786c11fd", - "nmdc:527e2c19607c225a707db67b5be01b6f", - "nmdc:027626ff998bf1e495e32d09cab4bb08", - "nmdc:733e798989606c802b3bbfc952a38841" - ], - "was_informed_by": "gold:Gp0115667", - "input_contig_num": 116127, - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0n0je44", - "mags_list": [ - { - "number_of_contig": 166, - "completeness": 19.51, - "bin_name": "bins.1", - "gene_count": 906, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.04, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 14 - }, - { - "number_of_contig": 70, - "completeness": 99.78, - "bin_name": "bins.2", - "gene_count": 3225, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 1, - "gtdbtk_family": "UBA11222", - 
"gtdbtk_domain": "Bacteria", - "contamination": 0.43, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 49 - }, - { - "number_of_contig": 67, - "completeness": 41.5, - "bin_name": "bins.3", - "gene_count": 464, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 3.76, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 10 - }, - { - "number_of_contig": 293, - "completeness": 56.99, - "bin_name": "bins.4", - "gene_count": 1734, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Pseudomonadales", - "num_16s": 0, - "gtdbtk_family": "UBA3067", - "gtdbtk_domain": "Bacteria", - "contamination": 3.01, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA3067", - "num_t_rna": 27 - } - ], - "unbinned_contig_num": 6177, - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:24+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9cc" - }, - "has_input": [ - "nmdc:b3cefc5a9599a4fb9432132baf7f5565" - ], - "part_of": [ - "nmdc:mga0n0je44" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", - "nmdc:26ab4381753f685c44091e1f17d8bab5", - "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", - "nmdc:6df49253fee066c699d6a5191a0efaed", - "nmdc:5e35e51a595f892968e57681ee448e5f", - "nmdc:ae1bc890152d28387f65c65d434b97ea", - "nmdc:fb736eaba77cbd99135ddbc32168db94", - "nmdc:3b00892f95bc4dedaf4384685a75d52f", - 
"nmdc:b8c0d7c187169f34aafc17308aeea2ed", - "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", - "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", - "nmdc:0a51a22e2cf94c853657381549aa8f04" - ], - "was_informed_by": "gold:Gp0115667", - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0n0je44", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:58:24+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb7" - }, - "has_input": [ - "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" - ], - "part_of": [ - "nmdc:mga0n0je44" - ], - "ctg_logsum": 195440, - "scaf_logsum": 196103, - "gap_pct": 0.00293, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b3cefc5a9599a4fb9432132baf7f5565", - "nmdc:b60f674a01e3f7fff5ead95f330cef4f", - "nmdc:2e4532cb03bb1e9201976b9d65893788", - "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", - "nmdc:d9b957c7efe7f753fe67441d0be605c6" - ], - "asm_score": 17.061, - "was_informed_by": "gold:Gp0115667", - "ctg_powsum": 25448, - "scaf_max": 245816, - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "scaf_powsum": 25552, - "execution_resource": "NERSC-Cori", - "contigs": 116132, - "name": "Assembly Activity for nmdc:mga0n0je44", - "ctg_max": 245816, - "gc_std": 0.12277, - "contig_bp": 58413782, - "gc_avg": 0.47644, - "started_at_time": "2021-10-11T02:28:16Z", - "scaf_bp": 58415492, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 116033, - "ended_at_time": "2021-10-11T03:58:24+00:00", - "ctg_l50": 479, - "ctg_l90": 286, - "ctg_n50": 26909, - "ctg_n90": 95138, - "scaf_l50": 479, - "scaf_l90": 286, - "scaf_n50": 26889, - "scaf_n90": 95057, - "scaf_l_gt50k": 1865703, - "scaf_n_gt50k": 17, - "scaf_pct_gt50k": 3.1938498 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": 
[], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b37" - }, - "id": "nmdc:omprc-11-yt8css91", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", - "has_input": [ - "nmdc:bsm-11-ynevd369" - ], - "has_output": [ - "jgi:55d818010d8785342fcf8278" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115667" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c882" - }, - "has_input": [ - "nmdc:cb2e0605e8f22a398d982e35aee57715" - ], - "part_of": [ - "nmdc:mga0n0je44" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", - "nmdc:dae7c6e067f69ef6db39b4240cc450ba" - ], - "was_informed_by": "gold:Gp0115667", - "input_read_count": 19416222, - "output_read_bases": 2825090769, - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "execution_resource": "NERSC-Cori", - "input_read_bases": 2931849522, - "name": "Read QC Activity for nmdc:mga0n0je44", - "output_read_count": 18855352, - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:24+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf4a" - }, - "has_input": [ - 
"nmdc:7d4057e3a44a05171c13fb0ed3e2294a" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:56edf81e5f5102edf7e416bc9430fbb6", - "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", - "nmdc:2afff209a40ca4895307f3a47080c534", - "nmdc:d76c80bf15c4fd84f28c7150f24a8143", - "nmdc:b9d6d8a8297f9a604ac85a334a3412de", - "nmdc:fe4bd9f63c32f50676792e3c4adced08", - "nmdc:eb189cbf0543203d2521397b73d4d34b", - "nmdc:ce3f002a824efde4a7134e6cd2e6306b", - "nmdc:ac90bf3384ce44d097f7897ac5ff8134" - ], - "was_informed_by": "gold:Gp0115667", - "id": "nmdc:8093869c91384d3299431e56019f7de0", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:24+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1792111281, - "type": "nmdc:DataObject", - "id": "jgi:55d817f20d8785342fcf826c", - "name": "9387.2.132031.TAGCTT.fastq.gz" - }, - { - "name": "Gp0115664_Filtered Reads", - "description": "Filtered Reads for Gp0115664", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filtered.fastq.gz", - "md5_checksum": "232e31505b6a0251df2303c0563d64c1", - "id": "nmdc:232e31505b6a0251df2303c0563d64c1", - "file_size_bytes": 1566732675 - }, - { - "name": "Gp0115664_Filtered Stats", - "description": "Filtered Stats for Gp0115664", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filterStats.txt", - "md5_checksum": "f3f4f75f19c92af6e98d2b45cccaacd5", - "id": "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5", - "file_size_bytes": 289 - }, - { - "name": "Gp0115664_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", - "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", - "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "file_size_bytes": 9591 - }, - { - "name": "Gp0115664_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", - "md5_checksum": "7f93f97242aed036019f13492f5af35c", - "id": "nmdc:7f93f97242aed036019f13492f5af35c", - "file_size_bytes": 885985 - }, - { - "name": "Gp0115664_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115664", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", - "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", - "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "file_size_bytes": 251303 - }, - { - "name": "Gp0115664_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115664", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", - "md5_checksum": "a4243f71a0288f489c566ae85d85891d", - "id": "nmdc:a4243f71a0288f489c566ae85d85891d", - "file_size_bytes": 1268144933 - }, - { - "name": "Gp0115664_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115664", - "data_object_type": "Centrifuge Classification Report", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", - "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", - "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "file_size_bytes": 254575 - }, - { - "name": "Gp0115664_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115664", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", - "md5_checksum": "a80779b32415ef001d0403f0b618b612", - "id": "nmdc:a80779b32415ef001d0403f0b618b612", - "file_size_bytes": 2327293 - }, - { - "name": "Gp0115664_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115664", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", - "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", - "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "file_size_bytes": 1037932028 - }, - { - "name": "Gp0115664_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115664", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", - "md5_checksum": "ce47d6686edb7b3472102d5883229c45", - "id": "nmdc:ce47d6686edb7b3472102d5883229c45", - "file_size_bytes": 641242 - }, - { - "name": "Gp0115664_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115664", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", - "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", - "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", - "file_size_bytes": 3995680 - }, - { - "name": "Gp0115664_Assembled contigs 
fasta", - "description": "Assembled contigs fasta for Gp0115664", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_contigs.fna", - "md5_checksum": "3faf965a2e745048afed5d1c065a78c4", - "id": "nmdc:3faf965a2e745048afed5d1c065a78c4", - "file_size_bytes": 36012597 - }, - { - "name": "Gp0115664_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115664", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_scaffolds.fna", - "md5_checksum": "2d99daff632b19ebdea3f3e5784e2fbc", - "id": "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", - "file_size_bytes": 35776428 - }, - { - "name": "Gp0115664_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_covstats.txt", - "md5_checksum": "d8f255300e5f214baad3c3b4b3c0b51b", - "id": "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", - "file_size_bytes": 6143277 - }, - { - "name": "Gp0115664_Assembled AGP file", - "description": "Assembled AGP file for Gp0115664", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_assembly.agp", - "md5_checksum": "1f9a75569aedc406a3db8ff779b03c19", - "id": "nmdc:1f9a75569aedc406a3db8ff779b03c19", - "file_size_bytes": 5710214 - }, - { - "name": "Gp0115664_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115664", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_pairedMapped_sorted.bam", - "md5_checksum": "faeb84260d97f23162a6176b9442a5c8", - "id": "nmdc:faeb84260d97f23162a6176b9442a5c8", - "file_size_bytes": 1670248615 - }, - { - "name": "Gp0115664_Protein FAA", - "description": "Protein FAA for Gp0115664", - 
"data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_proteins.faa", - "md5_checksum": "338a8f2f739dfc89557e090d604302f6", - "id": "nmdc:338a8f2f739dfc89557e090d604302f6", - "file_size_bytes": 21010319 - }, - { - "name": "Gp0115664_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115664", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_structural_annotation.gff", - "md5_checksum": "0ce03dd69826edcc8b5f6dd01ca176dc", - "id": "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", - "file_size_bytes": 2497 - }, - { - "name": "Gp0115664_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115664", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_functional_annotation.gff", - "md5_checksum": "dc720d27299f6f5c1d38c4dcf1dfc8db", - "id": "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", - "file_size_bytes": 24426623 - }, - { - "name": "Gp0115664_KO TSV file", - "description": "KO TSV file for Gp0115664", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko.tsv", - "md5_checksum": "bc7f7df6865acffd4e07f8b592573eb9", - "id": "nmdc:bc7f7df6865acffd4e07f8b592573eb9", - "file_size_bytes": 2875393 - }, - { - "name": "Gp0115664_EC TSV file", - "description": "EC TSV file for Gp0115664", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ec.tsv", - "md5_checksum": "be38bedd77ab3c072bafbb2c201c953d", - "id": "nmdc:be38bedd77ab3c072bafbb2c201c953d", - "file_size_bytes": 1882878 - }, - { - "name": "Gp0115664_COG GFF file", - "description": "COG GFF file for Gp0115664", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cog.gff", - "md5_checksum": "d7318549a735853b679d15171f5c7ea7", - "id": "nmdc:d7318549a735853b679d15171f5c7ea7", - "file_size_bytes": 12475107 - }, - { - "name": "Gp0115664_PFAM GFF file", - "description": "PFAM GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_pfam.gff", - "md5_checksum": "c1617e0980c6e52149692aee39e30f8c", - "id": "nmdc:c1617e0980c6e52149692aee39e30f8c", - "file_size_bytes": 9305713 - }, - { - "name": "Gp0115664_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_tigrfam.gff", - "md5_checksum": "bd5a9b5e55605ece8873d6ac05e76e0d", - "id": "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", - "file_size_bytes": 1181236 - }, - { - "name": "Gp0115664_SMART GFF file", - "description": "SMART GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_smart.gff", - "md5_checksum": "eb1fba5cad14c3e211baa2de796bca2e", - "id": "nmdc:eb1fba5cad14c3e211baa2de796bca2e", - "file_size_bytes": 2718910 - }, - { - "name": "Gp0115664_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_supfam.gff", - "md5_checksum": "2146449222f410a286e4786bf19c9a5e", - "id": "nmdc:2146449222f410a286e4786bf19c9a5e", - "file_size_bytes": 16463047 - }, - { - "name": "Gp0115664_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cath_funfam.gff", - "md5_checksum": "20ced78c72f67d064bddcc8d5534ebb6", - "id": "nmdc:20ced78c72f67d064bddcc8d5534ebb6", - "file_size_bytes": 12501882 - }, - { - "name": "Gp0115664_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115664", 
- "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko_ec.gff", - "md5_checksum": "7ffe90ceb10c9f40f755aa8d7aa30170", - "id": "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170", - "file_size_bytes": 9217314 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115664_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.tooShort.fa", - "md5_checksum": "767a36b1bffa42d3d25af3f81b15e11b", - "id": "nmdc:767a36b1bffa42d3d25af3f81b15e11b", - "file_size_bytes": 30368582 - }, - { - "name": "Gp0115664_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.unbinned.fa", - "md5_checksum": "994fd58ab9a53c19ba1cdb830e37a132", - "id": "nmdc:994fd58ab9a53c19ba1cdb830e37a132", - "file_size_bytes": 4608000 - }, - { - "name": "Gp0115664_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115664", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_checkm_qa.out", - "md5_checksum": "db59a64c874a9e06c1f1ba58df96fe0d", - "id": "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", - "file_size_bytes": 845 - }, - { - "name": "Gp0115664_high-quality and medium-quality bins", - "description": "high-quality and 
medium-quality bins for Gp0115664", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_hqmq_bin.zip", - "md5_checksum": "0d45611a5d0c80679c00fa759c939df0", - "id": "nmdc:0d45611a5d0c80679c00fa759c939df0", - "file_size_bytes": 182 - }, - { - "name": "Gp0115664_metabat2 bins", - "description": "metabat2 bins for Gp0115664", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_metabat_bin.zip", - "md5_checksum": "bb5835f621252fca37967e00245517ac", - "id": "nmdc:bb5835f621252fca37967e00245517ac", - "file_size_bytes": 314358 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d62" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115664", - "url": "https://data.microbiomedata.org/data/1781_86089/assembly/mapping_stats.txt", - "file_size_bytes": 5751397, - "type": "nmdc:DataObject", - "id": "nmdc:115045c0b7102243d0b9f2d4ffaa20a0", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d64" - }, - "description": "Assembled scaffold fasta for gold:Gp0115664", - "url": "https://data.microbiomedata.org/data/1781_86089/assembly/assembly_scaffolds.fna", - "file_size_bytes": 35384873, - "type": "nmdc:DataObject", - "id": "nmdc:71b690c6d9ad021d8ea68b8fd9d31135", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d65" - }, - "description": "Assembled contigs fasta for gold:Gp0115664", - "url": "https://data.microbiomedata.org/data/1781_86089/assembly/assembly_contigs.fna", - "file_size_bytes": 35620717, - "type": "nmdc:DataObject", - "id": "nmdc:b78f599c21fb31b00d3f8a3c56daeb88", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d66" - }, - "description": "Assembled AGP file for gold:Gp0115664", - "url": 
"https://data.microbiomedata.org/data/1781_86089/assembly/assembly.agp", - "file_size_bytes": 4925804, - "type": "nmdc:DataObject", - "id": "nmdc:f592b315dbd5a060ddb075ad98bf4803", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d67" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115664", - "url": "https://data.microbiomedata.org/data/1781_86089/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1649453824, - "type": "nmdc:DataObject", - "id": "nmdc:662dc676b0b5a486248357f5b887c18b", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15983" - }, - "id": "nmdc:bd586aef31587a585d6be2b9814a2551", - "name": "1781_86089.krona.html", - "description": "Gold:Gp0115664 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86089/ReadbasedAnalysis/centrifuge/1781_86089.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15988" - }, - "id": "nmdc:81b89289645757b1b3608d93bc563c73", - "name": "1781_86089.json", - "description": "Gold:Gp0115664 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86089/ReadbasedAnalysis/1781_86089.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16257" - }, - "id": "nmdc:e8ec230bfe68a272b34540e7f5ab5b2b", - "name": "gold:Gp0115664.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115664", - "file_size_bytes": 287705, - "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1625b" - }, - "id": "nmdc:7a652e7e0f8ded35496989fe90b40c40", - "name": "bins.unbinned.fa", - "description": "unbinned 
fasta file from metabat2 for gold:Gp0115664", - "file_size_bytes": 4643149, - "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1625c" - }, - "id": "nmdc:ab198c4e10213c9e85c4506b269452ee", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115664", - "file_size_bytes": 29395917, - "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1625d" - }, - "id": "nmdc:c24915651cfdfc91f3e6b5bac679c3af", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115664", - "file_size_bytes": 1176, - "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1625f" - }, - "id": "nmdc:9800add41d26829494265ba81a100c53", - "name": "gold:Gp0115664.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115664", - "file_size_bytes": 351859, - "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16261" - }, - "id": "nmdc:474fa29bd39452fa80f5a32e9e6be6f4", - "name": "gold:Gp0115664.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115664", - "file_size_bytes": 326275, - "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d61" - }, - "description": "Functional annotation GFF file for gold:Gp0115664", - "url": 
"https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_functional_annotation.gff", - "md5_checksum": "bc034c7024043ea88b44d0897bb5bece", - "file_size_bytes": 3385, - "id": "nmdc:bc034c7024043ea88b44d0897bb5bece", - "name": "gold:Gp0115664_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d62" - }, - "description": "KO TSV File for gold:Gp0115664", - "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_ko.tsv", - "md5_checksum": "76537a4ab5012ba3b407471da373ef1c", - "file_size_bytes": 3385, - "id": "nmdc:76537a4ab5012ba3b407471da373ef1c", - "name": "gold:Gp0115664_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d64" - }, - "description": "Structural annotation GFF file for gold:Gp0115664", - "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_structural_annotation.gff", - "md5_checksum": "10117f9500d0dd54655a5d70195f7df5", - "file_size_bytes": 3385, - "id": "nmdc:10117f9500d0dd54655a5d70195f7df5", - "name": "gold:Gp0115664_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d65" - }, - "description": "EC TSV File for gold:Gp0115664", - "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_ec.tsv", - "md5_checksum": "8a812604db9b4e2bdbad6d0b3539f6ea", - "file_size_bytes": 3385, - "id": "nmdc:8a812604db9b4e2bdbad6d0b3539f6ea", - "name": "gold:Gp0115664_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d68" - }, - "description": "Protein FAA for gold:Gp0115664", - "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_proteins.faa", - "md5_checksum": 
"fc419491cce16671e828d76083252841", - "file_size_bytes": 3385, - "id": "nmdc:fc419491cce16671e828d76083252841", - "name": "gold:Gp0115664_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34723" - }, - "has_input": [ - "nmdc:3faf965a2e745048afed5d1c065a78c4", - "nmdc:faeb84260d97f23162a6176b9442a5c8", - "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db" - ], - "too_short_contig_num": 75364, - "part_of": [ - "nmdc:mga0dm3v04" - ], - "binned_contig_num": 220, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:767a36b1bffa42d3d25af3f81b15e11b", - "nmdc:994fd58ab9a53c19ba1cdb830e37a132", - "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", - "nmdc:0d45611a5d0c80679c00fa759c939df0", - "nmdc:bb5835f621252fca37967e00245517ac" - ], - "was_informed_by": "gold:Gp0115664", - "input_contig_num": 78376, - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0dm3v04", - "mags_list": [ - { - "number_of_contig": 220, - "completeness": 45.41, - "bin_name": "bins.1", - "gene_count": 1182, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.72, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 16 - } - ], - "unbinned_contig_num": 2792, - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:33:34+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": 
"649b005bbf2caae0415ef9c5" - }, - "has_input": [ - "nmdc:3faf965a2e745048afed5d1c065a78c4" - ], - "part_of": [ - "nmdc:mga0dm3v04" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:338a8f2f739dfc89557e090d604302f6", - "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", - "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", - "nmdc:bc7f7df6865acffd4e07f8b592573eb9", - "nmdc:be38bedd77ab3c072bafbb2c201c953d", - "nmdc:d7318549a735853b679d15171f5c7ea7", - "nmdc:c1617e0980c6e52149692aee39e30f8c", - "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", - "nmdc:eb1fba5cad14c3e211baa2de796bca2e", - "nmdc:2146449222f410a286e4786bf19c9a5e", - "nmdc:20ced78c72f67d064bddcc8d5534ebb6", - "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170" - ], - "was_informed_by": "gold:Gp0115664", - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0dm3v04", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:33:34+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb0" - }, - "has_input": [ - "nmdc:232e31505b6a0251df2303c0563d64c1" - ], - "part_of": [ - "nmdc:mga0dm3v04" - ], - "ctg_logsum": 60365, - "scaf_logsum": 60806, - "gap_pct": 0.00196, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3faf965a2e745048afed5d1c065a78c4", - "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", - "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", - "nmdc:1f9a75569aedc406a3db8ff779b03c19", - "nmdc:faeb84260d97f23162a6176b9442a5c8" - ], - "asm_score": 4.21, - "was_informed_by": "gold:Gp0115664", - "ctg_powsum": 6668.288, - "scaf_max": 15348, - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "scaf_powsum": 6720.964, - "execution_resource": "NERSC-Cori", - "contigs": 78376, - "name": "Assembly Activity for nmdc:mga0dm3v04", - "ctg_max": 15348, - "gc_std": 0.11459, - "contig_bp": 
33088752, - "gc_avg": 0.5432, - "started_at_time": "2021-10-11T02:28:16Z", - "scaf_bp": 33089402, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 78311, - "ended_at_time": "2021-10-11T03:33:34+00:00", - "ctg_l50": 377, - "ctg_l90": 283, - "ctg_n50": 23883, - "ctg_n90": 67231, - "scaf_l50": 377, - "scaf_l90": 283, - "scaf_n50": 23850, - "scaf_n90": 67169 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b38" - }, - "id": "nmdc:omprc-11-hgehsc37", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", - "has_input": [ - "nmdc:bsm-11-qxntpg05" - ], - "has_output": [ - "jgi:55d817f20d8785342fcf826c" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115664" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c87b" - }, - "has_input": [ - "nmdc:86929bf5b2afcb965129dcf0eae2d8fc" - ], - "part_of": [ - "nmdc:mga0dm3v04" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:232e31505b6a0251df2303c0563d64c1", - "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5" - ], - "was_informed_by": "gold:Gp0115664", - "input_read_count": 19058974, - 
"output_read_bases": 2597325375, - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "execution_resource": "NERSC-Cori", - "input_read_bases": 2877905074, - "name": "Read QC Activity for nmdc:mga0dm3v04", - "output_read_count": 17338778, - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:33:34+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf3f" - }, - "has_input": [ - "nmdc:232e31505b6a0251df2303c0563d64c1" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9d61d9f0c31a98f88ad8cde86254148d", - "nmdc:7f93f97242aed036019f13492f5af35c", - "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", - "nmdc:a4243f71a0288f489c566ae85d85891d", - "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", - "nmdc:a80779b32415ef001d0403f0b618b612", - "nmdc:01581429336a43d7dc2f85b8d49d6c6e", - "nmdc:ce47d6686edb7b3472102d5883229c45", - "nmdc:29b75e78b0b7fd8115614d8e9d341d46" - ], - "was_informed_by": "gold:Gp0115664", - "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:34+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 4674996922, - "type": "nmdc:DataObject", - "id": "jgi:55f23d790d8785306f96497e", - "name": "9491.1.134352.AGTCAA.fastq.gz" - }, - { - "name": "Gp0115678_Filtered Reads", - "description": "Filtered Reads 
for Gp0115678", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filtered.fastq.gz", - "md5_checksum": "e0ce93b88419f87568ff206e0efe3a24", - "id": "nmdc:e0ce93b88419f87568ff206e0efe3a24", - "file_size_bytes": 4090026888 - }, - { - "name": "Gp0115678_Filtered Stats", - "description": "Filtered Stats for Gp0115678", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filterStats.txt", - "md5_checksum": "7bf8ff4cf0d98cccd8e1c20f77dd1690", - "id": "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690", - "file_size_bytes": 292 - }, - { - "name": "Gp0115678_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", - "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", - "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "file_size_bytes": 19085 - }, - { - "name": "Gp0115678_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", - "md5_checksum": "12b2d6afc355bce76249d750a9fab534", - "id": "nmdc:12b2d6afc355bce76249d750a9fab534", - "file_size_bytes": 1243929 - }, - { - "name": "Gp0115678_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115678", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", - "md5_checksum": "18214017d56658a48723c9c998dcba7e", - "id": "nmdc:18214017d56658a48723c9c998dcba7e", - "file_size_bytes": 281148 - }, - { - "name": "Gp0115678_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115678", - "data_object_type": "Centrifuge Taxonomic 
Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", - "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", - "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "file_size_bytes": 3491726958 - }, - { - "name": "Gp0115678_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115678", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", - "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", - "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "file_size_bytes": 264123 - }, - { - "name": "Gp0115678_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115678", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", - "md5_checksum": "f9c01985f057825149d35de0650095a8", - "id": "nmdc:f9c01985f057825149d35de0650095a8", - "file_size_bytes": 2352347 - }, - { - "name": "Gp0115678_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115678", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", - "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", - "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "file_size_bytes": 2880889483 - }, - { - "name": "Gp0115678_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115678", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", - "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", - "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "file_size_bytes": 735519 - }, - { - 
"name": "Gp0115678_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115678", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", - "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", - "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", - "file_size_bytes": 4410156 - }, - { - "name": "Gp0115678_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115678", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", - "md5_checksum": "d305e212cce8f84f14561d3957c968b1", - "id": "nmdc:d305e212cce8f84f14561d3957c968b1", - "file_size_bytes": 205441595 - }, - { - "name": "Gp0115678_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115678", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", - "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", - "id": "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", - "file_size_bytes": 204286677 - }, - { - "name": "Gp0115678_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_covstats.txt", - "md5_checksum": "444562a4e7108077b7e541a5d9064086", - "id": "nmdc:444562a4e7108077b7e541a5d9064086", - "file_size_bytes": 30470067 - }, - { - "name": "Gp0115678_Assembled AGP file", - "description": "Assembled AGP file for Gp0115678", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_assembly.agp", - "md5_checksum": "6c400425b7188b24ac49533d9ce0d43b", - "id": "nmdc:6c400425b7188b24ac49533d9ce0d43b", - "file_size_bytes": 28619270 - }, - { - "name": "Gp0115678_Metagenome Alignment BAM file", 
- "description": "Metagenome Alignment BAM file for Gp0115678", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_pairedMapped_sorted.bam", - "md5_checksum": "1c63639a894aa686e77e57787fcafbc6", - "id": "nmdc:1c63639a894aa686e77e57787fcafbc6", - "file_size_bytes": 4471336607 - }, - { - "name": "Gp0115678_Protein FAA", - "description": "Protein FAA for Gp0115678", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_proteins.faa", - "md5_checksum": "ecfb1a4d469d9f95a91c8a3a3d5475af", - "id": "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", - "file_size_bytes": 109377096 - }, - { - "name": "Gp0115678_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115678", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_structural_annotation.gff", - "md5_checksum": "4eeee677df10364f622a0d4789522c69", - "id": "nmdc:4eeee677df10364f622a0d4789522c69", - "file_size_bytes": 2533 - }, - { - "name": "Gp0115678_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115678", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_functional_annotation.gff", - "md5_checksum": "351ff91eddf2bc89acbdf04eab68aef1", - "id": "nmdc:351ff91eddf2bc89acbdf04eab68aef1", - "file_size_bytes": 118933051 - }, - { - "name": "Gp0115678_KO TSV file", - "description": "KO TSV file for Gp0115678", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko.tsv", - "md5_checksum": "64b9d934918b78de80f1cf80a013557f", - "id": "nmdc:64b9d934918b78de80f1cf80a013557f", - "file_size_bytes": 12839157 - }, - { - "name": 
"Gp0115678_EC TSV file", - "description": "EC TSV file for Gp0115678", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ec.tsv", - "md5_checksum": "903f2015c41660ae53e16bfc369d566a", - "id": "nmdc:903f2015c41660ae53e16bfc369d566a", - "file_size_bytes": 8227424 - }, - { - "name": "Gp0115678_COG GFF file", - "description": "COG GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cog.gff", - "md5_checksum": "bf72ad74b2375abe730ecf7dc50b1557", - "id": "nmdc:bf72ad74b2375abe730ecf7dc50b1557", - "file_size_bytes": 57084923 - }, - { - "name": "Gp0115678_PFAM GFF file", - "description": "PFAM GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_pfam.gff", - "md5_checksum": "92f4707b0b022c217463f76d229dd3cb", - "id": "nmdc:92f4707b0b022c217463f76d229dd3cb", - "file_size_bytes": 46625196 - }, - { - "name": "Gp0115678_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_tigrfam.gff", - "md5_checksum": "4f6f494c878aeff4308f2de2b2682ea6", - "id": "nmdc:4f6f494c878aeff4308f2de2b2682ea6", - "file_size_bytes": 5472483 - }, - { - "name": "Gp0115678_SMART GFF file", - "description": "SMART GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_smart.gff", - "md5_checksum": "c44ff7df84f2b777b7fee22f7d28e205", - "id": "nmdc:c44ff7df84f2b777b7fee22f7d28e205", - "file_size_bytes": 18005129 - }, - { - "name": "Gp0115678_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_supfam.gff", - "md5_checksum": "b4fad8c887bc33c67a3316475ccc3572", - "id": "nmdc:b4fad8c887bc33c67a3316475ccc3572", - 
"file_size_bytes": 80713018 - }, - { - "name": "Gp0115678_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cath_funfam.gff", - "md5_checksum": "4a3d00839e3067973b06771a31bbae93", - "id": "nmdc:4a3d00839e3067973b06771a31bbae93", - "file_size_bytes": 66327975 - }, - { - "name": "Gp0115678_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko_ec.gff", - "md5_checksum": "f01768e30cdd8f7650f631883d1c5d23", - "id": "nmdc:f01768e30cdd8f7650f631883d1c5d23", - "file_size_bytes": 40908900 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115678_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.tooShort.fa", - "md5_checksum": "cf2d0eb0281d2822373d4e7d25c8d1e6", - "id": "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", - "file_size_bytes": 160811096 - }, - { - "name": "Gp0115678_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.unbinned.fa", - "md5_checksum": "85defe7977c263b8fba3f31f89f101f9", - "id": "nmdc:85defe7977c263b8fba3f31f89f101f9", - "file_size_bytes": 31022166 - }, - { - "name": "Gp0115678_metabat2 bin checkm quality 
assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115678", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_checkm_qa.out", - "md5_checksum": "19a6a8410cece1118a06763023cc1313", - "id": "nmdc:19a6a8410cece1118a06763023cc1313", - "file_size_bytes": 1690 - }, - { - "name": "Gp0115678_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115678", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_hqmq_bin.zip", - "md5_checksum": "54ed3f096ca7eacec9e5078ca45a6530", - "id": "nmdc:54ed3f096ca7eacec9e5078ca45a6530", - "file_size_bytes": 4026276 - }, - { - "name": "Gp0115678_metabat2 bins", - "description": "metabat2 bins for Gp0115678", - "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_metabat_bin.zip", - "md5_checksum": "8493c05e428d90f8893e4c58755b2e95", - "id": "nmdc:8493c05e428d90f8893e4c58755b2e95", - "file_size_bytes": 72078 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da5" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115678", - "url": "https://data.microbiomedata.org/data/1781_86093/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 4408046431, - "type": "nmdc:DataObject", - "id": "nmdc:5d038f63644b03794d5a931f380bfd04", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14db7" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115678", - "url": "https://data.microbiomedata.org/data/1781_86093/assembly/mapping_stats.txt", - "file_size_bytes": 28551512, - "type": "nmdc:DataObject", - "id": "nmdc:56f166420a42acf12a021f3a66004127", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14db9" - }, 
- "description": "Assembled scaffold fasta for gold:Gp0115678", - "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly_scaffolds.fna", - "file_size_bytes": 202369442, - "type": "nmdc:DataObject", - "id": "nmdc:eabfcbcc20b7c6b2732fab7d2ce8b44b", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14dbc" - }, - "description": "Assembled contigs fasta for gold:Gp0115678", - "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly_contigs.fna", - "file_size_bytes": 203523040, - "type": "nmdc:DataObject", - "id": "nmdc:d1dee40a000226d9f2c8f4f05e0f85f1", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14dbe" - }, - "description": "Assembled AGP file for gold:Gp0115678", - "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly.agp", - "file_size_bytes": 24779500, - "type": "nmdc:DataObject", - "id": "nmdc:ac56c44a98ebb58393634c4c2f83028d", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15a00" - }, - "id": "nmdc:acb4672087a4cbe2f4e5a65dc291f70b", - "name": "1781_86093.krona.html", - "description": "Gold:Gp0115678 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86093/ReadbasedAnalysis/centrifuge/1781_86093.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15a0a" - }, - "id": "nmdc:5334ea32a928a691d0be326a7a73ffe4", - "name": "1781_86093.json", - "description": "Gold:Gp0115678 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86093/ReadbasedAnalysis/1781_86093.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162de" - }, - "id": "nmdc:8576b99e74ec8da1f25e14a8c09c6815", - 
"name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115678", - "file_size_bytes": 33583798, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162df" - }, - "id": "nmdc:6afbb385aa127eb27cefb63eb516c8bc", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115678", - "file_size_bytes": 156176336, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e1" - }, - "id": "nmdc:127bab89a08a1eed165a2afbde8fedd9", - "name": "gtdbtk.bac120.summary.tsv", - "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115678", - "file_size_bytes": 1861, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e2" - }, - "id": "nmdc:8ead45679db7866273e97e259a27773f", - "name": "gold:Gp0115678.bins.7.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 2830025, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e3" - }, - "id": "nmdc:7c9a51eb968568c463672c6e4d0cbb0b", - "name": "gold:Gp0115678.bins.9.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 699704, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.9.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e4" - }, - "id": "nmdc:aedec76f4ab12cc4534fc2204677aa81", - "name": 
"gold:Gp0115678.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 1797224, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e5" - }, - "id": "nmdc:d272087deef90faf8499f958061f818d", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115678", - "file_size_bytes": 2720, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e6" - }, - "id": "nmdc:b0b8774fb7f948606a1f4fa015e7f05e", - "name": "gold:Gp0115678.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 866150, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e7" - }, - "id": "nmdc:ea9e5c76a6942b053a3b50ac9d56db97", - "name": "gold:Gp0115678.bins.11.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 672888, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.11.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e8" - }, - "id": "nmdc:33a8648962959be82a7140b07cb4eec5", - "name": "gold:Gp0115678.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 245890, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e9" - }, - "id": 
"nmdc:a72a3f4d459210af63005c1438af24ca", - "name": "gold:Gp0115678.bins.12.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 232797, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.12.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ea" - }, - "id": "nmdc:1d650b2d0318de6359afd3393562f3a1", - "name": "gold:Gp0115678.bins.10.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 1157673, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.10.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162eb" - }, - "id": "nmdc:3bb54fe9860b1a3c7ad831e2ba2d311e", - "name": "gold:Gp0115678.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 307851, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ec" - }, - "id": "nmdc:5f7a9cc615e036a5f42b35abc88dda66", - "name": "gold:Gp0115678.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 366506, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ee" - }, - "id": "nmdc:e581ce6782a654cf7528153e52a8c80f", - "name": "gold:Gp0115678.bins.8.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 439839, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.8.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": 
"649b003f1ae706d7b5b162f2" - }, - "id": "nmdc:de85ac00876e5ea0c61208d366b084b2", - "name": "gold:Gp0115678.bins.6.fa", - "description": "metabat2 binned contig file for gold:Gp0115678", - "file_size_bytes": 1192968, - "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d6e" - }, - "description": "Protein FAA for gold:Gp0115678", - "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_proteins.faa", - "md5_checksum": "78a99f435ce2bdd6cd83ebb807dc0ef3", - "file_size_bytes": 3385, - "id": "nmdc:78a99f435ce2bdd6cd83ebb807dc0ef3", - "name": "gold:Gp0115678_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d75" - }, - "description": "KO TSV File for gold:Gp0115678", - "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_ko.tsv", - "md5_checksum": "cfe4a8ce52735eedacc38bacdc8785e4", - "file_size_bytes": 3385, - "id": "nmdc:cfe4a8ce52735eedacc38bacdc8785e4", - "name": "gold:Gp0115678_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d7b" - }, - "description": "Functional annotation GFF file for gold:Gp0115678", - "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_functional_annotation.gff", - "md5_checksum": "6d1553b3e100a61f3b2b453fb7e71094", - "file_size_bytes": 3385, - "id": "nmdc:6d1553b3e100a61f3b2b453fb7e71094", - "name": "gold:Gp0115678_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d7c" - }, - "description": "EC TSV File for gold:Gp0115678", - "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_ec.tsv", - 
"md5_checksum": "240064338b65f944556e88ebd44fbd03", - "file_size_bytes": 3385, - "id": "nmdc:240064338b65f944556e88ebd44fbd03", - "name": "gold:Gp0115678_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dc2" - }, - "description": "Structural annotation GFF file for gold:Gp0115678", - "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_structural_annotation.gff", - "md5_checksum": "ac989404b8a9e07880788cfb061015ba", - "file_size_bytes": 3385, - "id": "nmdc:ac989404b8a9e07880788cfb061015ba", - "name": "gold:Gp0115678_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3472b" - }, - "has_input": [ - "nmdc:d305e212cce8f84f14561d3957c968b1", - "nmdc:1c63639a894aa686e77e57787fcafbc6", - "nmdc:351ff91eddf2bc89acbdf04eab68aef1" - ], - "too_short_contig_num": 362617, - "part_of": [ - "nmdc:mga026tn70" - ], - "binned_contig_num": 2089, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", - "nmdc:85defe7977c263b8fba3f31f89f101f9", - "nmdc:19a6a8410cece1118a06763023cc1313", - "nmdc:54ed3f096ca7eacec9e5078ca45a6530", - "nmdc:8493c05e428d90f8893e4c58755b2e95" - ], - "was_informed_by": "gold:Gp0115678", - "input_contig_num": 383711, - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga026tn70", - "mags_list": [ - { - "number_of_contig": 5, - "completeness": 0.31, - "bin_name": "bins.1", - "gene_count": 264, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - 
"gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 0 - }, - { - "number_of_contig": 231, - "completeness": 50.86, - "bin_name": "bins.2", - "gene_count": 1187, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Pseudomonadales", - "num_16s": 0, - "gtdbtk_family": "UBA3067", - "gtdbtk_domain": "Bacteria", - "contamination": 0.86, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "UBA3067", - "num_t_rna": 19 - }, - { - "number_of_contig": 675, - "completeness": 74.51, - "bin_name": "bins.3", - "gene_count": 4479, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 1, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 1.06, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "Rhizobacter", - "num_t_rna": 37 - }, - { - "number_of_contig": 314, - "completeness": 64.35, - "bin_name": "bins.4", - "gene_count": 1988, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", - "num_16s": 0, - "gtdbtk_family": "Sphingomonadaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 0.48, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Ga0077559", - "num_t_rna": 24 - }, - { - "number_of_contig": 574, - "completeness": 73.7, - "bin_name": "bins.5", - "gene_count": 3601, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 0, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 1.88, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Aquabacterium", - "num_t_rna": 32 - 
}, - { - "number_of_contig": 290, - "completeness": 91.23, - "bin_name": "bins.6", - "gene_count": 3090, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Pseudomonadales", - "num_16s": 0, - "gtdbtk_family": "Moraxellaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 0.0, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 32 - } - ], - "unbinned_contig_num": 19005, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T06:18:17+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c8" - }, - "has_input": [ - "nmdc:d305e212cce8f84f14561d3957c968b1" - ], - "part_of": [ - "nmdc:mga026tn70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", - "nmdc:4eeee677df10364f622a0d4789522c69", - "nmdc:351ff91eddf2bc89acbdf04eab68aef1", - "nmdc:64b9d934918b78de80f1cf80a013557f", - "nmdc:903f2015c41660ae53e16bfc369d566a", - "nmdc:bf72ad74b2375abe730ecf7dc50b1557", - "nmdc:92f4707b0b022c217463f76d229dd3cb", - "nmdc:4f6f494c878aeff4308f2de2b2682ea6", - "nmdc:c44ff7df84f2b777b7fee22f7d28e205", - "nmdc:b4fad8c887bc33c67a3316475ccc3572", - "nmdc:4a3d00839e3067973b06771a31bbae93", - "nmdc:f01768e30cdd8f7650f631883d1c5d23" - ], - "was_informed_by": "gold:Gp0115678", - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga026tn70", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T06:18:17+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb5" - }, - "has_input": [ - 
"nmdc:e0ce93b88419f87568ff206e0efe3a24" - ], - "part_of": [ - "nmdc:mga026tn70" - ], - "ctg_logsum": 494917, - "scaf_logsum": 496628, - "gap_pct": 0.00163, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d305e212cce8f84f14561d3957c968b1", - "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", - "nmdc:444562a4e7108077b7e541a5d9064086", - "nmdc:6c400425b7188b24ac49533d9ce0d43b", - "nmdc:1c63639a894aa686e77e57787fcafbc6" - ], - "asm_score": 7.785, - "was_informed_by": "gold:Gp0115678", - "ctg_powsum": 57423, - "scaf_max": 116556, - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "scaf_powsum": 57689, - "execution_resource": "NERSC-Cori", - "contigs": 383712, - "name": "Assembly Activity for nmdc:mga026tn70", - "ctg_max": 116556, - "gc_std": 0.13426, - "contig_bp": 190310453, - "gc_avg": 0.48844, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 190313553, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 383447, - "ended_at_time": "2021-10-11T06:18:17+00:00", - "ctg_l50": 474, - "ctg_l90": 290, - "ctg_n50": 102228, - "ctg_n90": 321321, - "scaf_l50": 474, - "scaf_l90": 290, - "scaf_n50": 102177, - "scaf_n90": 321076, - "scaf_l_gt50k": 453691, - "scaf_n_gt50k": 6, - "scaf_pct_gt50k": 0.23839132 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b39" - }, - "id": "nmdc:omprc-11-7vsv7h78", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-j0wbx741" - ], - "has_output": [ - "jgi:55f23d790d8785306f96497e" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - 
"ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115678" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c881" - }, - "has_input": [ - "nmdc:0e6219b7901669483a0a0386cfc01f93" - ], - "part_of": [ - "nmdc:mga026tn70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e0ce93b88419f87568ff206e0efe3a24", - "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690" - ], - "was_informed_by": "gold:Gp0115678", - "input_read_count": 51286688, - "output_read_bases": 7231449575, - "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "execution_resource": "NERSC-Cori", - "input_read_bases": 7744289888, - "name": "Read QC Activity for nmdc:mga026tn70", - "output_read_count": 48276864, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T06:18:17+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf49" - }, - "has_input": [ - "nmdc:e0ce93b88419f87568ff206e0efe3a24" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:05bab80e2ff02d160b8e808f056ee2b5", - "nmdc:12b2d6afc355bce76249d750a9fab534", - "nmdc:18214017d56658a48723c9c998dcba7e", - "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", - "nmdc:78dab6988b57c654462ef3dbeb64d8d6", - "nmdc:f9c01985f057825149d35de0650095a8", - "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", - "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", - "nmdc:38d41d4299141abe28bf0405af80cdfc" - ], - "was_informed_by": "gold:Gp0115678", - "id": 
"nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga026tn70", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:18:17+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2106076506, - "type": "nmdc:DataObject", - "id": "jgi:574fde547ded5e3df1ee13fa", - "name": "10533.1.165310.GAGCTCA-TTGAGCT.fastq.gz" - }, - { - "name": "Gp0127623_Filtered Reads", - "description": "Filtered Reads for Gp0127623", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filtered.fastq.gz", - "md5_checksum": "6a8409b21c45ba9feba873ec269c8ff7", - "id": "nmdc:6a8409b21c45ba9feba873ec269c8ff7", - "file_size_bytes": 1917552858 - }, - { - "name": "Gp0127623_Filtered Stats", - "description": "Filtered Stats for Gp0127623", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filterStats.txt", - "md5_checksum": "61fb06de10fe3a0c49c5afe14ab7fb32", - "id": "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32", - "file_size_bytes": 283 - }, - { - "name": "Gp0127623_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", - "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", - "id": "nmdc:ac39e916e17e08a845bb40d97519d8be", - "file_size_bytes": 1553 - }, - { - "name": "Gp0127623_Gottcha2 full TSV 
report", - "description": "Gottcha2 full TSV report for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", - "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", - "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "file_size_bytes": 836575 - }, - { - "name": "Gp0127623_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127623", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", - "md5_checksum": "eda0c04d692ecf137585676c15924626", - "id": "nmdc:eda0c04d692ecf137585676c15924626", - "file_size_bytes": 231097 - }, - { - "name": "Gp0127623_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127623", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", - "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", - "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "file_size_bytes": 1669254765 - }, - { - "name": "Gp0127623_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127623", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", - "md5_checksum": "e1f164c534830cd628d67c564ace863b", - "id": "nmdc:e1f164c534830cd628d67c564ace863b", - "file_size_bytes": 255784 - }, - { - "name": "Gp0127623_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127623", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", - "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", - "id": 
"nmdc:a1062576d998b7b82e39b8d8520fa37e", - "file_size_bytes": 2333760 - }, - { - "name": "Gp0127623_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127623", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", - "md5_checksum": "040e6ca695283a12711c16344acd1e76", - "id": "nmdc:040e6ca695283a12711c16344acd1e76", - "file_size_bytes": 1335651191 - }, - { - "name": "Gp0127623_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127623", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", - "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", - "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "file_size_bytes": 647609 - }, - { - "name": "Gp0127623_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127623", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", - "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", - "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", - "file_size_bytes": 3949449 - }, - { - "name": "Gp0127623_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127623", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_contigs.fna", - "md5_checksum": "3373ef564b5b97fa472dc8f2c2277dbc", - "id": "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", - "file_size_bytes": 55220158 - }, - { - "name": "Gp0127623_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127623", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_scaffolds.fna", - 
"md5_checksum": "a0377bb7d752e66b754753fcefb5005a", - "id": "nmdc:a0377bb7d752e66b754753fcefb5005a", - "file_size_bytes": 54864386 - }, - { - "name": "Gp0127623_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_covstats.txt", - "md5_checksum": "081017d0d9e68a999c245618eb907c08", - "id": "nmdc:081017d0d9e68a999c245618eb907c08", - "file_size_bytes": 9321875 - }, - { - "name": "Gp0127623_Assembled AGP file", - "description": "Assembled AGP file for Gp0127623", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_assembly.agp", - "md5_checksum": "4a6ed00a6c2156c142d7bbec6baa36b5", - "id": "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", - "file_size_bytes": 8670291 - }, - { - "name": "Gp0127623_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127623", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_pairedMapped_sorted.bam", - "md5_checksum": "21fb280328baf81e8135733eaf440b66", - "id": "nmdc:21fb280328baf81e8135733eaf440b66", - "file_size_bytes": 2062412797 - }, - { - "name": "Gp0127623_Protein FAA", - "description": "Protein FAA for Gp0127623", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_proteins.faa", - "md5_checksum": "8ac52d00bad1f9349da2acde572006b6", - "id": "nmdc:8ac52d00bad1f9349da2acde572006b6", - "file_size_bytes": 32224726 - }, - { - "name": "Gp0127623_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127623", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_structural_annotation.gff", - "md5_checksum": 
"9dd5eb06fe24f63d5012e34e364a580c", - "id": "nmdc:9dd5eb06fe24f63d5012e34e364a580c", - "file_size_bytes": 2512 - }, - { - "name": "Gp0127623_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127623", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_functional_annotation.gff", - "md5_checksum": "05107e0217e199d7b0cd571db88f7d09", - "id": "nmdc:05107e0217e199d7b0cd571db88f7d09", - "file_size_bytes": 37779373 - }, - { - "name": "Gp0127623_KO TSV file", - "description": "KO TSV file for Gp0127623", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko.tsv", - "md5_checksum": "02ffcaeeb9a73edea47ba3671396026a", - "id": "nmdc:02ffcaeeb9a73edea47ba3671396026a", - "file_size_bytes": 4343179 - }, - { - "name": "Gp0127623_EC TSV file", - "description": "EC TSV file for Gp0127623", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ec.tsv", - "md5_checksum": "b9b4ccafc50787f86ef03680eb23848d", - "id": "nmdc:b9b4ccafc50787f86ef03680eb23848d", - "file_size_bytes": 2966454 - }, - { - "name": "Gp0127623_COG GFF file", - "description": "COG GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cog.gff", - "md5_checksum": "fbd178d9c302b841e3fde3ab9acd8160", - "id": "nmdc:fbd178d9c302b841e3fde3ab9acd8160", - "file_size_bytes": 22023330 - }, - { - "name": "Gp0127623_PFAM GFF file", - "description": "PFAM GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_pfam.gff", - "md5_checksum": "1bcc35e753e7dad78ef8ae4989eb901a", - "id": "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", - "file_size_bytes": 15956001 - }, - { - "name": "Gp0127623_TigrFam GFF file", - 
"description": "TigrFam GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_tigrfam.gff", - "md5_checksum": "f6d6d2ea3c539560ad30bbd6df8bc71a", - "id": "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", - "file_size_bytes": 1656727 - }, - { - "name": "Gp0127623_SMART GFF file", - "description": "SMART GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_smart.gff", - "md5_checksum": "45536a48cef31f2c3870c7bacb3d785a", - "id": "nmdc:45536a48cef31f2c3870c7bacb3d785a", - "file_size_bytes": 4731416 - }, - { - "name": "Gp0127623_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_supfam.gff", - "md5_checksum": "a52d057d005504857f82bcf661dd7676", - "id": "nmdc:a52d057d005504857f82bcf661dd7676", - "file_size_bytes": 27616681 - }, - { - "name": "Gp0127623_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cath_funfam.gff", - "md5_checksum": "b92cb96900a31a3c70ccf9cfe45f02c3", - "id": "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", - "file_size_bytes": 20817140 - }, - { - "name": "Gp0127623_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko_ec.gff", - "md5_checksum": "32eca4cab8525b09cf1b0ed2353f9278", - "id": "nmdc:32eca4cab8525b09cf1b0ed2353f9278", - "file_size_bytes": 13827629 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - 
"id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127623_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.tooShort.fa", - "md5_checksum": "e63c76f92bc0ae95dfc238c099296e91", - "id": "nmdc:e63c76f92bc0ae95dfc238c099296e91", - "file_size_bytes": 48421824 - }, - { - "name": "Gp0127623_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.unbinned.fa", - "md5_checksum": "3dfba77d38712870f8c415203f991496", - "id": "nmdc:3dfba77d38712870f8c415203f991496", - "file_size_bytes": 6028115 - }, - { - "name": "Gp0127623_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127623", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_checkm_qa.out", - "md5_checksum": "5e98d27533164fdf67c07cc224090547", - "id": "nmdc:5e98d27533164fdf67c07cc224090547", - "file_size_bytes": 765 - }, - { - "name": "Gp0127623_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127623", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_hqmq_bin.zip", - "md5_checksum": "bfbe3e3a21e8a089c4c7a0d945c79b7b", - "id": "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", - "file_size_bytes": 182 - }, - { - "name": "Gp0127623_metabat2 bins", - "description": "metabat2 bins for Gp0127623", - "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_metabat_bin.zip", - "md5_checksum": "c70853ef1a6ab162b85df5215a76666b", - "id": "nmdc:c70853ef1a6ab162b85df5215a76666b", - 
"file_size_bytes": 236177 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e44" - }, - "description": "Assembled AGP file for gold:Gp0127623", - "url": "https://data.microbiomedata.org/data/1781_100325/assembly/assembly.agp", - "file_size_bytes": 7722651, - "type": "nmdc:DataObject", - "id": "nmdc:157fde8313174776bf9fd98b41c53aae", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e45" - }, - "description": "Assembled scaffold fasta for gold:Gp0127623", - "url": "https://data.microbiomedata.org/data/1781_100325/assembly/assembly_scaffolds.fna", - "file_size_bytes": 54390822, - "type": "nmdc:DataObject", - "id": "nmdc:c654cffcafc3b8bed2acfdf8e2dc2f3b", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e46" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127623", - "url": "https://data.microbiomedata.org/data/1781_100325/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2033548061, - "type": "nmdc:DataObject", - "id": "nmdc:d41f2a097f2cb6f9d6c8378f203cc565", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e47" - }, - "description": "Assembled contigs fasta for gold:Gp0127623", - "url": "https://data.microbiomedata.org/data/1781_100325/assembly/assembly_contigs.fna", - "file_size_bytes": 54746466, - "type": "nmdc:DataObject", - "id": "nmdc:9339ba4d7b731220024b995f87ddc5e1", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e48" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127623", - "url": "https://data.microbiomedata.org/data/1781_100325/assembly/mapping_stats.txt", - "file_size_bytes": 8848183, - "type": "nmdc:DataObject", - "id": "nmdc:f283aad4ed4b528d3ca14bb8fbd8abcd", - "name": "mapping_stats.txt", - 
"data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15ae0" - }, - "id": "nmdc:6dbd96624464bcccba0269cd46f59c1f", - "name": "1781_100325.krona.html", - "description": "Gold:Gp0127623 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100325/ReadbasedAnalysis/centrifuge/1781_100325.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15aea" - }, - "id": "nmdc:91dc3ab40d04608ca5f5a30baa2d48b5", - "name": "1781_100325.json", - "description": "Gold:Gp0127623 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100325/ReadbasedAnalysis/1781_100325.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165b9" - }, - "id": "nmdc:295741c2b87623f465d21c6eaacec974", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127623", - "file_size_bytes": 6753212, - "url": "https://data.microbiomedata.org/data/1781_100325/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ba" - }, - "id": "nmdc:dc2d3cbd8386e59252f48f52900f76a4", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127623", - "file_size_bytes": 47038634, - "url": "https://data.microbiomedata.org/data/1781_100325/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cbd" - }, - "description": "EC TSV File for gold:Gp0127623", - "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_ec.tsv", - "md5_checksum": "d6afa54f891852b3a5befc294ce84489", - "file_size_bytes": 3385, - "id": "nmdc:d6afa54f891852b3a5befc294ce84489", - "name": "gold:Gp0127623_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": 
"nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cbf" - }, - "description": "KO TSV File for gold:Gp0127623", - "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_ko.tsv", - "md5_checksum": "e15c4db1e4e26208b302ecb9bc2c094c", - "file_size_bytes": 3385, - "id": "nmdc:e15c4db1e4e26208b302ecb9bc2c094c", - "name": "gold:Gp0127623_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc0" - }, - "description": "Functional annotation GFF file for gold:Gp0127623", - "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_functional_annotation.gff", - "md5_checksum": "3acc269b9e2b5e97ffcc3c1a0d85381c", - "file_size_bytes": 3385, - "id": "nmdc:3acc269b9e2b5e97ffcc3c1a0d85381c", - "name": "gold:Gp0127623_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc1" - }, - "description": "Structural annotation GFF file for gold:Gp0127623", - "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_structural_annotation.gff", - "md5_checksum": "feb21db71dc44afceeb88bb725315b42", - "file_size_bytes": 3385, - "id": "nmdc:feb21db71dc44afceeb88bb725315b42", - "name": "gold:Gp0127623_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc4" - }, - "description": "Protein FAA for gold:Gp0127623", - "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_proteins.faa", - "md5_checksum": "be62e3b68916c8077955d0b3d3aaf5aa", - "file_size_bytes": 3385, - "id": "nmdc:be62e3b68916c8077955d0b3d3aaf5aa", - "name": "gold:Gp0127623_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - 
"functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab346fd" - }, - "has_input": [ - "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", - "nmdc:21fb280328baf81e8135733eaf440b66", - "nmdc:05107e0217e199d7b0cd571db88f7d09" - ], - "too_short_contig_num": 114220, - "part_of": [ - "nmdc:mga03eyz63" - ], - "binned_contig_num": 171, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:e63c76f92bc0ae95dfc238c099296e91", - "nmdc:3dfba77d38712870f8c415203f991496", - "nmdc:5e98d27533164fdf67c07cc224090547", - "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", - "nmdc:c70853ef1a6ab162b85df5215a76666b" - ], - "was_informed_by": "gold:Gp0127623", - "input_contig_num": 118423, - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga03eyz63", - "mags_list": [ - { - "number_of_contig": 171, - "completeness": 30.1, - "bin_name": "bins.1", - "gene_count": 991, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 12 - } - ], - "unbinned_contig_num": 4032, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T02:42:25+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef99a" - }, - "has_input": [ - "nmdc:3373ef564b5b97fa472dc8f2c2277dbc" - ], - "part_of": [ - "nmdc:mga03eyz63" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8ac52d00bad1f9349da2acde572006b6", - 
"nmdc:9dd5eb06fe24f63d5012e34e364a580c", - "nmdc:05107e0217e199d7b0cd571db88f7d09", - "nmdc:02ffcaeeb9a73edea47ba3671396026a", - "nmdc:b9b4ccafc50787f86ef03680eb23848d", - "nmdc:fbd178d9c302b841e3fde3ab9acd8160", - "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", - "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", - "nmdc:45536a48cef31f2c3870c7bacb3d785a", - "nmdc:a52d057d005504857f82bcf661dd7676", - "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", - "nmdc:32eca4cab8525b09cf1b0ed2353f9278" - ], - "was_informed_by": "gold:Gp0127623", - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga03eyz63", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T02:42:25+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f9f" - }, - "has_input": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7" - ], - "part_of": [ - "nmdc:mga03eyz63" - ], - "ctg_logsum": 70596, - "scaf_logsum": 70885, - "gap_pct": 0.00063, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", - "nmdc:a0377bb7d752e66b754753fcefb5005a", - "nmdc:081017d0d9e68a999c245618eb907c08", - "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", - "nmdc:21fb280328baf81e8135733eaf440b66" - ], - "asm_score": 3.626, - "was_informed_by": "gold:Gp0127623", - "ctg_powsum": 7584.611, - "scaf_max": 12785, - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "scaf_powsum": 7618.086, - "execution_resource": "NERSC-Cori", - "contigs": 118423, - "name": "Assembly Activity for nmdc:mga03eyz63", - "ctg_max": 11834, - "gc_std": 0.12108, - "contig_bp": 50762396, - "gc_avg": 0.59992, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 50762716, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 118391, - "ended_at_time": "2021-10-11T02:42:25+00:00", - "ctg_l50": 402, - "ctg_l90": 285, - "ctg_n50": 37682, - "ctg_n90": 
100987, - "scaf_l50": 402, - "scaf_l90": 285, - "scaf_n50": 37659, - "scaf_n90": 100956 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b3a" - }, - "id": "nmdc:omprc-11-5r54nt37", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-r7ggfc16" - ], - "has_output": [ - "jgi:574fde547ded5e3df1ee13fa" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127623" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c85a" - }, - "has_input": [ - "nmdc:14766bc431808b2a29c03beecb66bbac" - ], - "part_of": [ - "nmdc:mga03eyz63" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7", - "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32" - ], - "was_informed_by": "gold:Gp0127623", - "input_read_count": 23705118, - "output_read_bases": 3409425046, - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3579472818, - "name": "Read QC Activity for 
nmdc:mga03eyz63", - "output_read_count": 22801896, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T02:42:25+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf17" - }, - "has_input": [ - "nmdc:6a8409b21c45ba9feba873ec269c8ff7" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ac39e916e17e08a845bb40d97519d8be", - "nmdc:c6fd5c573ef8605d9b43ff9c698af423", - "nmdc:eda0c04d692ecf137585676c15924626", - "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", - "nmdc:e1f164c534830cd628d67c564ace863b", - "nmdc:a1062576d998b7b82e39b8d8520fa37e", - "nmdc:040e6ca695283a12711c16344acd1e76", - "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", - "nmdc:f2eed9669268f69dbc31f0c4f839fccf" - ], - "was_informed_by": "gold:Gp0127623", - "id": "nmdc:e05db57d44a39f083df9a1803551b79b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:42:25+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2351763069, - "type": "nmdc:DataObject", - "id": "jgi:574fde787ded5e3df1ee1416", - "name": "10533.2.165322.CGGTTGT-AACAACC.fastq.gz" - }, - { - "name": "Gp0127625_Filtered Reads", - "description": "Filtered Reads for Gp0127625", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filtered.fastq.gz", - 
"md5_checksum": "2d13b3a30339b9c5b4fba099f9d4b10f", - "id": "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", - "file_size_bytes": 2037866145 - }, - { - "name": "Gp0127625_Filtered Stats", - "description": "Filtered Stats for Gp0127625", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filterStats.txt", - "md5_checksum": "42be49edad69619e550ddd69d150490f", - "id": "nmdc:42be49edad69619e550ddd69d150490f", - "file_size_bytes": 284 - }, - { - "name": "Gp0127625_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", - "md5_checksum": "550b631e1de3e01392154e54493d47ef", - "id": "nmdc:550b631e1de3e01392154e54493d47ef", - "file_size_bytes": 754 - }, - { - "name": "Gp0127625_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", - "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", - "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "file_size_bytes": 641658 - }, - { - "name": "Gp0127625_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127625", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", - "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", - "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "file_size_bytes": 228494 - }, - { - "name": "Gp0127625_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127625", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", - "md5_checksum": 
"b09795fc768257d881e8ce547be0ce68", - "id": "nmdc:b09795fc768257d881e8ce547be0ce68", - "file_size_bytes": 1849982678 - }, - { - "name": "Gp0127625_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127625", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", - "md5_checksum": "064ba18473eb80ff0b484311565d2894", - "id": "nmdc:064ba18473eb80ff0b484311565d2894", - "file_size_bytes": 253852 - }, - { - "name": "Gp0127625_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127625", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", - "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", - "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "file_size_bytes": 2331556 - }, - { - "name": "Gp0127625_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127625", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", - "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", - "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "file_size_bytes": 1471976767 - }, - { - "name": "Gp0127625_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127625", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", - "md5_checksum": "bc8acb862c8942616ef07302667c334f", - "id": "nmdc:bc8acb862c8942616ef07302667c334f", - "file_size_bytes": 627498 - }, - { - "name": "Gp0127625_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127625", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", - "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", - "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", - "file_size_bytes": 3921941 - }, - { - "name": "Gp0127625_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127625", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", - "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", - "id": "nmdc:5b6e7cbece9167002b12c3415afa9bb8", - "file_size_bytes": 171703232 - }, - { - "name": "Gp0127625_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127625", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", - "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", - "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", - "file_size_bytes": 170799869 - }, - { - "name": "Gp0127625_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", - "md5_checksum": "d231edb2040700184064615a28e65ee5", - "id": "nmdc:d231edb2040700184064615a28e65ee5", - "file_size_bytes": 23875845 - }, - { - "name": "Gp0127625_Assembled AGP file", - "description": "Assembled AGP file for Gp0127625", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_assembly.agp", - "md5_checksum": "9e3e55fe2f337ee0192604f8aa13da8e", - "id": "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", - "file_size_bytes": 22351137 - }, - { - "name": "Gp0127625_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127625", - "data_object_type": "Assembly Coverage BAM", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_pairedMapped_sorted.bam", - "md5_checksum": "ff612445b348b65f906cd8858c4ec54e", - "id": "nmdc:ff612445b348b65f906cd8858c4ec54e", - "file_size_bytes": 2304803186 - }, - { - "name": "Gp0127625_Protein FAA", - "description": "Protein FAA for Gp0127625", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_proteins.faa", - "md5_checksum": "b1cae75f11c5efc7b37ea38c8d690e09", - "id": "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", - "file_size_bytes": 96076876 - }, - { - "name": "Gp0127625_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127625", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_structural_annotation.gff", - "md5_checksum": "035d81e38b01174de882d15a859390a0", - "id": "nmdc:035d81e38b01174de882d15a859390a0", - "file_size_bytes": 2526 - }, - { - "name": "Gp0127625_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127625", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_functional_annotation.gff", - "md5_checksum": "da95ab39eb229378ef9c1c7317f58c36", - "id": "nmdc:da95ab39eb229378ef9c1c7317f58c36", - "file_size_bytes": 106301187 - }, - { - "name": "Gp0127625_KO TSV file", - "description": "KO TSV file for Gp0127625", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko.tsv", - "md5_checksum": "7ba2f365814fc2ae2896849d4dbb619d", - "id": "nmdc:7ba2f365814fc2ae2896849d4dbb619d", - "file_size_bytes": 12012992 - }, - { - "name": "Gp0127625_EC TSV file", - "description": "EC TSV file for Gp0127625", - "data_object_type": "Annotation Enzyme Commission", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ec.tsv", - "md5_checksum": "91ade9a89599592c1e699b8990a11fba", - "id": "nmdc:91ade9a89599592c1e699b8990a11fba", - "file_size_bytes": 7987608 - }, - { - "name": "Gp0127625_COG GFF file", - "description": "COG GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cog.gff", - "md5_checksum": "a1c78cb8202825bd692c572b1537b549", - "id": "nmdc:a1c78cb8202825bd692c572b1537b549", - "file_size_bytes": 63761051 - }, - { - "name": "Gp0127625_PFAM GFF file", - "description": "PFAM GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_pfam.gff", - "md5_checksum": "b83f7bca7166e0bbeb5d260af5920d00", - "id": "nmdc:b83f7bca7166e0bbeb5d260af5920d00", - "file_size_bytes": 49051515 - }, - { - "name": "Gp0127625_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_tigrfam.gff", - "md5_checksum": "2f8d30335b71e6d7f29458795d20daf4", - "id": "nmdc:2f8d30335b71e6d7f29458795d20daf4", - "file_size_bytes": 5446717 - }, - { - "name": "Gp0127625_SMART GFF file", - "description": "SMART GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_smart.gff", - "md5_checksum": "77a22d4fe5949259acc0f12eafe264a2", - "id": "nmdc:77a22d4fe5949259acc0f12eafe264a2", - "file_size_bytes": 14046377 - }, - { - "name": "Gp0127625_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_supfam.gff", - "md5_checksum": "f21f3b5ed41e8945b4eebdbb044f832a", - "id": "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", - "file_size_bytes": 79091420 - }, - { - "name": "Gp0127625_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127625", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cath_funfam.gff", - "md5_checksum": "4a365e4bb51f09bb4f21470a753eac42", - "id": "nmdc:4a365e4bb51f09bb4f21470a753eac42", - "file_size_bytes": 60777542 - }, - { - "name": "Gp0127625_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko_ec.gff", - "md5_checksum": "5a230cb34060373c2e9a0af8b8040f46", - "id": "nmdc:5a230cb34060373c2e9a0af8b8040f46", - "file_size_bytes": 38117675 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127625_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.tooShort.fa", - "md5_checksum": "27f14f2f1af3ad7d17505a6ddc52d860", - "id": "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", - "file_size_bytes": 128750891 - }, - { - "name": "Gp0127625_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.unbinned.fa", - "md5_checksum": "b66d8fd47536ed5299c280aa873e2130", - "id": "nmdc:b66d8fd47536ed5299c280aa873e2130", - "file_size_bytes": 37223163 - }, - { - "name": "Gp0127625_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127625", - "data_object_type": "CheckM Statistics", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_checkm_qa.out", - "md5_checksum": "dac476e3a7a8cdb2f3be5946ae437906", - "id": "nmdc:dac476e3a7a8cdb2f3be5946ae437906", - "file_size_bytes": 1413 - }, - { - "name": "Gp0127625_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127625", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_hqmq_bin.zip", - "md5_checksum": "1ce4d3dcf2c9cbe245b437ca14a2772f", - "id": "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", - "file_size_bytes": 182 - }, - { - "name": "Gp0127625_metabat2 bins", - "description": "metabat2 bins for Gp0127625", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_metabat_bin.zip", - "md5_checksum": "d312dfb56973b50497bab8faf7409db8", - "id": "nmdc:d312dfb56973b50497bab8faf7409db8", - "file_size_bytes": 1729165 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e4b" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127625", - "url": "https://data.microbiomedata.org/data/1781_100327/assembly/mapping_stats.txt", - "file_size_bytes": 22675445, - "type": "nmdc:DataObject", - "id": "nmdc:d5f4718482fe0012f1e39efbd22c50b4", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e4e" - }, - "description": "Assembled scaffold fasta for gold:Gp0127625", - "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly_scaffolds.fna", - "file_size_bytes": 169600309, - "type": "nmdc:DataObject", - "id": "nmdc:f1b48b2f19ff83ba6fd51df86ec966ec", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e50" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127625", - "url": 
"https://data.microbiomedata.org/data/1781_100327/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2276431089, - "type": "nmdc:DataObject", - "id": "nmdc:8ff0fbe4939b764e12158e783f049f23", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e52" - }, - "description": "Assembled contigs fasta for gold:Gp0127625", - "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly_contigs.fna", - "file_size_bytes": 170502832, - "type": "nmdc:DataObject", - "id": "nmdc:593237bf7f38f66d40eca1dbb23c7aef", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e55" - }, - "description": "Assembled AGP file for gold:Gp0127625", - "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly.agp", - "file_size_bytes": 19948625, - "type": "nmdc:DataObject", - "id": "nmdc:6c6032861ed3d9b16040e414aac58731", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15af9" - }, - "id": "nmdc:99433bf558d171ea575501775dabdb36", - "name": "1781_100327.krona.html", - "description": "Gold:Gp0127625 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100327/ReadbasedAnalysis/centrifuge/1781_100327.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b3a" - }, - "id": "nmdc:17804fc0900e6fddc51600638be0e04c", - "name": "1781_100327.json", - "description": "Gold:Gp0127625 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100327/ReadbasedAnalysis/1781_100327.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c3" - }, - "id": "nmdc:f48e06ed804bffa9e3fbafe2548c0d23", - "name": "bins.tooShort.fa", - "description": "tooShort (< 
3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127625", - "file_size_bytes": 125246089, - "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c4" - }, - "id": "nmdc:6355ef8f8d9c6f954797ae92ca908c41", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127625", - "file_size_bytes": 39081008, - "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c7" - }, - "id": "nmdc:9c77e38f33eedf5f9c2eb4e672ce3951", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127625", - "file_size_bytes": 1099, - "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c8" - }, - "id": "nmdc:d22c092793a0476bfc8bba9c5e1a6d22", - "name": "gold:Gp0127625.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127625", - "file_size_bytes": 1411952, - "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ca" - }, - "id": "nmdc:80a9f95cbc30cf2ccd164b5b85b866b6", - "name": "gold:Gp0127625.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127625", - "file_size_bytes": 1533567, - "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165cb" - }, - "id": "nmdc:31079c1f43cb55dd4d8d603e8413cc56", - "name": "gold:Gp0127625.bins.2.fa", - "description": "metabat2 binned contig file for 
gold:Gp0127625", - "file_size_bytes": 640735, - "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc8" - }, - "description": "EC TSV File for gold:Gp0127625", - "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_ec.tsv", - "md5_checksum": "16eb9d7ffc8dbf8872cbdb9b7f0a1c82", - "file_size_bytes": 3385, - "id": "nmdc:16eb9d7ffc8dbf8872cbdb9b7f0a1c82", - "name": "gold:Gp0127625_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc9" - }, - "description": "Structural annotation GFF file for gold:Gp0127625", - "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_structural_annotation.gff", - "md5_checksum": "bf23db2dda841d77cf51b7c9120ba503", - "file_size_bytes": 3385, - "id": "nmdc:bf23db2dda841d77cf51b7c9120ba503", - "name": "gold:Gp0127625_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cca" - }, - "description": "KO TSV File for gold:Gp0127625", - "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_ko.tsv", - "md5_checksum": "18dd16caf7af261c4d647da91a6f526a", - "file_size_bytes": 3385, - "id": "nmdc:18dd16caf7af261c4d647da91a6f526a", - "name": "gold:Gp0127625_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16ccd" - }, - "description": "Protein FAA for gold:Gp0127625", - "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_proteins.faa", - "md5_checksum": "c6fb34fc2da63a5cc46522279e768db9", - "file_size_bytes": 3385, - "id": "nmdc:c6fb34fc2da63a5cc46522279e768db9", - "name": "gold:Gp0127625_Protein FAA", - 
"data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cce" - }, - "description": "Functional annotation GFF file for gold:Gp0127625", - "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_functional_annotation.gff", - "md5_checksum": "3b039b9d5a75b97a67edf5d50b34d9f0", - "file_size_bytes": 3385, - "id": "nmdc:3b039b9d5a75b97a67edf5d50b34d9f0", - "name": "gold:Gp0127625_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab346fc" - }, - "has_input": [ - "nmdc:5b6e7cbece9167002b12c3415afa9bb8", - "nmdc:ff612445b348b65f906cd8858c4ec54e", - "nmdc:da95ab39eb229378ef9c1c7317f58c36" - ], - "too_short_contig_num": 275414, - "part_of": [ - "nmdc:mga0bfpq58" - ], - "binned_contig_num": 1195, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", - "nmdc:b66d8fd47536ed5299c280aa873e2130", - "nmdc:dac476e3a7a8cdb2f3be5946ae437906", - "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", - "nmdc:d312dfb56973b50497bab8faf7409db8" - ], - "was_informed_by": "gold:Gp0127625", - "input_contig_num": 300100, - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0bfpq58", - "mags_list": [ - { - "number_of_contig": 382, - "completeness": 47.74, - "bin_name": "bins.1", - "gene_count": 2054, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 2.69, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 21 - }, - { - 
"number_of_contig": 197, - "completeness": 22.93, - "bin_name": "bins.2", - "gene_count": 1005, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.03, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 17 - }, - { - "number_of_contig": 95, - "completeness": 7.24, - "bin_name": "bins.3", - "gene_count": 447, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.72, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 2 - }, - { - "number_of_contig": 193, - "completeness": 17.79, - "bin_name": "bins.4", - "gene_count": 1386, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 19 - }, - { - "number_of_contig": 328, - "completeness": 37.37, - "bin_name": "bins.5", - "gene_count": 1978, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 2.56, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 35 - } - ], - "unbinned_contig_num": 23491, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:29:50+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef999" - }, - "has_input": [ - "nmdc:5b6e7cbece9167002b12c3415afa9bb8" - ], - "part_of": [ - "nmdc:mga0bfpq58" - ], - 
"git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", - "nmdc:035d81e38b01174de882d15a859390a0", - "nmdc:da95ab39eb229378ef9c1c7317f58c36", - "nmdc:7ba2f365814fc2ae2896849d4dbb619d", - "nmdc:91ade9a89599592c1e699b8990a11fba", - "nmdc:a1c78cb8202825bd692c572b1537b549", - "nmdc:b83f7bca7166e0bbeb5d260af5920d00", - "nmdc:2f8d30335b71e6d7f29458795d20daf4", - "nmdc:77a22d4fe5949259acc0f12eafe264a2", - "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", - "nmdc:4a365e4bb51f09bb4f21470a753eac42", - "nmdc:5a230cb34060373c2e9a0af8b8040f46" - ], - "was_informed_by": "gold:Gp0127625", - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0bfpq58", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:29:50+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f89" - }, - "has_input": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" - ], - "part_of": [ - "nmdc:mga0bfpq58" - ], - "ctg_logsum": 452076, - "scaf_logsum": 453436, - "gap_pct": 0.00138, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5b6e7cbece9167002b12c3415afa9bb8", - "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", - "nmdc:d231edb2040700184064615a28e65ee5", - "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", - "nmdc:ff612445b348b65f906cd8858c4ec54e" - ], - "asm_score": 3.923, - "was_informed_by": "gold:Gp0127625", - "ctg_powsum": 49204, - "scaf_max": 29400, - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", - "scaf_powsum": 49370, - "execution_resource": "NERSC-Cori", - "contigs": 300102, - "name": "Assembly Activity for nmdc:mga0bfpq58", - "ctg_max": 29400, - "gc_std": 0.0955, - "contig_bp": 159709614, - "gc_avg": 0.6367, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 159711824, - "type": "nmdc:MetagenomeAssembly", - 
"scaffolds": 299890, - "ended_at_time": "2021-10-11T03:29:50+00:00", - "ctg_l50": 546, - "ctg_l90": 301, - "ctg_n50": 78532, - "ctg_n90": 244428, - "scaf_l50": 546, - "scaf_l90": 301, - "scaf_n50": 78517, - "scaf_n90": 244244 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b3b" - }, - "id": "nmdc:omprc-11-76ebsj44", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-k3t2wk45" - ], - "has_output": [ - "jgi:574fde787ded5e3df1ee1416" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127625" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c858" - }, - "has_input": [ - "nmdc:93c62425e46296c35415039d7fd9cb56" - ], - "part_of": [ - "nmdc:mga0bfpq58" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", - "nmdc:42be49edad69619e550ddd69d150490f" - ], - "was_informed_by": "gold:Gp0127625", - "input_read_count": 26227312, - "output_read_bases": 3764845015, - "id": 
"nmdc:aa214e53dc8472f9ff99b02245d8f943", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3960324112, - "name": "Read QC Activity for nmdc:mga0bfpq58", - "output_read_count": 25182244, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:29:50+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf15" - }, - "has_input": [ - "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:550b631e1de3e01392154e54493d47ef", - "nmdc:3f14ff51550d9d78dae3a7ec08514907", - "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", - "nmdc:b09795fc768257d881e8ce547be0ce68", - "nmdc:064ba18473eb80ff0b484311565d2894", - "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", - "nmdc:60c663a34b79db2ee71edf1afe4c14e3", - "nmdc:bc8acb862c8942616ef07302667c334f", - "nmdc:b797ed6cb135c993b582cac368b2a93c" - ], - "was_informed_by": "gold:Gp0127625", - "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:29:50+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2167583658, - "type": "nmdc:DataObject", - "id": "jgi:574fde7b7ded5e3df1ee1418", - "name": "10533.2.165322.TACCAAC-GGTTGGT.fastq.gz" - }, - { - "name": "Gp0127626_Filtered Reads", - "description": "Filtered Reads for Gp0127626", - 
"data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filtered.fastq.gz", - "md5_checksum": "07499ad2f2b80f42bd7109732b1eef90", - "id": "nmdc:07499ad2f2b80f42bd7109732b1eef90", - "file_size_bytes": 1944721961 - }, - { - "name": "Gp0127626_Filtered Stats", - "description": "Filtered Stats for Gp0127626", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filterStats.txt", - "md5_checksum": "9089d07fdee5ed03e901c1656206af02", - "id": "nmdc:9089d07fdee5ed03e901c1656206af02", - "file_size_bytes": 287 - }, - { - "name": "Gp0127626_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", - "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", - "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "file_size_bytes": 2399 - }, - { - "name": "Gp0127626_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", - "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", - "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "file_size_bytes": 743066 - }, - { - "name": "Gp0127626_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127626", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", - "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", - "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", - "file_size_bytes": 233970 - }, - { - "name": "Gp0127626_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127626", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", - "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", - "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", - "file_size_bytes": 1673697764 - }, - { - "name": "Gp0127626_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127626", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", - "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", - "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127626_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127626", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", - "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", - "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "file_size_bytes": 2327521 - }, - { - "name": "Gp0127626_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127626", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", - "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", - "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "file_size_bytes": 1343921825 - }, - { - "name": "Gp0127626_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127626", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", - "md5_checksum": "806b27f1fa5a423100b113bb56edc708", - "id": "nmdc:806b27f1fa5a423100b113bb56edc708", - "file_size_bytes": 638478 - }, - { - "name": "Gp0127626_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127626", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", - "md5_checksum": "bb3e6793c4f036b9756f075d41846964", - "id": "nmdc:bb3e6793c4f036b9756f075d41846964", - "file_size_bytes": 3987411 - }, - { - "name": "Gp0127626_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127626", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_contigs.fna", - "md5_checksum": "6d72d9fb6a282f8872cd3d5b8ce1a29d", - "id": "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", - "file_size_bytes": 47315336 - }, - { - "name": "Gp0127626_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127626", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_scaffolds.fna", - "md5_checksum": "2d89ade1cc6267bb77b48daa176442f2", - "id": "nmdc:2d89ade1cc6267bb77b48daa176442f2", - "file_size_bytes": 46998743 - }, - { - "name": "Gp0127626_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_covstats.txt", - "md5_checksum": "79588f527e08eace069ddc63171f004c", - "id": "nmdc:79588f527e08eace069ddc63171f004c", - "file_size_bytes": 8270233 - }, - { - "name": "Gp0127626_Assembled AGP file", - "description": "Assembled AGP file for Gp0127626", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_assembly.agp", - "md5_checksum": "cc855d3c15387d078c6919d1b19f8c05", - "id": "nmdc:cc855d3c15387d078c6919d1b19f8c05", - "file_size_bytes": 7690333 - }, - { - "name": "Gp0127626_Metagenome Alignment BAM file", - "description": "Metagenome 
Alignment BAM file for Gp0127626", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_pairedMapped_sorted.bam", - "md5_checksum": "ef722a8ecd2b85d9202560df41eca7ed", - "id": "nmdc:ef722a8ecd2b85d9202560df41eca7ed", - "file_size_bytes": 2083099081 - }, - { - "name": "Gp0127626_Protein FAA", - "description": "Protein FAA for Gp0127626", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_proteins.faa", - "md5_checksum": "26360324fcaed21fd48b54972cce09cb", - "id": "nmdc:26360324fcaed21fd48b54972cce09cb", - "file_size_bytes": 28150597 - }, - { - "name": "Gp0127626_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127626", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_structural_annotation.gff", - "md5_checksum": "d2be135e631726360cf6ac23a3d56629", - "id": "nmdc:d2be135e631726360cf6ac23a3d56629", - "file_size_bytes": 2511 - }, - { - "name": "Gp0127626_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127626", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_functional_annotation.gff", - "md5_checksum": "b2fdf525bc1ddadb30427cba91c63483", - "id": "nmdc:b2fdf525bc1ddadb30427cba91c63483", - "file_size_bytes": 33351979 - }, - { - "name": "Gp0127626_KO TSV file", - "description": "KO TSV file for Gp0127626", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko.tsv", - "md5_checksum": "75ff61c1b51ace76d6e01930ae41c38c", - "id": "nmdc:75ff61c1b51ace76d6e01930ae41c38c", - "file_size_bytes": 3842650 - }, - { - "name": "Gp0127626_EC TSV file", - 
"description": "EC TSV file for Gp0127626", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ec.tsv", - "md5_checksum": "4210daa7b1b0b84a6e5b6591e4e93c55", - "id": "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", - "file_size_bytes": 2561980 - }, - { - "name": "Gp0127626_COG GFF file", - "description": "COG GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cog.gff", - "md5_checksum": "cfd7a714b2e18f136d6dc48b9162e1c0", - "id": "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", - "file_size_bytes": 19108716 - }, - { - "name": "Gp0127626_PFAM GFF file", - "description": "PFAM GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_pfam.gff", - "md5_checksum": "80a1ed51631f5fbc43032aa4afbfbf1d", - "id": "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", - "file_size_bytes": 13800768 - }, - { - "name": "Gp0127626_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_tigrfam.gff", - "md5_checksum": "52e64eec8c715affde1612b871e2490e", - "id": "nmdc:52e64eec8c715affde1612b871e2490e", - "file_size_bytes": 1446190 - }, - { - "name": "Gp0127626_SMART GFF file", - "description": "SMART GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_smart.gff", - "md5_checksum": "4687e89ae41c98bc49ca81ded0b4c622", - "id": "nmdc:4687e89ae41c98bc49ca81ded0b4c622", - "file_size_bytes": 4252918 - }, - { - "name": "Gp0127626_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_supfam.gff", - "md5_checksum": "8cc7e6c8e232891c3ac7d952302905b6", - "id": "nmdc:8cc7e6c8e232891c3ac7d952302905b6", - "file_size_bytes": 24007157 - }, - { - 
"name": "Gp0127626_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cath_funfam.gff", - "md5_checksum": "445ce659140104b37475c5c2e3fb7761", - "id": "nmdc:445ce659140104b37475c5c2e3fb7761", - "file_size_bytes": 17990080 - }, - { - "name": "Gp0127626_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127626", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko_ec.gff", - "md5_checksum": "32e15eb7eab763990dbb0ce947321718", - "id": "nmdc:32e15eb7eab763990dbb0ce947321718", - "file_size_bytes": 12235401 - }, - { - "name": "Gp0127626_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127626", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_checkm_qa.out", - "md5_checksum": "66dea8d60f61c7a150ae4cbc3ce88757", - "id": "nmdc:66dea8d60f61c7a150ae4cbc3ce88757", - "file_size_bytes": 765 - }, - { - "name": "Gp0127626_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127626", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_hqmq_bin.zip", - "md5_checksum": "1b0f148dc6a3a6a007482d1b03fe7e6a", - "id": "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a", - "file_size_bytes": 520239 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e51" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127626", - "url": "https://data.microbiomedata.org/data/1781_100328/assembly/mapping_stats.txt", - "file_size_bytes": 7848645, - "type": "nmdc:DataObject", - "id": "nmdc:f12072d88720efdfb5cecb913d4a595f", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e53" - }, - "description": 
"Assembled contigs fasta for gold:Gp0127626", - "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly_contigs.fna", - "file_size_bytes": 46893748, - "type": "nmdc:DataObject", - "id": "nmdc:59e99f35194f3f98fa07d401dddd4959", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e54" - }, - "description": "Assembled scaffold fasta for gold:Gp0127626", - "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly_scaffolds.fna", - "file_size_bytes": 46577279, - "type": "nmdc:DataObject", - "id": "nmdc:8856365e5fa1681e630bca38b7376fd1", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e56" - }, - "description": "Assembled AGP file for gold:Gp0127626", - "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly.agp", - "file_size_bytes": 6846909, - "type": "nmdc:DataObject", - "id": "nmdc:f3ab16f91b806aff91f36167bc832f4a", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e57" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127626", - "url": "https://data.microbiomedata.org/data/1781_100328/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2056447451, - "type": "nmdc:DataObject", - "id": "nmdc:353e83f4603072d1fe5d15f4c193397f", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b00" - }, - "id": "nmdc:3441b097a56424b593c10323e71636f7", - "name": "1781_100328.json", - "description": "Gold:Gp0127626 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100328/ReadbasedAnalysis/1781_100328.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b08" - }, - "id": "nmdc:04f3b5daa5e47ce69c5c95dce5507f61", - "name": 
"1781_100328.krona.html", - "description": "Gold:Gp0127626 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100328/ReadbasedAnalysis/centrifuge/1781_100328.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c6" - }, - "id": "nmdc:4f708453d292f67572466be1e73f5e63", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127626", - "file_size_bytes": 4192133, - "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c9" - }, - "id": "nmdc:6ac7d20ce76667dbe7737db5074574c5", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127626", - "file_size_bytes": 40372419, - "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165cc" - }, - "id": "nmdc:a38cd7c53173358f551112e22bffa7b3", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127626", - "file_size_bytes": 1224, - "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165cd" - }, - "id": "nmdc:02a1905f0e4d6c3106d2d43932ad44d7", - "name": "gold:Gp0127626.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127626", - "file_size_bytes": 232694, - "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ce" - }, - "id": "nmdc:4473bba41dba6e11ed96dff91fd1b9e4", - "name": "gold:Gp0127626.bins.4.fa", - 
"description": "metabat2 binned contig file for gold:Gp0127626", - "file_size_bytes": 305463, - "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165cf" - }, - "id": "nmdc:d0941c29cff73ecfa187a4e38108efab", - "name": "gold:Gp0127626.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127626", - "file_size_bytes": 437942, - "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d1" - }, - "id": "nmdc:98c71d88192e9665597c98b72266ae0f", - "name": "gold:Gp0127626.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127626", - "file_size_bytes": 515703, - "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16ccb" - }, - "description": "EC TSV File for gold:Gp0127626", - "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_ec.tsv", - "md5_checksum": "dde9a2b70a0552a8d6f7cda7f4862aa9", - "file_size_bytes": 3385, - "id": "nmdc:dde9a2b70a0552a8d6f7cda7f4862aa9", - "name": "gold:Gp0127626_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16ccc" - }, - "description": "KO TSV File for gold:Gp0127626", - "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_ko.tsv", - "md5_checksum": "1f45d481e2882a15e7d060e47cbbfda3", - "file_size_bytes": 3385, - "id": "nmdc:1f45d481e2882a15e7d060e47cbbfda3", - "name": "gold:Gp0127626_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - 
"$oid": "649b003f1ae706d7b5b16ccf" - }, - "description": "Functional annotation GFF file for gold:Gp0127626", - "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_functional_annotation.gff", - "md5_checksum": "8e19f17a8fd0747410b68d804b87139d", - "file_size_bytes": 3385, - "id": "nmdc:8e19f17a8fd0747410b68d804b87139d", - "name": "gold:Gp0127626_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd0" - }, - "description": "Structural annotation GFF file for gold:Gp0127626", - "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_structural_annotation.gff", - "md5_checksum": "f1b6a4b001b67ec72eb5b5411e1321c9", - "file_size_bytes": 3385, - "id": "nmdc:f1b6a4b001b67ec72eb5b5411e1321c9", - "name": "gold:Gp0127626_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd2" - }, - "description": "Protein FAA for gold:Gp0127626", - "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_proteins.faa", - "md5_checksum": "11741b35b589852f2b652d1f73afb663", - "file_size_bytes": 3385, - "id": "nmdc:11741b35b589852f2b652d1f73afb663", - "name": "gold:Gp0127626_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34712" - }, - "has_input": [ - "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", - "nmdc:ef722a8ecd2b85d9202560df41eca7ed", - "nmdc:b2fdf525bc1ddadb30427cba91c63483" - ], - "too_short_contig_num": 102702, - "part_of": [ - "nmdc:mga04xnj45" - ], - "binned_contig_num": 230, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - 
"nmdc:66dea8d60f61c7a150ae4cbc3ce88757", - "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a" - ], - "was_informed_by": "gold:Gp0127626", - "input_contig_num": 105397, - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga04xnj45", - "mags_list": [ - { - "number_of_contig": 230, - "completeness": 81.4, - "bin_name": "bins.1", - "gene_count": 2055, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 2.43, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 37 - } - ], - "unbinned_contig_num": 2465, - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-12-02T20:54:56+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b1" - }, - "has_input": [ - "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d" - ], - "part_of": [ - "nmdc:mga04xnj45" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:26360324fcaed21fd48b54972cce09cb", - "nmdc:d2be135e631726360cf6ac23a3d56629", - "nmdc:b2fdf525bc1ddadb30427cba91c63483", - "nmdc:75ff61c1b51ace76d6e01930ae41c38c", - "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", - "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", - "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", - "nmdc:52e64eec8c715affde1612b871e2490e", - "nmdc:4687e89ae41c98bc49ca81ded0b4c622", - "nmdc:8cc7e6c8e232891c3ac7d952302905b6", - "nmdc:445ce659140104b37475c5c2e3fb7761", - "nmdc:32e15eb7eab763990dbb0ce947321718" - ], - "was_informed_by": "gold:Gp0127626", - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "execution_resource": "NERSC-Cori", - 
"name": "Annotation Activity for nmdc:mga04xnj45", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-12-02T20:54:56+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f9e" - }, - "has_input": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90" - ], - "part_of": [ - "nmdc:mga04xnj45" - ], - "ctg_logsum": 63429, - "scaf_logsum": 63657, - "gap_pct": 0.00092, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", - "nmdc:2d89ade1cc6267bb77b48daa176442f2", - "nmdc:79588f527e08eace069ddc63171f004c", - "nmdc:cc855d3c15387d078c6919d1b19f8c05", - "nmdc:ef722a8ecd2b85d9202560df41eca7ed" - ], - "asm_score": 7.629, - "was_informed_by": "gold:Gp0127626", - "ctg_powsum": 7359.443, - "scaf_max": 30685, - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "scaf_powsum": 7386.413, - "execution_resource": "NERSC-Cori", - "contigs": 105397, - "name": "Assembly Activity for nmdc:mga04xnj45", - "ctg_max": 30685, - "gc_std": 0.09232, - "gc_avg": 0.60819, - "contig_bp": 43390261, - "started_at_time": "2021-12-01T21:31:29Z", - "scaf_bp": 43390661, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 105366, - "ended_at_time": "2021-12-02T20:54:56+00:00", - "ctg_l50": 368, - "ctg_l90": 284, - "ctg_n50": 34766, - "ctg_n90": 91597, - "scaf_l50": 368, - "scaf_l90": 284, - "scaf_n50": 34749, - "scaf_n90": 91567 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b3c" - }, - "id": "nmdc:omprc-11-s6wqag22", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", - "description": "Riverbed sediment samples were collected from an area with no 
vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-mxdygh62" - ], - "has_output": [ - "jgi:574fde7b7ded5e3df1ee1418" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127626" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c86c" - }, - "has_input": [ - "nmdc:8bee270fc5b3a39f7e7609b60e191766" - ], - "part_of": [ - "nmdc:mga04xnj45" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90", - "nmdc:9089d07fdee5ed03e901c1656206af02" - ], - "was_informed_by": "gold:Gp0127626", - "input_read_count": 24223170, - "output_read_bases": 3405205631, - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3657698670, - "name": "Read QC Activity for nmdc:mga04xnj45", - "output_read_count": 22768968, - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-12-02T20:54:56+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf30" - }, - "has_input": [ - "nmdc:07499ad2f2b80f42bd7109732b1eef90" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a91f8dccb2baa53550216f5bdfbf1473", - "nmdc:a81ddf4e3bc044e8601554117cd887aa", - "nmdc:a012dc3a7b44774019c313fd8ee88efc", - 
"nmdc:dd4023a1488bdfc73b12c422b62b274a", - "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", - "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", - "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", - "nmdc:806b27f1fa5a423100b113bb56edc708", - "nmdc:bb3e6793c4f036b9756f075d41846964" - ], - "was_informed_by": "gold:Gp0127626", - "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:54:56+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2150489977, - "type": "nmdc:DataObject", - "id": "jgi:574fde577ded5e3df1ee13fc", - "name": "10533.1.165310.ATAGCGG-ACCGCTA.fastq.gz" - }, - { - "name": "Gp0127624_Filtered Reads", - "description": "Filtered Reads for Gp0127624", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filtered.fastq.gz", - "md5_checksum": "8585f6896702bddf64b02191be5921f4", - "id": "nmdc:8585f6896702bddf64b02191be5921f4", - "file_size_bytes": 1795382596 - }, - { - "name": "Gp0127624_Filtered Stats", - "description": "Filtered Stats for Gp0127624", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filterStats.txt", - "md5_checksum": "b9b6464ecc746a4cc39b549696c5fe9c", - "id": "nmdc:b9b6464ecc746a4cc39b549696c5fe9c", - "file_size_bytes": 289 - }, - { - "name": "Gp0127624_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for 
Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", - "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", - "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "file_size_bytes": 1500 - }, - { - "name": "Gp0127624_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", - "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", - "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", - "file_size_bytes": 692993 - }, - { - "name": "Gp0127624_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127624", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", - "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", - "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "file_size_bytes": 230779 - }, - { - "name": "Gp0127624_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127624", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", - "md5_checksum": "77db34862804280185d3b1ce961e5338", - "id": "nmdc:77db34862804280185d3b1ce961e5338", - "file_size_bytes": 1645928829 - }, - { - "name": "Gp0127624_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127624", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", - "md5_checksum": "84e3efb84d961d189ece310911ccf475", - "id": "nmdc:84e3efb84d961d189ece310911ccf475", - "file_size_bytes": 254646 - }, - { - "name": "Gp0127624_Centrifuge Krona HTML report", - 
"description": "Centrifuge Krona HTML report for Gp0127624", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", - "md5_checksum": "b8fd31679921f8b68c80917e14caa260", - "id": "nmdc:b8fd31679921f8b68c80917e14caa260", - "file_size_bytes": 2332082 - }, - { - "name": "Gp0127624_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127624", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", - "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", - "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", - "file_size_bytes": 1316771556 - }, - { - "name": "Gp0127624_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127624", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", - "md5_checksum": "0781e8042688219035efafe7d75858d0", - "id": "nmdc:0781e8042688219035efafe7d75858d0", - "file_size_bytes": 626940 - }, - { - "name": "Gp0127624_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127624", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", - "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", - "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", - "file_size_bytes": 3921891 - }, - { - "name": "Gp0127624_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127624", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", - "md5_checksum": "464a9db7a94e7e0646b1ff8b501d82f3", - "id": "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", - 
"file_size_bytes": 95468011 - }, - { - "name": "Gp0127624_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127624", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", - "md5_checksum": "0a50f88775f36e9238152f3319252853", - "id": "nmdc:0a50f88775f36e9238152f3319252853", - "file_size_bytes": 94893921 - }, - { - "name": "Gp0127624_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", - "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", - "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", - "file_size_bytes": 15112642 - }, - { - "name": "Gp0127624_Assembled AGP file", - "description": "Assembled AGP file for Gp0127624", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_assembly.agp", - "md5_checksum": "a4405d49e8efe2ee124d25e2414de56c", - "id": "nmdc:a4405d49e8efe2ee124d25e2414de56c", - "file_size_bytes": 14126849 - }, - { - "name": "Gp0127624_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127624", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_pairedMapped_sorted.bam", - "md5_checksum": "8c37ab0b3594cc975348041e4841f6ac", - "id": "nmdc:8c37ab0b3594cc975348041e4841f6ac", - "file_size_bytes": 1976821836 - }, - { - "name": "Gp0127624_Protein FAA", - "description": "Protein FAA for Gp0127624", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_proteins.faa", - "md5_checksum": "40d15cb24063dbb6097fd1626f62db95", - "id": "nmdc:40d15cb24063dbb6097fd1626f62db95", - "file_size_bytes": 55458746 - }, - { - "name": 
"Gp0127624_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127624", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_structural_annotation.gff", - "md5_checksum": "f70325438abce4c6f56e6c82619dd44a", - "id": "nmdc:f70325438abce4c6f56e6c82619dd44a", - "file_size_bytes": 2518 - }, - { - "name": "Gp0127624_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127624", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_functional_annotation.gff", - "md5_checksum": "c5cf33c1f2f68a7c63fef6dd623a97c0", - "id": "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", - "file_size_bytes": 63778960 - }, - { - "name": "Gp0127624_KO TSV file", - "description": "KO TSV file for Gp0127624", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko.tsv", - "md5_checksum": "4aca66fe81c8c056fa5617c7aa77bc7d", - "id": "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", - "file_size_bytes": 7252005 - }, - { - "name": "Gp0127624_EC TSV file", - "description": "EC TSV file for Gp0127624", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ec.tsv", - "md5_checksum": "303a5e88a0eae8942082e9e13f9f6eba", - "id": "nmdc:303a5e88a0eae8942082e9e13f9f6eba", - "file_size_bytes": 4835920 - }, - { - "name": "Gp0127624_COG GFF file", - "description": "COG GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cog.gff", - "md5_checksum": "d919f65e54a8351324e332a5daa6a831", - "id": "nmdc:d919f65e54a8351324e332a5daa6a831", - "file_size_bytes": 37494199 - }, - { - "name": "Gp0127624_PFAM GFF file", - "description": "PFAM GFF file for 
Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_pfam.gff", - "md5_checksum": "764c7c2b5554fc6b860b036cab22e0ef", - "id": "nmdc:764c7c2b5554fc6b860b036cab22e0ef", - "file_size_bytes": 27739105 - }, - { - "name": "Gp0127624_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_tigrfam.gff", - "md5_checksum": "d0a86560767836f901bdd2625bea46e3", - "id": "nmdc:d0a86560767836f901bdd2625bea46e3", - "file_size_bytes": 3077428 - }, - { - "name": "Gp0127624_SMART GFF file", - "description": "SMART GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_smart.gff", - "md5_checksum": "2f64111072a2b19a726aed9c9f54bba7", - "id": "nmdc:2f64111072a2b19a726aed9c9f54bba7", - "file_size_bytes": 8547849 - }, - { - "name": "Gp0127624_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_supfam.gff", - "md5_checksum": "51a011777869ff58b977991f5c90fc47", - "id": "nmdc:51a011777869ff58b977991f5c90fc47", - "file_size_bytes": 46844460 - }, - { - "name": "Gp0127624_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cath_funfam.gff", - "md5_checksum": "53f57253df5119d338b9813aa81c7c9b", - "id": "nmdc:53f57253df5119d338b9813aa81c7c9b", - "file_size_bytes": 35558659 - }, - { - "name": "Gp0127624_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko_ec.gff", - "md5_checksum": "c4aa03608fa7442a05cd23fdcc29bc21", - "id": "nmdc:c4aa03608fa7442a05cd23fdcc29bc21", - "file_size_bytes": 23055213 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality 
assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127624_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.tooShort.fa", - "md5_checksum": "73aca2cc587d8a632a730dcc6ff53d3b", - "id": "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", - "file_size_bytes": 79198373 - }, - { - "name": "Gp0127624_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.unbinned.fa", - "md5_checksum": "822be4fbeadb0c8c24f4a680d646b62f", - "id": "nmdc:822be4fbeadb0c8c24f4a680d646b62f", - "file_size_bytes": 13854717 - }, - { - "name": "Gp0127624_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127624", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_checkm_qa.out", - "md5_checksum": "6b39bdb404c651428634ad28f8f15e2a", - "id": "nmdc:6b39bdb404c651428634ad28f8f15e2a", - "file_size_bytes": 1106 - }, - { - "name": "Gp0127624_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127624", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_hqmq_bin.zip", - "md5_checksum": "0bd9d9e5f15087ccd35c38956bb3a210", - "id": "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", - "file_size_bytes": 
507790 - }, - { - "name": "Gp0127624_metabat2 bins", - "description": "metabat2 bins for Gp0127624", - "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_metabat_bin.zip", - "md5_checksum": "2d174febedeca0ce515939dd53d6ccb9", - "id": "nmdc:2d174febedeca0ce515939dd53d6ccb9", - "file_size_bytes": 230699 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e49" - }, - "description": "Assembled contigs fasta for gold:Gp0127624", - "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly_contigs.fna", - "file_size_bytes": 94703971, - "type": "nmdc:DataObject", - "id": "nmdc:70c6cfaac2821e95aad6732da590276e", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e4a" - }, - "description": "Assembled scaffold fasta for gold:Gp0127624", - "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly_scaffolds.fna", - "file_size_bytes": 94130161, - "type": "nmdc:DataObject", - "id": "nmdc:3b26db32c98a95990057fb0a38d243ca", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e4c" - }, - "description": "Assembled AGP file for gold:Gp0127624", - "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly.agp", - "file_size_bytes": 12598209, - "type": "nmdc:DataObject", - "id": "nmdc:8f50a4da5f7f50b0271523331b484e18", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e4d" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127624", - "url": "https://data.microbiomedata.org/data/1781_100326/assembly/mapping_stats.txt", - "file_size_bytes": 14348602, - "type": "nmdc:DataObject", - "id": "nmdc:b7e396b2ead7ab3abf0b39139af1ba09", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e4f" - }, - 
"description": "Metagenome Alignment BAM file for gold:Gp0127624", - "url": "https://data.microbiomedata.org/data/1781_100326/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1949775740, - "type": "nmdc:DataObject", - "id": "nmdc:d27b41f3c6392653daeb9b6bbc0277be", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15af3" - }, - "id": "nmdc:d84b79e2655d147759dfe8b579b7e4b9", - "name": "1781_100326.json", - "description": "Gold:Gp0127624 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100326/ReadbasedAnalysis/1781_100326.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15af4" - }, - "id": "nmdc:9670430bbf29ecc709d0e98a383ce37e", - "name": "1781_100326.krona.html", - "description": "Gold:Gp0127624 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100326/ReadbasedAnalysis/centrifuge/1781_100326.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165b8" - }, - "id": "nmdc:d84b1a0fea4e91826aa72971c4580662", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127624", - "file_size_bytes": 76923851, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165bb" - }, - "id": "nmdc:e47f7f56d18d80c1d06b96dac4fb1090", - "name": "gtdbtk.ar122.summary.tsv", - "description": "gtdbtk archaea assignment result summary table for gold:Gp0127624", - "file_size_bytes": 1004, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165bc" - }, - "id": 
"nmdc:72ae589425ade6d237d1fb6bb7f88dd8", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127624", - "file_size_bytes": 1256, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165bd" - }, - "id": "nmdc:d95dfd1f76e7e53f84b231e534b3aba7", - "name": "gold:Gp0127624.bins.3.fa", - "description": "hqmq binned contig file for gold:Gp0127624", - "file_size_bytes": 958082, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/hqmq-metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165be" - }, - "id": "nmdc:8eec32d3611abd677f9849ca12dba02e", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127624", - "file_size_bytes": 14174146, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165bf" - }, - "id": "nmdc:79950b448257c67890bd38c137557aa0", - "name": "gold:Gp0127624.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127624", - "file_size_bytes": 263657, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c0" - }, - "id": "nmdc:105f97b2c45693bd730d438123566b41", - "name": "gold:Gp0127624.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0127624", - "file_size_bytes": 517581, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c1" - }, - "id": 
"nmdc:0907e3098e3adb4ce9d8bc4ad240191a", - "name": "gold:Gp0127624.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127624", - "file_size_bytes": 978794, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165c2" - }, - "id": "nmdc:44c236036e8d7c34d7066b96066415b5", - "name": "gold:Gp0127624.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127624", - "file_size_bytes": 238087, - "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc2" - }, - "description": "EC TSV File for gold:Gp0127624", - "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_ec.tsv", - "md5_checksum": "73ebb84a8744552c890ad2508e313972", - "file_size_bytes": 3385, - "id": "nmdc:73ebb84a8744552c890ad2508e313972", - "name": "gold:Gp0127624_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc3" - }, - "description": "KO TSV File for gold:Gp0127624", - "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_ko.tsv", - "md5_checksum": "89a4bb36ef225146a2ba0daaaea512fd", - "file_size_bytes": 3385, - "id": "nmdc:89a4bb36ef225146a2ba0daaaea512fd", - "name": "gold:Gp0127624_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc5" - }, - "description": "Protein FAA for gold:Gp0127624", - "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_proteins.faa", - "md5_checksum": "075262a23b12fd4da073a973a5b6cf15", - "file_size_bytes": 3385, - "id": "nmdc:075262a23b12fd4da073a973a5b6cf15", - "name": 
"gold:Gp0127624_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc6" - }, - "description": "Structural annotation GFF file for gold:Gp0127624", - "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_structural_annotation.gff", - "md5_checksum": "3fb3966095303ea8aa7f27bff3e9db50", - "file_size_bytes": 3385, - "id": "nmdc:3fb3966095303ea8aa7f27bff3e9db50", - "name": "gold:Gp0127624_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cc7" - }, - "description": "Functional annotation GFF file for gold:Gp0127624", - "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_functional_annotation.gff", - "md5_checksum": "6bcdfc58ee6b4eb5ae022c71636a88b4", - "file_size_bytes": 3385, - "id": "nmdc:6bcdfc58ee6b4eb5ae022c71636a88b4", - "name": "gold:Gp0127624_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34701" - }, - "has_input": [ - "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", - "nmdc:8c37ab0b3594cc975348041e4841f6ac", - "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0" - ], - "too_short_contig_num": 182057, - "part_of": [ - "nmdc:mga0e8jh10" - ], - "binned_contig_num": 364, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", - "nmdc:822be4fbeadb0c8c24f4a680d646b62f", - "nmdc:6b39bdb404c651428634ad28f8f15e2a", - "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", - "nmdc:2d174febedeca0ce515939dd53d6ccb9" - ], - "was_informed_by": "gold:Gp0127624", - "input_contig_num": 191010, - "id": 
"nmdc:20efb77f7b08a72b0c887c059686b7cf", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0e8jh10", - "mags_list": [ - { - "number_of_contig": 69, - "completeness": 11.21, - "bin_name": "bins.1", - "gene_count": 328, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 8 - }, - { - "number_of_contig": 194, - "completeness": 75.24, - "bin_name": "bins.2", - "gene_count": 2023, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 1.78, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 35 - }, - { - "number_of_contig": 101, - "completeness": 19.54, - "bin_name": "bins.3", - "gene_count": 585, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 10 - } - ], - "unbinned_contig_num": 8589, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:30:59+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9a2" - }, - "has_input": [ - "nmdc:464a9db7a94e7e0646b1ff8b501d82f3" - ], - "part_of": [ - "nmdc:mga0e8jh10" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:40d15cb24063dbb6097fd1626f62db95", - 
"nmdc:f70325438abce4c6f56e6c82619dd44a", - "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", - "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", - "nmdc:303a5e88a0eae8942082e9e13f9f6eba", - "nmdc:d919f65e54a8351324e332a5daa6a831", - "nmdc:764c7c2b5554fc6b860b036cab22e0ef", - "nmdc:d0a86560767836f901bdd2625bea46e3", - "nmdc:2f64111072a2b19a726aed9c9f54bba7", - "nmdc:51a011777869ff58b977991f5c90fc47", - "nmdc:53f57253df5119d338b9813aa81c7c9b", - "nmdc:c4aa03608fa7442a05cd23fdcc29bc21" - ], - "was_informed_by": "gold:Gp0127624", - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0e8jh10", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:30:59+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f8d" - }, - "has_input": [ - "nmdc:8585f6896702bddf64b02191be5921f4" - ], - "part_of": [ - "nmdc:mga0e8jh10" - ], - "ctg_logsum": 174168, - "scaf_logsum": 174680, - "gap_pct": 0.0009, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", - "nmdc:0a50f88775f36e9238152f3319252853", - "nmdc:f0dc2f598fa06efbe99843bddaf54f60", - "nmdc:a4405d49e8efe2ee124d25e2414de56c", - "nmdc:8c37ab0b3594cc975348041e4841f6ac" - ], - "asm_score": 5.95, - "was_informed_by": "gold:Gp0127624", - "ctg_powsum": 19404, - "scaf_max": 33408, - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", - "scaf_powsum": 19462, - "execution_resource": "NERSC-Cori", - "contigs": 191010, - "name": "Assembly Activity for nmdc:mga0e8jh10", - "ctg_max": 33408, - "gc_std": 0.09154, - "contig_bp": 88102698, - "gc_avg": 0.62452, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 88103488, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 190940, - "ended_at_time": "2021-10-11T03:30:59+00:00", - "ctg_l50": 434, - "ctg_l90": 288, - "ctg_n50": 56361, - "ctg_n90": 
162547, - "scaf_l50": 434, - "scaf_l90": 288, - "scaf_n50": 56334, - "scaf_n90": 162481 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b3d" - }, - "id": "nmdc:omprc-11-x0es2p18", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-msqbhe76" - ], - "has_output": [ - "jgi:574fde577ded5e3df1ee13fc" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127624" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c85b" - }, - "has_input": [ - "nmdc:e24b00c4de7a24629f5933940070e06c" - ], - "part_of": [ - "nmdc:mga0e8jh10" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8585f6896702bddf64b02191be5921f4", - "nmdc:b9b6464ecc746a4cc39b549696c5fe9c" - ], - "was_informed_by": "gold:Gp0127624", - "input_read_count": 25674112, - "output_read_bases": 3361311014, - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3876790912, - "name": "Read QC Activity for nmdc:mga0e8jh10", - 
"output_read_count": 22503352, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:30:59+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf1c" - }, - "has_input": [ - "nmdc:8585f6896702bddf64b02191be5921f4" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", - "nmdc:6c7fec765f2a225f168ebb1f69961013", - "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", - "nmdc:77db34862804280185d3b1ce961e5338", - "nmdc:84e3efb84d961d189ece310911ccf475", - "nmdc:b8fd31679921f8b68c80917e14caa260", - "nmdc:715c66c69b621478da7d48481f3cbd1d", - "nmdc:0781e8042688219035efafe7d75858d0", - "nmdc:85547ab860ef9d6877ba7abc8881740a" - ], - "was_informed_by": "gold:Gp0127624", - "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:30:59+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2057112594, - "type": "nmdc:DataObject", - "id": "jgi:574fde587ded5e3df1ee13fd", - "name": "10533.1.165310.CCAGTGT-AACACTG.fastq.gz" - }, - { - "name": "Gp0127629_Filtered Reads", - "description": "Filtered Reads for Gp0127629", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filtered.fastq.gz", - "md5_checksum": 
"0db98173ae3395106e24d250b2655f06", - "id": "nmdc:0db98173ae3395106e24d250b2655f06", - "file_size_bytes": 1807840952 - }, - { - "name": "Gp0127629_Filtered Stats", - "description": "Filtered Stats for Gp0127629", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filterStats.txt", - "md5_checksum": "bc0874c01bbd31c644cd598e2fdad3c4", - "id": "nmdc:bc0874c01bbd31c644cd598e2fdad3c4", - "file_size_bytes": 284 - }, - { - "name": "Gp0127629_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", - "md5_checksum": "f4f810491708ff25956cddd005cc9944", - "id": "nmdc:f4f810491708ff25956cddd005cc9944", - "file_size_bytes": 1206 - }, - { - "name": "Gp0127629_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", - "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", - "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", - "file_size_bytes": 662074 - }, - { - "name": "Gp0127629_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127629", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", - "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", - "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "file_size_bytes": 229307 - }, - { - "name": "Gp0127629_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127629", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", - "md5_checksum": 
"80fe705d97ef4a0701b1320e9ba19a82", - "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "file_size_bytes": 1667543500 - }, - { - "name": "Gp0127629_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127629", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", - "md5_checksum": "6a216ec913587e26ddc036b703126d76", - "id": "nmdc:6a216ec913587e26ddc036b703126d76", - "file_size_bytes": 253079 - }, - { - "name": "Gp0127629_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127629", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", - "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", - "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", - "file_size_bytes": 2326900 - }, - { - "name": "Gp0127629_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127629", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", - "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", - "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "file_size_bytes": 1328025421 - }, - { - "name": "Gp0127629_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127629", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", - "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", - "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "file_size_bytes": 628969 - }, - { - "name": "Gp0127629_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127629", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", - "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", - "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", - "file_size_bytes": 3933712 - }, - { - "name": "Gp0127629_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127629", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_contigs.fna", - "md5_checksum": "7badcefc26b24213b514cd4c3c9a87d7", - "id": "nmdc:7badcefc26b24213b514cd4c3c9a87d7", - "file_size_bytes": 109144090 - }, - { - "name": "Gp0127629_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127629", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_scaffolds.fna", - "md5_checksum": "89dd3c10791083ae5a5b30c2154deabd", - "id": "nmdc:89dd3c10791083ae5a5b30c2154deabd", - "file_size_bytes": 108517023 - }, - { - "name": "Gp0127629_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_covstats.txt", - "md5_checksum": "5e503e3abe6eb9e94c34a55da5bbafdc", - "id": "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", - "file_size_bytes": 16536925 - }, - { - "name": "Gp0127629_Assembled AGP file", - "description": "Assembled AGP file for Gp0127629", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_assembly.agp", - "md5_checksum": "0a1f96cd74ec9f1a6668924745689014", - "id": "nmdc:0a1f96cd74ec9f1a6668924745689014", - "file_size_bytes": 15454045 - }, - { - "name": "Gp0127629_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127629", - "data_object_type": "Assembly Coverage BAM", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_pairedMapped_sorted.bam", - "md5_checksum": "1608f12840c36ac1d882cc6ef4f4627f", - "id": "nmdc:1608f12840c36ac1d882cc6ef4f4627f", - "file_size_bytes": 2001264626 - }, - { - "name": "Gp0127629_Protein FAA", - "description": "Protein FAA for Gp0127629", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_proteins.faa", - "md5_checksum": "ba15f54043fad473edec771b60f5b040", - "id": "nmdc:ba15f54043fad473edec771b60f5b040", - "file_size_bytes": 62222526 - }, - { - "name": "Gp0127629_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127629", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_structural_annotation.gff", - "md5_checksum": "f6d684abab1c60b2b95ade84644e6a38", - "id": "nmdc:f6d684abab1c60b2b95ade84644e6a38", - "file_size_bytes": 2521 - }, - { - "name": "Gp0127629_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127629", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_functional_annotation.gff", - "md5_checksum": "496e0fa5ac1c04849338c972189ee3f6", - "id": "nmdc:496e0fa5ac1c04849338c972189ee3f6", - "file_size_bytes": 70803412 - }, - { - "name": "Gp0127629_KO TSV file", - "description": "KO TSV file for Gp0127629", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko.tsv", - "md5_checksum": "311ffbbfc80f28908615a1f18492ae5e", - "id": "nmdc:311ffbbfc80f28908615a1f18492ae5e", - "file_size_bytes": 8203743 - }, - { - "name": "Gp0127629_EC TSV file", - "description": "EC TSV file for Gp0127629", - "data_object_type": "Annotation Enzyme Commission", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ec.tsv", - "md5_checksum": "7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", - "id": "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", - "file_size_bytes": 5508974 - }, - { - "name": "Gp0127629_COG GFF file", - "description": "COG GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cog.gff", - "md5_checksum": "1116328ed7ba951246f0eec1d3f065b4", - "id": "nmdc:1116328ed7ba951246f0eec1d3f065b4", - "file_size_bytes": 42250648 - }, - { - "name": "Gp0127629_PFAM GFF file", - "description": "PFAM GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_pfam.gff", - "md5_checksum": "325e47bc009aeba79fc767e3b6daeee2", - "id": "nmdc:325e47bc009aeba79fc767e3b6daeee2", - "file_size_bytes": 31677996 - }, - { - "name": "Gp0127629_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_tigrfam.gff", - "md5_checksum": "f820db8ce6a1ae7c3e8af40729f5b62b", - "id": "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", - "file_size_bytes": 3472661 - }, - { - "name": "Gp0127629_SMART GFF file", - "description": "SMART GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_smart.gff", - "md5_checksum": "96ab6fa258a08490082b4f99269f3e8d", - "id": "nmdc:96ab6fa258a08490082b4f99269f3e8d", - "file_size_bytes": 9149681 - }, - { - "name": "Gp0127629_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_supfam.gff", - "md5_checksum": "a2b630c408bd557d693b147f95627fdc", - "id": "nmdc:a2b630c408bd557d693b147f95627fdc", - "file_size_bytes": 52308332 - }, - { - "name": "Gp0127629_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127629", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cath_funfam.gff", - "md5_checksum": "ba87cd24242288e0b6d8f32a2bcbbb80", - "id": "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", - "file_size_bytes": 39926818 - }, - { - "name": "Gp0127629_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko_ec.gff", - "md5_checksum": "9c97bd7a5e4978e31ed1e5386c3619f3", - "id": "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3", - "file_size_bytes": 26101397 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127629_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.tooShort.fa", - "md5_checksum": "2bbd475ff6a15058b38244e71456024a", - "id": "nmdc:2bbd475ff6a15058b38244e71456024a", - "file_size_bytes": 88674437 - }, - { - "name": "Gp0127629_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.unbinned.fa", - "md5_checksum": "70901a70c06fdcfc71efa2d004e210fd", - "id": "nmdc:70901a70c06fdcfc71efa2d004e210fd", - "file_size_bytes": 19226945 - }, - { - "name": "Gp0127629_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127629", - "data_object_type": "CheckM Statistics", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_checkm_qa.out", - "md5_checksum": "d52b4ae6b61161082fee7d42ecf5ee87", - "id": "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", - "file_size_bytes": 978 - }, - { - "name": "Gp0127629_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127629", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_hqmq_bin.zip", - "md5_checksum": "58d9cd30ca53424cd0f1ce27d0a8a885", - "id": "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", - "file_size_bytes": 182 - }, - { - "name": "Gp0127629_metabat2 bins", - "description": "metabat2 bins for Gp0127629", - "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_metabat_bin.zip", - "md5_checksum": "8f4f5294de942734837fba3d68ffc6b4", - "id": "nmdc:8f4f5294de942734837fba3d68ffc6b4", - "file_size_bytes": 377953 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e61" - }, - "description": "Assembled scaffold fasta for gold:Gp0127629", - "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly_scaffolds.fna", - "file_size_bytes": 107683315, - "type": "nmdc:DataObject", - "id": "nmdc:eb0c17effb4ea272e31318eecbe890da", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e62" - }, - "description": "Assembled contigs fasta for gold:Gp0127629", - "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly_contigs.fna", - "file_size_bytes": 108309886, - "type": "nmdc:DataObject", - "id": "nmdc:9eed2da9f67c58f243329daf2289f40e", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e63" - }, - "description": "Assembled AGP file for gold:Gp0127629", - "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly.agp", - "file_size_bytes": 
13784613, - "type": "nmdc:DataObject", - "id": "nmdc:0680d9887963e661ca943b8b2779d954", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e64" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127629", - "url": "https://data.microbiomedata.org/data/1781_100331/assembly/mapping_stats.txt", - "file_size_bytes": 15702721, - "type": "nmdc:DataObject", - "id": "nmdc:047d86c83005c22cf581cb6c092a6362", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e67" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127629", - "url": "https://data.microbiomedata.org/data/1781_100331/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1973835303, - "type": "nmdc:DataObject", - "id": "nmdc:42c05d533d1a4ecaaa7367180a1b9b36", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b18" - }, - "id": "nmdc:7d974a6b38d5572992a870fb6dbcfb24", - "name": "1781_100331.json", - "description": "Gold:Gp0127629 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100331/ReadbasedAnalysis/1781_100331.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b1a" - }, - "id": "nmdc:728f96c592e66b7def86b6b6a227fd51", - "name": "1781_100331.krona.html", - "description": "Gold:Gp0127629 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100331/ReadbasedAnalysis/centrifuge/1781_100331.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e2" - }, - "id": "nmdc:26b0b714933a5d25157f88d0e491104b", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127629", - "file_size_bytes": 20329609, - 
"url": "https://data.microbiomedata.org/data/1781_100331/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165fa" - }, - "id": "nmdc:3508c879a1c420c55e7325f8be819542", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127629", - "file_size_bytes": 86214629, - "url": "https://data.microbiomedata.org/data/1781_100331/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cda" - }, - "description": "KO TSV File for gold:Gp0127629", - "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_ko.tsv", - "md5_checksum": "37fb326b25c1ae3caebddf668feadd76", - "file_size_bytes": 3385, - "id": "nmdc:37fb326b25c1ae3caebddf668feadd76", - "name": "gold:Gp0127629_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cdb" - }, - "description": "Functional annotation GFF file for gold:Gp0127629", - "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_functional_annotation.gff", - "md5_checksum": "75e43708767f06de878e1c2115714e0b", - "file_size_bytes": 3385, - "id": "nmdc:75e43708767f06de878e1c2115714e0b", - "name": "gold:Gp0127629_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cdc" - }, - "description": "EC TSV File for gold:Gp0127629", - "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_ec.tsv", - "md5_checksum": "f9211f36dc6992c2dfecd160987434c7", - "file_size_bytes": 3385, - "id": "nmdc:f9211f36dc6992c2dfecd160987434c7", - "name": "gold:Gp0127629_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce1" - }, - "description": "Structural 
annotation GFF file for gold:Gp0127629", - "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_structural_annotation.gff", - "md5_checksum": "9b3fb3e409e3d3128a8a43cc58d32a95", - "file_size_bytes": 3385, - "id": "nmdc:9b3fb3e409e3d3128a8a43cc58d32a95", - "name": "gold:Gp0127629_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce2" - }, - "description": "Protein FAA for gold:Gp0127629", - "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_proteins.faa", - "md5_checksum": "9559ebd9a8921ff8ae9f89c2ffcef6f7", - "file_size_bytes": 3385, - "id": "nmdc:9559ebd9a8921ff8ae9f89c2ffcef6f7", - "name": "gold:Gp0127629_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3470c" - }, - "has_input": [ - "nmdc:7badcefc26b24213b514cd4c3c9a87d7", - "nmdc:1608f12840c36ac1d882cc6ef4f4627f", - "nmdc:496e0fa5ac1c04849338c972189ee3f6" - ], - "too_short_contig_num": 195955, - "part_of": [ - "nmdc:mga071r920" - ], - "binned_contig_num": 271, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:2bbd475ff6a15058b38244e71456024a", - "nmdc:70901a70c06fdcfc71efa2d004e210fd", - "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", - "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", - "nmdc:8f4f5294de942734837fba3d68ffc6b4" - ], - "was_informed_by": "gold:Gp0127629", - "input_contig_num": 208551, - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga071r920", - "mags_list": [ - { - "number_of_contig": 177, - "completeness": 9.71, - "bin_name": "bins.1", - "gene_count": 1122, - 
"bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 14 - }, - { - "number_of_contig": 94, - "completeness": 16.81, - "bin_name": "bins.2", - "gene_count": 465, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.34, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - } - ], - "unbinned_contig_num": 12325, - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:33:33+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9af" - }, - "has_input": [ - "nmdc:7badcefc26b24213b514cd4c3c9a87d7" - ], - "part_of": [ - "nmdc:mga071r920" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ba15f54043fad473edec771b60f5b040", - "nmdc:f6d684abab1c60b2b95ade84644e6a38", - "nmdc:496e0fa5ac1c04849338c972189ee3f6", - "nmdc:311ffbbfc80f28908615a1f18492ae5e", - "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", - "nmdc:1116328ed7ba951246f0eec1d3f065b4", - "nmdc:325e47bc009aeba79fc767e3b6daeee2", - "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", - "nmdc:96ab6fa258a08490082b4f99269f3e8d", - "nmdc:a2b630c408bd557d693b147f95627fdc", - "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", - "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3" - ], - "was_informed_by": "gold:Gp0127629", - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga071r920", - "started_at_time": "2021-10-11T02:23:35Z", - "type": 
"nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:33:33+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f9a" - }, - "has_input": [ - "nmdc:0db98173ae3395106e24d250b2655f06" - ], - "part_of": [ - "nmdc:mga071r920" - ], - "ctg_logsum": 212258, - "scaf_logsum": 212917, - "gap_pct": 0.00151, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7badcefc26b24213b514cd4c3c9a87d7", - "nmdc:89dd3c10791083ae5a5b30c2154deabd", - "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", - "nmdc:0a1f96cd74ec9f1a6668924745689014", - "nmdc:1608f12840c36ac1d882cc6ef4f4627f" - ], - "asm_score": 3.305, - "was_informed_by": "gold:Gp0127629", - "ctg_powsum": 22751, - "scaf_max": 23996, - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "scaf_powsum": 22826, - "execution_resource": "NERSC-Cori", - "contigs": 208553, - "name": "Assembly Activity for nmdc:mga071r920", - "ctg_max": 23996, - "gc_std": 0.1053, - "contig_bp": 101011771, - "gc_avg": 0.62056, - "started_at_time": "2021-10-11T02:23:35Z", - "scaf_bp": 101013301, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 208427, - "ended_at_time": "2021-10-11T03:33:33+00:00", - "ctg_l50": 478, - "ctg_l90": 290, - "ctg_n50": 59884, - "ctg_n90": 174522, - "scaf_l50": 478, - "scaf_l90": 290, - "scaf_n50": 59864, - "scaf_n90": 174416 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b3e" - }, - "id": "nmdc:omprc-11-1nvcer55", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - 
"has_input": [ - "nmdc:bsm-11-3sfanv57" - ], - "has_output": [ - "jgi:574fde587ded5e3df1ee13fd" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127629" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c869" - }, - "has_input": [ - "nmdc:22f8150866c51b35726066d2ec13c5ca" - ], - "part_of": [ - "nmdc:mga071r920" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0db98173ae3395106e24d250b2655f06", - "nmdc:bc0874c01bbd31c644cd598e2fdad3c4" - ], - "was_informed_by": "gold:Gp0127629", - "input_read_count": 23886420, - "output_read_bases": 3395256515, - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3606849420, - "name": "Read QC Activity for nmdc:mga071r920", - "output_read_count": 22738452, - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:33:33+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf2b" - }, - "has_input": [ - "nmdc:0db98173ae3395106e24d250b2655f06" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f4f810491708ff25956cddd005cc9944", - "nmdc:67e3c200d3765733af33d1db1f4bf968", - "nmdc:26cd6390e8362da2ee1d7691360d2dfb", - "nmdc:80fe705d97ef4a0701b1320e9ba19a82", - "nmdc:6a216ec913587e26ddc036b703126d76", - 
"nmdc:ebed7286f886596764a66a0d1dac3e43", - "nmdc:80dd3584d257e8f84b59118ffd0d5e21", - "nmdc:61b5fe5664ca99f6354c7a5a0222678c", - "nmdc:81108175d5ef2ca158f516bfc75d3cd9" - ], - "was_informed_by": "gold:Gp0127629", - "id": "nmdc:b82754c2c692809f9e59ff9824278c32", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga071r920", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:33:33+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2825784199, - "type": "nmdc:DataObject", - "id": "jgi:574fe09a7ded5e3df1ee1485", - "name": "10533.3.165334.CTGACAC-TGTGTCA.fastq.gz" - }, - { - "name": "Gp0127628_Filtered Reads", - "description": "Filtered Reads for Gp0127628", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filtered.fastq.gz", - "md5_checksum": "f6f1760721d73fc57919b2115a1d47ec", - "id": "nmdc:f6f1760721d73fc57919b2115a1d47ec", - "file_size_bytes": 2548975208 - }, - { - "name": "Gp0127628_Filtered Stats", - "description": "Filtered Stats for Gp0127628", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filterStats.txt", - "md5_checksum": "2225f9d41343590d818186fa2d66852d", - "id": "nmdc:2225f9d41343590d818186fa2d66852d", - "file_size_bytes": 291 - }, - { - "name": "Gp0127628_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127628", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", - "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", - "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "file_size_bytes": 3472 - }, - { - "name": "Gp0127628_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", - "md5_checksum": "335dbf6f1055de0950988a002f432c0b", - "id": "nmdc:335dbf6f1055de0950988a002f432c0b", - "file_size_bytes": 863867 - }, - { - "name": "Gp0127628_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127628", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", - "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", - "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "file_size_bytes": 234974 - }, - { - "name": "Gp0127628_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127628", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", - "md5_checksum": "224085164a389c6f207967ed03b3e6af", - "id": "nmdc:224085164a389c6f207967ed03b3e6af", - "file_size_bytes": 2220789142 - }, - { - "name": "Gp0127628_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127628", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", - "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", - "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", - "file_size_bytes": 257030 - }, - { - "name": "Gp0127628_Centrifuge Krona HTML report", - "description": "Centrifuge 
Krona HTML report for Gp0127628", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", - "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", - "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "file_size_bytes": 2337568 - }, - { - "name": "Gp0127628_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127628", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", - "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", - "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "file_size_bytes": 1776487262 - }, - { - "name": "Gp0127628_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127628", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", - "md5_checksum": "aae9e961d8ed716457616c8a8841037b", - "id": "nmdc:aae9e961d8ed716457616c8a8841037b", - "file_size_bytes": 664011 - }, - { - "name": "Gp0127628_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127628", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", - "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", - "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", - "file_size_bytes": 4035375 - }, - { - "name": "Gp0127628_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127628", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", - "md5_checksum": "9e550afb3bcd8d66807f861ecfed815b", - "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", - "file_size_bytes": 74277737 - 
}, - { - "name": "Gp0127628_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127628", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", - "md5_checksum": "5e79fce62ffa8c4479be5159143797e0", - "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", - "file_size_bytes": 73802989 - }, - { - "name": "Gp0127628_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_covstats.txt", - "md5_checksum": "682fd042d6adcd93f75c3eae2cf32241", - "id": "nmdc:682fd042d6adcd93f75c3eae2cf32241", - "file_size_bytes": 12462125 - }, - { - "name": "Gp0127628_Assembled AGP file", - "description": "Assembled AGP file for Gp0127628", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_assembly.agp", - "md5_checksum": "9d607ebd92ad5bcbaaa405884d4a83a3", - "id": "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", - "file_size_bytes": 11636352 - }, - { - "name": "Gp0127628_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127628", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_pairedMapped_sorted.bam", - "md5_checksum": "9163caaba1f60d1af9a551559069ca08", - "id": "nmdc:9163caaba1f60d1af9a551559069ca08", - "file_size_bytes": 2743529039 - }, - { - "name": "Gp0127628_Protein FAA", - "description": "Protein FAA for Gp0127628", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_proteins.faa", - "md5_checksum": "9c21fbee23b4098d69ac618d32fe44c3", - "id": "nmdc:9c21fbee23b4098d69ac618d32fe44c3", - "file_size_bytes": 43551850 - }, - { - "name": "Gp0127628_Structural annotation GFF 
file", - "description": "Structural annotation GFF file for Gp0127628", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_structural_annotation.gff", - "md5_checksum": "c668eaf35e0ebbb7a304271a03dfd3cd", - "id": "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", - "file_size_bytes": 2518 - }, - { - "name": "Gp0127628_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127628", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_functional_annotation.gff", - "md5_checksum": "cf08b19ebb993d895845588d073c02fe", - "id": "nmdc:cf08b19ebb993d895845588d073c02fe", - "file_size_bytes": 50830515 - }, - { - "name": "Gp0127628_KO TSV file", - "description": "KO TSV file for Gp0127628", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko.tsv", - "md5_checksum": "e110cecd0dcfbefbde06b88e89047c94", - "id": "nmdc:e110cecd0dcfbefbde06b88e89047c94", - "file_size_bytes": 5904167 - }, - { - "name": "Gp0127628_EC TSV file", - "description": "EC TSV file for Gp0127628", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ec.tsv", - "md5_checksum": "5f393bad4aacf75d348d7e7d5fe00a06", - "id": "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", - "file_size_bytes": 3917008 - }, - { - "name": "Gp0127628_COG GFF file", - "description": "COG GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cog.gff", - "md5_checksum": "c8834a004633752f76b91883416c34b8", - "id": "nmdc:c8834a004633752f76b91883416c34b8", - "file_size_bytes": 29634134 - }, - { - "name": "Gp0127628_PFAM GFF file", - "description": "PFAM GFF file for Gp0127628", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_pfam.gff", - "md5_checksum": "adc813c11b8b32e205aa65ab971d4159", - "id": "nmdc:adc813c11b8b32e205aa65ab971d4159", - "file_size_bytes": 21661208 - }, - { - "name": "Gp0127628_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_tigrfam.gff", - "md5_checksum": "eecb4098ed258acb0820c17e9e308a9d", - "id": "nmdc:eecb4098ed258acb0820c17e9e308a9d", - "file_size_bytes": 2198767 - }, - { - "name": "Gp0127628_SMART GFF file", - "description": "SMART GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_smart.gff", - "md5_checksum": "cd2cbf38f357d4c7ec5080072e994861", - "id": "nmdc:cd2cbf38f357d4c7ec5080072e994861", - "file_size_bytes": 6281175 - }, - { - "name": "Gp0127628_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_supfam.gff", - "md5_checksum": "1e7aefe1539f0dbe510f805a8d0a6930", - "id": "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", - "file_size_bytes": 36891824 - }, - { - "name": "Gp0127628_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cath_funfam.gff", - "md5_checksum": "29e9378a37cc56837c1343de85993789", - "id": "nmdc:29e9378a37cc56837c1343de85993789", - "file_size_bytes": 27671574 - }, - { - "name": "Gp0127628_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko_ec.gff", - "md5_checksum": "5faeccd78a03acd094263a777faa5fe2", - "id": "nmdc:5faeccd78a03acd094263a777faa5fe2", - "file_size_bytes": 18790529 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - 
"description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127628_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.tooShort.fa", - "md5_checksum": "13137fa415f537d2874808d8c75c1b3d", - "id": "nmdc:13137fa415f537d2874808d8c75c1b3d", - "file_size_bytes": 63661919 - }, - { - "name": "Gp0127628_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.unbinned.fa", - "md5_checksum": "196d2699f8fdab4e38c8a638f92093b1", - "id": "nmdc:196d2699f8fdab4e38c8a638f92093b1", - "file_size_bytes": 9649261 - }, - { - "name": "Gp0127628_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127628", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_checkm_qa.out", - "md5_checksum": "b67b26f8f76faa347575352000021faf", - "id": "nmdc:b67b26f8f76faa347575352000021faf", - "file_size_bytes": 785 - }, - { - "name": "Gp0127628_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127628", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_hqmq_bin.zip", - "md5_checksum": "166c8a0ad2f4d57e9b16cdc699d56c09", - "id": "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", - "file_size_bytes": 182 - }, - { - "name": 
"Gp0127628_metabat2 bins", - "description": "metabat2 bins for Gp0127628", - "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_metabat_bin.zip", - "md5_checksum": "5ef5ad24cfe3990c0256d420f51f9010", - "id": "nmdc:5ef5ad24cfe3990c0256d420f51f9010", - "file_size_bytes": 279359 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e5c" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127628", - "url": "https://data.microbiomedata.org/data/1781_100330/assembly/mapping_stats.txt", - "file_size_bytes": 11830693, - "type": "nmdc:DataObject", - "id": "nmdc:29cc178c2efed5702e8d984729345761", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e5d" - }, - "description": "Assembled contigs fasta for gold:Gp0127628", - "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly_contigs.fna", - "file_size_bytes": 73646305, - "type": "nmdc:DataObject", - "id": "nmdc:e54d5475a6bf7148d2312d0fcc349cdb", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e5e" - }, - "description": "Assembled scaffold fasta for gold:Gp0127628", - "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly_scaffolds.fna", - "file_size_bytes": 73171893, - "type": "nmdc:DataObject", - "id": "nmdc:140f23c819c51594790d1209780f8f60", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e60" - }, - "description": "Assembled AGP file for gold:Gp0127628", - "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly.agp", - "file_size_bytes": 10372800, - "type": "nmdc:DataObject", - "id": "nmdc:64d6bc3e0883eb23926cd05e43c42d2c", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e65" - }, - "description": "Metagenome 
Alignment BAM file for gold:Gp0127628", - "url": "https://data.microbiomedata.org/data/1781_100330/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2706803208, - "type": "nmdc:DataObject", - "id": "nmdc:72951466c19bec33fea0232a2becf637", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b09" - }, - "id": "nmdc:c65969e7112a41f10ba56435077e1833", - "name": "1781_100330.krona.html", - "description": "Gold:Gp0127628 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100330/ReadbasedAnalysis/centrifuge/1781_100330.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b10" - }, - "id": "nmdc:94230c82a668902f15d13898305b06d0", - "name": "1781_100330.json", - "description": "Gold:Gp0127628 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100330/ReadbasedAnalysis/1781_100330.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d3" - }, - "id": "nmdc:58091a36e7ab3d84f65b6d4e08f1a528", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127628", - "file_size_bytes": 61790019, - "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d4" - }, - "id": "nmdc:756c3abe18401097b557a9d3f2788fa1", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127628", - "file_size_bytes": 10332738, - "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d5" - }, - "id": "nmdc:bd1a55c5a24a4ec234f5c22ce66ba8e2", - "name": "gold:Gp0127628.bins.1.fa", - 
"description": "metabat2 binned contig file for gold:Gp0127628", - "file_size_bytes": 214091, - "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d9" - }, - "id": "nmdc:4d6fbc978933ca7a2fb204c0230252d1", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127628", - "file_size_bytes": 770, - "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd4" - }, - "description": "EC TSV File for gold:Gp0127628", - "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_ec.tsv", - "md5_checksum": "760a1e1bc5aac21dd0b96098c72133ff", - "file_size_bytes": 3385, - "id": "nmdc:760a1e1bc5aac21dd0b96098c72133ff", - "name": "gold:Gp0127628_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd7" - }, - "description": "Protein FAA for gold:Gp0127628", - "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_proteins.faa", - "md5_checksum": "ec55b61e1204cde7fe61841179b88b53", - "file_size_bytes": 3385, - "id": "nmdc:ec55b61e1204cde7fe61841179b88b53", - "name": "gold:Gp0127628_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd8" - }, - "description": "Functional annotation GFF file for gold:Gp0127628", - "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_functional_annotation.gff", - "md5_checksum": "b73bf45facd909d89bfab76dee85a2cc", - "file_size_bytes": 3385, - "id": "nmdc:b73bf45facd909d89bfab76dee85a2cc", - "name": "gold:Gp0127628_Functional annotation GFF file", 
- "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd9" - }, - "description": "KO TSV File for gold:Gp0127628", - "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_ko.tsv", - "md5_checksum": "69da278e8966a688cafb7bb2c8f2e4d1", - "file_size_bytes": 3385, - "id": "nmdc:69da278e8966a688cafb7bb2c8f2e4d1", - "name": "gold:Gp0127628_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce0" - }, - "description": "Structural annotation GFF file for gold:Gp0127628", - "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_structural_annotation.gff", - "md5_checksum": "bd55dfd59ed0aa7ea685734c5b7ecbab", - "file_size_bytes": 3385, - "id": "nmdc:bd55dfd59ed0aa7ea685734c5b7ecbab", - "name": "gold:Gp0127628_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34705" - }, - "has_input": [ - "nmdc:9e550afb3bcd8d66807f861ecfed815b", - "nmdc:9163caaba1f60d1af9a551559069ca08", - "nmdc:cf08b19ebb993d895845588d073c02fe" - ], - "too_short_contig_num": 151485, - "part_of": [ - "nmdc:mga0x5c381" - ], - "binned_contig_num": 238, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:13137fa415f537d2874808d8c75c1b3d", - "nmdc:196d2699f8fdab4e38c8a638f92093b1", - "nmdc:b67b26f8f76faa347575352000021faf", - "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", - "nmdc:5ef5ad24cfe3990c0256d420f51f9010" - ], - "was_informed_by": "gold:Gp0127628", - "input_contig_num": 157858, - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "execution_resource": "NERSC-Cori", - "name": "MAGs 
Analysis Activity for nmdc:mga0x5c381", - "mags_list": [ - { - "number_of_contig": 238, - "completeness": 30.86, - "bin_name": "bins.1", - "gene_count": 1126, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - } - ], - "unbinned_contig_num": 6135, - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:45:59+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9aa" - }, - "has_input": [ - "nmdc:9e550afb3bcd8d66807f861ecfed815b" - ], - "part_of": [ - "nmdc:mga0x5c381" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9c21fbee23b4098d69ac618d32fe44c3", - "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", - "nmdc:cf08b19ebb993d895845588d073c02fe", - "nmdc:e110cecd0dcfbefbde06b88e89047c94", - "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", - "nmdc:c8834a004633752f76b91883416c34b8", - "nmdc:adc813c11b8b32e205aa65ab971d4159", - "nmdc:eecb4098ed258acb0820c17e9e308a9d", - "nmdc:cd2cbf38f357d4c7ec5080072e994861", - "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", - "nmdc:29e9378a37cc56837c1343de85993789", - "nmdc:5faeccd78a03acd094263a777faa5fe2" - ], - "was_informed_by": "gold:Gp0127628", - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0x5c381", - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:45:59+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fae" - }, - "has_input": [ - 
"nmdc:f6f1760721d73fc57919b2115a1d47ec" - ], - "part_of": [ - "nmdc:mga0x5c381" - ], - "ctg_logsum": 110768, - "scaf_logsum": 111226, - "gap_pct": 0.00124, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9e550afb3bcd8d66807f861ecfed815b", - "nmdc:5e79fce62ffa8c4479be5159143797e0", - "nmdc:682fd042d6adcd93f75c3eae2cf32241", - "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", - "nmdc:9163caaba1f60d1af9a551559069ca08" - ], - "asm_score": 4.319, - "was_informed_by": "gold:Gp0127628", - "ctg_powsum": 11962, - "scaf_max": 45540, - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "scaf_powsum": 12026, - "execution_resource": "NERSC-Cori", - "contigs": 157859, - "name": "Assembly Activity for nmdc:mga0x5c381", - "ctg_max": 40273, - "gc_std": 0.10673, - "contig_bp": 68288279, - "gc_avg": 0.61453, - "started_at_time": "2021-10-11T02:25:13Z", - "scaf_bp": 68289129, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 157774, - "ended_at_time": "2021-10-11T04:45:59+00:00", - "ctg_l50": 400, - "ctg_l90": 285, - "ctg_n50": 49248, - "ctg_n90": 135173, - "scaf_l50": 400, - "scaf_l90": 285, - "scaf_n50": 49230, - "scaf_n90": 135095 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b3f" - }, - "id": "nmdc:omprc-11-b051xn44", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-jdsasr43" - ], - "has_output": [ - "jgi:574fe09a7ded5e3df1ee1485" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - 
"ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127628" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c863" - }, - "has_input": [ - "nmdc:efca984ecf94cc8de2aeabf94e0b87cc" - ], - "part_of": [ - "nmdc:mga0x5c381" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f6f1760721d73fc57919b2115a1d47ec", - "nmdc:2225f9d41343590d818186fa2d66852d" - ], - "was_informed_by": "gold:Gp0127628", - "input_read_count": 31715882, - "output_read_bases": 4516265181, - "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4789098182, - "name": "Read QC Activity for nmdc:mga0x5c381", - "output_read_count": 30212248, - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:45:59+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf24" - }, - "has_input": [ - "nmdc:f6f1760721d73fc57919b2115a1d47ec" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a6ed9af48a9ad473ab66721829a5c226", - "nmdc:335dbf6f1055de0950988a002f432c0b", - "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", - "nmdc:224085164a389c6f207967ed03b3e6af", - "nmdc:39ba17263c144761a8bdcc1645c034f5", - "nmdc:84debc9bd1c09328d60f073d7fc2db4f", - "nmdc:8f75800abbcf5a94043ad677d7cb975c", - "nmdc:aae9e961d8ed716457616c8a8841037b", - "nmdc:ba83d6ab837403f4bcbc9400a0460457" - ], - "was_informed_by": "gold:Gp0127628", - "id": 
"nmdc:a634b0691cf34899d9b09bc4573d4c36", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", - "started_at_time": "2021-10-11T02:25:13Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:59+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2291612962, - "type": "nmdc:DataObject", - "id": "jgi:574fde5b7ded5e3df1ee13ff", - "name": "10533.1.165310.TCGCTGT-AACAGCG.fastq.gz" - }, - { - "name": "Gp0127631_Filtered Reads", - "description": "Filtered Reads for Gp0127631", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filtered.fastq.gz", - "md5_checksum": "6969fd7f4b1a5a34fb30d31b92cd6bf8", - "id": "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", - "file_size_bytes": 2030538721 - }, - { - "name": "Gp0127631_Filtered Stats", - "description": "Filtered Stats for Gp0127631", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filterStats.txt", - "md5_checksum": "b280141d234edf10cde8794539700654", - "id": "nmdc:b280141d234edf10cde8794539700654", - "file_size_bytes": 284 - }, - { - "name": "Gp0127631_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", - "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", - "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "file_size_bytes": 1227 - }, - { - "name": "Gp0127631_Gottcha2 full TSV 
report", - "description": "Gottcha2 full TSV report for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", - "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", - "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "file_size_bytes": 647196 - }, - { - "name": "Gp0127631_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127631", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", - "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", - "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "file_size_bytes": 229312 - }, - { - "name": "Gp0127631_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127631", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", - "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", - "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", - "file_size_bytes": 1861431092 - }, - { - "name": "Gp0127631_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127631", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", - "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", - "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "file_size_bytes": 254665 - }, - { - "name": "Gp0127631_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127631", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", - "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", - "id": 
"nmdc:ea27c005b1788434c2198ad60939d4bc", - "file_size_bytes": 2334578 - }, - { - "name": "Gp0127631_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127631", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", - "md5_checksum": "6a46583da876b9d6287302308df0b9fd", - "id": "nmdc:6a46583da876b9d6287302308df0b9fd", - "file_size_bytes": 1483354621 - }, - { - "name": "Gp0127631_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127631", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", - "md5_checksum": "af619dc5a0423509a4beaca26aa61000", - "id": "nmdc:af619dc5a0423509a4beaca26aa61000", - "file_size_bytes": 640329 - }, - { - "name": "Gp0127631_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127631", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", - "md5_checksum": "50093825ec73dcabe66aa353de766beb", - "id": "nmdc:50093825ec73dcabe66aa353de766beb", - "file_size_bytes": 3993246 - }, - { - "name": "Gp0127631_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127631", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_contigs.fna", - "md5_checksum": "1eb44ff780f2aad1053ca336b53d7b98", - "id": "nmdc:1eb44ff780f2aad1053ca336b53d7b98", - "file_size_bytes": 128714098 - }, - { - "name": "Gp0127631_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127631", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_scaffolds.fna", - 
"md5_checksum": "992fb303b5ced60489fea0ce6dae71f9", - "id": "nmdc:992fb303b5ced60489fea0ce6dae71f9", - "file_size_bytes": 127998496 - }, - { - "name": "Gp0127631_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_covstats.txt", - "md5_checksum": "a0f466071ed249babf1a5653e1c20a02", - "id": "nmdc:a0f466071ed249babf1a5653e1c20a02", - "file_size_bytes": 18831462 - }, - { - "name": "Gp0127631_Assembled AGP file", - "description": "Assembled AGP file for Gp0127631", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_assembly.agp", - "md5_checksum": "5eddebfbfabfd9c0e71c2699bee73870", - "id": "nmdc:5eddebfbfabfd9c0e71c2699bee73870", - "file_size_bytes": 17634272 - }, - { - "name": "Gp0127631_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127631", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_pairedMapped_sorted.bam", - "md5_checksum": "0ecd5e99ec93ba17c7b02483560bafdf", - "id": "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", - "file_size_bytes": 2245356551 - }, - { - "name": "Gp0127631_Protein FAA", - "description": "Protein FAA for Gp0127631", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_proteins.faa", - "md5_checksum": "8c26f97b6a3196ed09dc4f54857d4972", - "id": "nmdc:8c26f97b6a3196ed09dc4f54857d4972", - "file_size_bytes": 72966123 - }, - { - "name": "Gp0127631_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127631", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_structural_annotation.gff", - "md5_checksum": 
"c7112633e322d7bc609bd479f7ddddb9", - "id": "nmdc:c7112633e322d7bc609bd479f7ddddb9", - "file_size_bytes": 2524 - }, - { - "name": "Gp0127631_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127631", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_functional_annotation.gff", - "md5_checksum": "2f6baf7176d2d904c02ae71875a8d326", - "id": "nmdc:2f6baf7176d2d904c02ae71875a8d326", - "file_size_bytes": 81929295 - }, - { - "name": "Gp0127631_KO TSV file", - "description": "KO TSV file for Gp0127631", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko.tsv", - "md5_checksum": "1abb9d211201bef0cb545e70a65de8cf", - "id": "nmdc:1abb9d211201bef0cb545e70a65de8cf", - "file_size_bytes": 8979915 - }, - { - "name": "Gp0127631_EC TSV file", - "description": "EC TSV file for Gp0127631", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ec.tsv", - "md5_checksum": "985a23612611fb258d2dbaee1e4458f5", - "id": "nmdc:985a23612611fb258d2dbaee1e4458f5", - "file_size_bytes": 5914861 - }, - { - "name": "Gp0127631_COG GFF file", - "description": "COG GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cog.gff", - "md5_checksum": "44c3fa82e71af5647b7619b0dd8a0728", - "id": "nmdc:44c3fa82e71af5647b7619b0dd8a0728", - "file_size_bytes": 47190255 - }, - { - "name": "Gp0127631_PFAM GFF file", - "description": "PFAM GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_pfam.gff", - "md5_checksum": "fb70c00e07d0b93b12cacbded87dcea6", - "id": "nmdc:fb70c00e07d0b93b12cacbded87dcea6", - "file_size_bytes": 35794646 - }, - { - "name": "Gp0127631_TigrFam GFF file", - 
"description": "TigrFam GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_tigrfam.gff", - "md5_checksum": "98e1311ba5e96a176baccdb9a95439f9", - "id": "nmdc:98e1311ba5e96a176baccdb9a95439f9", - "file_size_bytes": 3856365 - }, - { - "name": "Gp0127631_SMART GFF file", - "description": "SMART GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_smart.gff", - "md5_checksum": "0685da4455dde2dec9f221b9356f008c", - "id": "nmdc:0685da4455dde2dec9f221b9356f008c", - "file_size_bytes": 10561278 - }, - { - "name": "Gp0127631_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_supfam.gff", - "md5_checksum": "14b7f064a3a2fad830fad893ff3257bc", - "id": "nmdc:14b7f064a3a2fad830fad893ff3257bc", - "file_size_bytes": 59641133 - }, - { - "name": "Gp0127631_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cath_funfam.gff", - "md5_checksum": "1b8b64c254f88dd9a8e3cd42bde7b7ba", - "id": "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", - "file_size_bytes": 45160077 - }, - { - "name": "Gp0127631_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko_ec.gff", - "md5_checksum": "01769b6920ba82884f19ac3f88428db1", - "id": "nmdc:01769b6920ba82884f19ac3f88428db1", - "file_size_bytes": 28510384 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", 
- "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127631_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.tooShort.fa", - "md5_checksum": "53faea62cf1183292bc6fca374f75ed1", - "id": "nmdc:53faea62cf1183292bc6fca374f75ed1", - "file_size_bytes": 99316833 - }, - { - "name": "Gp0127631_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.unbinned.fa", - "md5_checksum": "7a6616d3262630c2aea2923e3c2683d0", - "id": "nmdc:7a6616d3262630c2aea2923e3c2683d0", - "file_size_bytes": 27381739 - }, - { - "name": "Gp0127631_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127631", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_checkm_qa.out", - "md5_checksum": "e16dde65e7229d69949c9e2dee7e2413", - "id": "nmdc:e16dde65e7229d69949c9e2dee7e2413", - "file_size_bytes": 1085 - }, - { - "name": "Gp0127631_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127631", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_hqmq_bin.zip", - "md5_checksum": "58acda197bd8136a80d5047342008cdf", - "id": "nmdc:58acda197bd8136a80d5047342008cdf", - "file_size_bytes": 182 - }, - { - "name": "Gp0127631_metabat2 bins", - "description": "metabat2 bins for Gp0127631", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_metabat_bin.zip", - "md5_checksum": "8d5e2b8a8dede83c2f74182f506f9176", - "id": "nmdc:8d5e2b8a8dede83c2f74182f506f9176", - 
"file_size_bytes": 596616 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea8" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127631", - "url": "https://data.microbiomedata.org/data/1781_100333/assembly/mapping_stats.txt", - "file_size_bytes": 17881866, - "type": "nmdc:DataObject", - "id": "nmdc:b8891d5a1c93a83756f25450b1fe5e6e", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eaa" - }, - "description": "Assembled AGP file for gold:Gp0127631", - "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly.agp", - "file_size_bytes": 15733352, - "type": "nmdc:DataObject", - "id": "nmdc:00fd0de6e8c1ba5bdd6308a282f543cc", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec6" - }, - "description": "Assembled contigs fasta for gold:Gp0127631", - "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly_contigs.fna", - "file_size_bytes": 127764502, - "type": "nmdc:DataObject", - "id": "nmdc:ff68d07b09e5a9cdd866208394d66bd6", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ecc" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127631", - "url": "https://data.microbiomedata.org/data/1781_100333/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2213957632, - "type": "nmdc:DataObject", - "id": "nmdc:39703cdbfb1e7fbb52a08061a05d8f4d", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed5" - }, - "description": "Assembled scaffold fasta for gold:Gp0127631", - "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly_scaffolds.fna", - "file_size_bytes": 127049764, - "type": "nmdc:DataObject", - "id": "nmdc:7beaf6e386659d8d728720bb1ab2f2b9", - "name": "assembly_scaffolds.fna", - 
"data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b26" - }, - "id": "nmdc:22bf0b09cd9b46acaa8436ac81aec2f3", - "name": "1781_100333.json", - "description": "Gold:Gp0127631 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100333/ReadbasedAnalysis/1781_100333.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b27" - }, - "id": "nmdc:d376ada84b52516325fc31f0f95fc1c4", - "name": "1781_100333.krona.html", - "description": "Gold:Gp0127631 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100333/ReadbasedAnalysis/centrifuge/1781_100333.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f1" - }, - "id": "nmdc:3d29d0956f968142b75f0ca9a03e3abb", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127631", - "file_size_bytes": 96541361, - "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f3" - }, - "id": "nmdc:0ee0d3a741d960268c288071b826ccb2", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127631", - "file_size_bytes": 930, - "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f4" - }, - "id": "nmdc:80e41aba95e325a6687edf730910288b", - "name": "gold:Gp0127631.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127631", - "file_size_bytes": 299114, - "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome 
Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ff" - }, - "id": "nmdc:be8af74d4f97b2443a2dac045fa8af6d", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127631", - "file_size_bytes": 28526080, - "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16629" - }, - "id": "nmdc:16ad3e594738a8fb4ee46a931ce0444c", - "name": "gold:Gp0127631.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127631", - "file_size_bytes": 372823, - "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce5" - }, - "description": "Functional annotation GFF file for gold:Gp0127631", - "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_functional_annotation.gff", - "md5_checksum": "5723e7023b0e3994e92c7c5e72aa34ec", - "file_size_bytes": 3385, - "id": "nmdc:5723e7023b0e3994e92c7c5e72aa34ec", - "name": "gold:Gp0127631_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce7" - }, - "description": "EC TSV File for gold:Gp0127631", - "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_ec.tsv", - "md5_checksum": "ee276fe3eb490475ad3d7280a8c67464", - "file_size_bytes": 3385, - "id": "nmdc:ee276fe3eb490475ad3d7280a8c67464", - "name": "gold:Gp0127631_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce8" - }, - "description": "Structural annotation GFF file for gold:Gp0127631", - "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_structural_annotation.gff", - "md5_checksum": 
"d57f28027b2d6f82b96f5413bf8c9a59", - "file_size_bytes": 3385, - "id": "nmdc:d57f28027b2d6f82b96f5413bf8c9a59", - "name": "gold:Gp0127631_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cea" - }, - "description": "Protein FAA for gold:Gp0127631", - "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_proteins.faa", - "md5_checksum": "04c97ac7af06bf37da8f1ffe827e454d", - "file_size_bytes": 3385, - "id": "nmdc:04c97ac7af06bf37da8f1ffe827e454d", - "name": "gold:Gp0127631_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cec" - }, - "description": "KO TSV File for gold:Gp0127631", - "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_ko.tsv", - "md5_checksum": "e2ef79ef2b6669d93af5e90ba2c58fcf", - "file_size_bytes": 3385, - "id": "nmdc:e2ef79ef2b6669d93af5e90ba2c58fcf", - "name": "gold:Gp0127631_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34706" - }, - "has_input": [ - "nmdc:1eb44ff780f2aad1053ca336b53d7b98", - "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", - "nmdc:2f6baf7176d2d904c02ae71875a8d326" - ], - "too_short_contig_num": 219869, - "part_of": [ - "nmdc:mga0jx8k09" - ], - "binned_contig_num": 506, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:53faea62cf1183292bc6fca374f75ed1", - "nmdc:7a6616d3262630c2aea2923e3c2683d0", - "nmdc:e16dde65e7229d69949c9e2dee7e2413", - "nmdc:58acda197bd8136a80d5047342008cdf", - "nmdc:8d5e2b8a8dede83c2f74182f506f9176" - ], - "was_informed_by": "gold:Gp0127631", - 
"input_contig_num": 237399, - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0jx8k09", - "mags_list": [ - { - "number_of_contig": 151, - "completeness": 11.4, - "bin_name": "bins.1", - "gene_count": 748, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 268, - "completeness": 7.47, - "bin_name": "bins.2", - "gene_count": 1304, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 12 - }, - { - "number_of_contig": 87, - "completeness": 13.32, - "bin_name": "bins.3", - "gene_count": 412, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - } - ], - "unbinned_contig_num": 17024, - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:40:31+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9a8" - }, - "has_input": [ - "nmdc:1eb44ff780f2aad1053ca336b53d7b98" - ], - "part_of": [ - "nmdc:mga0jx8k09" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8c26f97b6a3196ed09dc4f54857d4972", - "nmdc:c7112633e322d7bc609bd479f7ddddb9", - 
"nmdc:2f6baf7176d2d904c02ae71875a8d326", - "nmdc:1abb9d211201bef0cb545e70a65de8cf", - "nmdc:985a23612611fb258d2dbaee1e4458f5", - "nmdc:44c3fa82e71af5647b7619b0dd8a0728", - "nmdc:fb70c00e07d0b93b12cacbded87dcea6", - "nmdc:98e1311ba5e96a176baccdb9a95439f9", - "nmdc:0685da4455dde2dec9f221b9356f008c", - "nmdc:14b7f064a3a2fad830fad893ff3257bc", - "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", - "nmdc:01769b6920ba82884f19ac3f88428db1" - ], - "was_informed_by": "gold:Gp0127631", - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0jx8k09", - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:40:31+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f94" - }, - "has_input": [ - "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" - ], - "part_of": [ - "nmdc:mga0jx8k09" - ], - "ctg_logsum": 306128, - "scaf_logsum": 307525, - "gap_pct": 0.00196, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:1eb44ff780f2aad1053ca336b53d7b98", - "nmdc:992fb303b5ced60489fea0ce6dae71f9", - "nmdc:a0f466071ed249babf1a5653e1c20a02", - "nmdc:5eddebfbfabfd9c0e71c2699bee73870", - "nmdc:0ecd5e99ec93ba17c7b02483560bafdf" - ], - "asm_score": 3.117, - "was_informed_by": "gold:Gp0127631", - "ctg_powsum": 32898, - "scaf_max": 14244, - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "scaf_powsum": 33057, - "execution_resource": "NERSC-Cori", - "contigs": 237399, - "name": "Assembly Activity for nmdc:mga0jx8k09", - "ctg_max": 14244, - "gc_std": 0.09594, - "contig_bp": 119367623, - "gc_avg": 0.62364, - "started_at_time": "2021-10-11T02:26:22Z", - "scaf_bp": 119369963, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 237183, - "ended_at_time": "2021-10-11T04:40:31+00:00", - "ctg_l50": 499, - "ctg_l90": 292, - "ctg_n50": 64310, - "ctg_n90": 195626, - "scaf_l50": 500, - "scaf_l90": 292, 
- "scaf_n50": 64017, - "scaf_n90": 195424 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b40" - }, - "id": "nmdc:omprc-11-k8kt2j31", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-4vqhvw07" - ], - "has_output": [ - "jgi:574fde5b7ded5e3df1ee13ff" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127631" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c862" - }, - "has_input": [ - "nmdc:9c97e4b734b9cac731fe30fb07a32bb7" - ], - "part_of": [ - "nmdc:mga0jx8k09" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", - "nmdc:b280141d234edf10cde8794539700654" - ], - "was_informed_by": "gold:Gp0127631", - "input_read_count": 26419652, - "output_read_bases": 3798930297, - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3989367452, - "name": "Read QC Activity for nmdc:mga0jx8k09", - "output_read_count": 
25434840, - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:40:31+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf22" - }, - "has_input": [ - "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b78e8246144185beb95c0caf65ef1f1a", - "nmdc:8875c6ce19e13ed9a88447f2f78bb049", - "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", - "nmdc:0d1729a83798b752f33eeb8d97afe972", - "nmdc:77561a0de3bb8aae04d110429fd9ad0c", - "nmdc:ea27c005b1788434c2198ad60939d4bc", - "nmdc:6a46583da876b9d6287302308df0b9fd", - "nmdc:af619dc5a0423509a4beaca26aa61000", - "nmdc:50093825ec73dcabe66aa353de766beb" - ], - "was_informed_by": "gold:Gp0127631", - "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", - "started_at_time": "2021-10-11T02:26:22Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:40:31+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2557650099, - "type": "nmdc:DataObject", - "id": "jgi:574fde7c7ded5e3df1ee1419", - "name": "10533.2.165322.TGTACAC-GGTGTAC.fastq.gz" - }, - { - "name": "Gp0127630_Filtered Reads", - "description": "Filtered Reads for Gp0127630", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filtered.fastq.gz", - "md5_checksum": 
"eaffb16b5247d85c08f8af73bcb8b65e", - "id": "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", - "file_size_bytes": 2294158265 - }, - { - "name": "Gp0127630_Filtered Stats", - "description": "Filtered Stats for Gp0127630", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filterStats.txt", - "md5_checksum": "088fd18cb9169097e739289d2e5ebb13", - "id": "nmdc:088fd18cb9169097e739289d2e5ebb13", - "file_size_bytes": 288 - }, - { - "name": "Gp0127630_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", - "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", - "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "file_size_bytes": 3373 - }, - { - "name": "Gp0127630_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", - "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", - "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "file_size_bytes": 791488 - }, - { - "name": "Gp0127630_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127630", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", - "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", - "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "file_size_bytes": 235803 - }, - { - "name": "Gp0127630_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127630", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", - "md5_checksum": 
"2f21fd19f055d1931ab82016ed781a12", - "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", - "file_size_bytes": 1974171566 - }, - { - "name": "Gp0127630_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127630", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", - "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", - "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "file_size_bytes": 255012 - }, - { - "name": "Gp0127630_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127630", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", - "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", - "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "file_size_bytes": 2330430 - }, - { - "name": "Gp0127630_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127630", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", - "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", - "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "file_size_bytes": 1584744477 - }, - { - "name": "Gp0127630_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127630", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", - "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", - "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "file_size_bytes": 650172 - }, - { - "name": "Gp0127630_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127630", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", - "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", - "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", - "file_size_bytes": 3962195 - }, - { - "name": "Gp0127630_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127630", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", - "md5_checksum": "7b35237c97a75f17ba74be0fe96416c9", - "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", - "file_size_bytes": 57511432 - }, - { - "name": "Gp0127630_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127630", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", - "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", - "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", - "file_size_bytes": 57128690 - }, - { - "name": "Gp0127630_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", - "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", - "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", - "file_size_bytes": 10020081 - }, - { - "name": "Gp0127630_Assembled AGP file", - "description": "Assembled AGP file for Gp0127630", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_assembly.agp", - "md5_checksum": "33d86c437a046031ea2b4bed5a2d2d6b", - "id": "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", - "file_size_bytes": 9337675 - }, - { - "name": "Gp0127630_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127630", - "data_object_type": "Assembly Coverage BAM", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_pairedMapped_sorted.bam", - "md5_checksum": "873f16e03e0f94c9ec28573fb10ad6d8", - "id": "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", - "file_size_bytes": 2461822274 - }, - { - "name": "Gp0127630_Protein FAA", - "description": "Protein FAA for Gp0127630", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_proteins.faa", - "md5_checksum": "f7735eb161908954feda34285993f1b9", - "id": "nmdc:f7735eb161908954feda34285993f1b9", - "file_size_bytes": 34246728 - }, - { - "name": "Gp0127630_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127630", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_structural_annotation.gff", - "md5_checksum": "c6053080461e8cc0bbadd13e0775e108", - "id": "nmdc:c6053080461e8cc0bbadd13e0775e108", - "file_size_bytes": 2515 - }, - { - "name": "Gp0127630_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127630", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_functional_annotation.gff", - "md5_checksum": "4878e3d5a95e67c0bb81da53e03400be", - "id": "nmdc:4878e3d5a95e67c0bb81da53e03400be", - "file_size_bytes": 40345940 - }, - { - "name": "Gp0127630_KO TSV file", - "description": "KO TSV file for Gp0127630", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko.tsv", - "md5_checksum": "dbc4d4e179a86aa95211de3e62219191", - "id": "nmdc:dbc4d4e179a86aa95211de3e62219191", - "file_size_bytes": 4543233 - }, - { - "name": "Gp0127630_EC TSV file", - "description": "EC TSV file for Gp0127630", - "data_object_type": "Annotation Enzyme Commission", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ec.tsv", - "md5_checksum": "5bdd96be3fbc888969d92c2ed6392846", - "id": "nmdc:5bdd96be3fbc888969d92c2ed6392846", - "file_size_bytes": 3027431 - }, - { - "name": "Gp0127630_COG GFF file", - "description": "COG GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cog.gff", - "md5_checksum": "78026e2afc7644463828fbbfa4d8d727", - "id": "nmdc:78026e2afc7644463828fbbfa4d8d727", - "file_size_bytes": 23085097 - }, - { - "name": "Gp0127630_PFAM GFF file", - "description": "PFAM GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_pfam.gff", - "md5_checksum": "ef99a9afe80e1acc086694ca8ab4cca7", - "id": "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", - "file_size_bytes": 16769237 - }, - { - "name": "Gp0127630_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_tigrfam.gff", - "md5_checksum": "f949efd8a6b6affb4707a4314980e86e", - "id": "nmdc:f949efd8a6b6affb4707a4314980e86e", - "file_size_bytes": 1710760 - }, - { - "name": "Gp0127630_SMART GFF file", - "description": "SMART GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_smart.gff", - "md5_checksum": "2f9f0b8164c35117da1e121e63ad772f", - "id": "nmdc:2f9f0b8164c35117da1e121e63ad772f", - "file_size_bytes": 5166448 - }, - { - "name": "Gp0127630_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_supfam.gff", - "md5_checksum": "1e3d433d3cb308d086dec26916b6b1bf", - "id": "nmdc:1e3d433d3cb308d086dec26916b6b1bf", - "file_size_bytes": 29155547 - }, - { - "name": "Gp0127630_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127630", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cath_funfam.gff", - "md5_checksum": "d467bd6407a5a41798aa84df69a4a31d", - "id": "nmdc:d467bd6407a5a41798aa84df69a4a31d", - "file_size_bytes": 21679406 - }, - { - "name": "Gp0127630_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko_ec.gff", - "md5_checksum": "4cb3db8f0ff98bf805f4750af65eb9d1", - "id": "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1", - "file_size_bytes": 14461252 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127630_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.tooShort.fa", - "md5_checksum": "ce09d99bdfdf0379b09a3ae75c65d830", - "id": "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", - "file_size_bytes": 50450286 - }, - { - "name": "Gp0127630_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.unbinned.fa", - "md5_checksum": "acd651395108c71dd20eeebf9b177d06", - "id": "nmdc:acd651395108c71dd20eeebf9b177d06", - "file_size_bytes": 5114463 - }, - { - "name": "Gp0127630_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127630", - "data_object_type": "CheckM Statistics", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_checkm_qa.out", - "md5_checksum": "850a6fbbd2993f4dfeb5a40485e67f8e", - "id": "nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", - "file_size_bytes": 948 - }, - { - "name": "Gp0127630_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127630", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_hqmq_bin.zip", - "md5_checksum": "287529453d35eab4acb72032a59994d0", - "id": "nmdc:287529453d35eab4acb72032a59994d0", - "file_size_bytes": 484667 - }, - { - "name": "Gp0127630_metabat2 bins", - "description": "metabat2 bins for Gp0127630", - "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_metabat_bin.zip", - "md5_checksum": "4ad58f05545a75edc1b933a0b0286d16", - "id": "nmdc:4ad58f05545a75edc1b933a0b0286d16", - "file_size_bytes": 110526 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e66" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127630", - "url": "https://data.microbiomedata.org/data/1781_100332/assembly/mapping_stats.txt", - "file_size_bytes": 9510797, - "type": "nmdc:DataObject", - "id": "nmdc:e4e89517e39bd367af05e5dc5849b32b", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e68" - }, - "description": "Assembled contigs fasta for gold:Gp0127630", - "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly_contigs.fna", - "file_size_bytes": 57002148, - "type": "nmdc:DataObject", - "id": "nmdc:c3958f0be344c850d06ee61865c95ff6", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e6b" - }, - "description": "Assembled scaffold fasta for gold:Gp0127630", - "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly_scaffolds.fna", - 
"file_size_bytes": 56619602, - "type": "nmdc:DataObject", - "id": "nmdc:354cac10ff205a59fffc795554aa3539", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ebf" - }, - "description": "Assembled AGP file for gold:Gp0127630", - "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly.agp", - "file_size_bytes": 8318715, - "type": "nmdc:DataObject", - "id": "nmdc:a08b36a85343a2f3dc45d62000a34274", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec5" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127630", - "url": "https://data.microbiomedata.org/data/1781_100332/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2430916917, - "type": "nmdc:DataObject", - "id": "nmdc:1d1e719a8aa56730007392e34c0515a7", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b1c" - }, - "id": "nmdc:82aac6da49d2ea7174e5786d247ceb42", - "name": "1781_100332.krona.html", - "description": "Gold:Gp0127630 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100332/ReadbasedAnalysis/centrifuge/1781_100332.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b2b" - }, - "id": "nmdc:65bf42cc5b458fd298f30d1df2cdb6d6", - "name": "1781_100332.json", - "description": "Gold:Gp0127630 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100332/ReadbasedAnalysis/1781_100332.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d8" - }, - "id": "nmdc:1cc2fa15c0c8c54f427684eac47d9288", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127630", - 
"file_size_bytes": 48944272, - "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165dc" - }, - "id": "nmdc:f19e68b486a6cfb5a09a20d9c388f679", - "name": "gold:Gp0127630.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127630", - "file_size_bytes": 211455, - "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e0" - }, - "id": "nmdc:904adbe6f49936fd689e59f7e970b4ab", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127630", - "file_size_bytes": 6557371, - "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e7" - }, - "id": "nmdc:abf67b79d7b9f94c9454eab172da8823", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127630", - "file_size_bytes": 918, - "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ea" - }, - "id": "nmdc:77678d37b9822be709b6ed462de42e71", - "name": "gold:Gp0127630.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127630", - "file_size_bytes": 254378, - "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cde" - }, - "description": "EC TSV File for gold:Gp0127630", - "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_ec.tsv", - "md5_checksum": "81ab86211731bc0547d3e8f8786c3e8b", - "file_size_bytes": 
3385, - "id": "nmdc:81ab86211731bc0547d3e8f8786c3e8b", - "name": "gold:Gp0127630_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cdf" - }, - "description": "Protein FAA for gold:Gp0127630", - "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_proteins.faa", - "md5_checksum": "6bca5ad106b3519416205a82d3a14b16", - "file_size_bytes": 3385, - "id": "nmdc:6bca5ad106b3519416205a82d3a14b16", - "name": "gold:Gp0127630_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce3" - }, - "description": "Functional annotation GFF file for gold:Gp0127630", - "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_functional_annotation.gff", - "md5_checksum": "070f0952308650d35ae05c4fed188677", - "file_size_bytes": 3385, - "id": "nmdc:070f0952308650d35ae05c4fed188677", - "name": "gold:Gp0127630_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce4" - }, - "description": "KO TSV File for gold:Gp0127630", - "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_ko.tsv", - "md5_checksum": "c6b5f388349af0214d65d1357026c7ee", - "file_size_bytes": 3385, - "id": "nmdc:c6b5f388349af0214d65d1357026c7ee", - "name": "gold:Gp0127630_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce6" - }, - "description": "Structural annotation GFF file for gold:Gp0127630", - "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_structural_annotation.gff", - "md5_checksum": "f921989651475b06052058126db54de9", - "file_size_bytes": 3385, - "id": "nmdc:f921989651475b06052058126db54de9", - "name": "gold:Gp0127630_Structural 
annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34707" - }, - "has_input": [ - "nmdc:7b35237c97a75f17ba74be0fe96416c9", - "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", - "nmdc:4878e3d5a95e67c0bb81da53e03400be" - ], - "too_short_contig_num": 123771, - "part_of": [ - "nmdc:mga09n3g47" - ], - "binned_contig_num": 313, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", - "nmdc:acd651395108c71dd20eeebf9b177d06", - "nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", - "nmdc:287529453d35eab4acb72032a59994d0", - "nmdc:4ad58f05545a75edc1b933a0b0286d16" - ], - "was_informed_by": "gold:Gp0127630", - "input_contig_num": 127321, - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga09n3g47", - "mags_list": [ - { - "number_of_contig": 86, - "completeness": 19.9, - "bin_name": "bins.1", - "gene_count": 422, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 7 - }, - { - "number_of_contig": 227, - "completeness": 70.23, - "bin_name": "bins.2", - "gene_count": 1932, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 1.94, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 35 - } - ], - "unbinned_contig_num": 3237, - 
"started_at_time": "2021-10-11T02:26:53Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:54:22+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9ad" - }, - "has_input": [ - "nmdc:7b35237c97a75f17ba74be0fe96416c9" - ], - "part_of": [ - "nmdc:mga09n3g47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f7735eb161908954feda34285993f1b9", - "nmdc:c6053080461e8cc0bbadd13e0775e108", - "nmdc:4878e3d5a95e67c0bb81da53e03400be", - "nmdc:dbc4d4e179a86aa95211de3e62219191", - "nmdc:5bdd96be3fbc888969d92c2ed6392846", - "nmdc:78026e2afc7644463828fbbfa4d8d727", - "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", - "nmdc:f949efd8a6b6affb4707a4314980e86e", - "nmdc:2f9f0b8164c35117da1e121e63ad772f", - "nmdc:1e3d433d3cb308d086dec26916b6b1bf", - "nmdc:d467bd6407a5a41798aa84df69a4a31d", - "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1" - ], - "was_informed_by": "gold:Gp0127630", - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga09n3g47", - "started_at_time": "2021-10-11T02:26:53Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:54:22+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f95" - }, - "has_input": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" - ], - "part_of": [ - "nmdc:mga09n3g47" - ], - "ctg_logsum": 77070, - "scaf_logsum": 77428, - "gap_pct": 0.00093, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7b35237c97a75f17ba74be0fe96416c9", - "nmdc:118dd6190bdaf127d3c105cc73012cc3", - "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", - "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", - "nmdc:873f16e03e0f94c9ec28573fb10ad6d8" - ], - "asm_score": 6.312, - 
"was_informed_by": "gold:Gp0127630", - "ctg_powsum": 8755.579, - "scaf_max": 31136, - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "scaf_powsum": 8795.268, - "execution_resource": "NERSC-Cori", - "contigs": 127321, - "name": "Assembly Activity for nmdc:mga09n3g47", - "ctg_max": 31136, - "gc_std": 0.09346, - "contig_bp": 52740992, - "gc_avg": 0.61288, - "started_at_time": "2021-10-11T02:26:53Z", - "scaf_bp": 52741482, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 127272, - "ended_at_time": "2021-10-11T04:54:22+00:00", - "ctg_l50": 372, - "ctg_l90": 284, - "ctg_n50": 41888, - "ctg_n90": 110882, - "scaf_l50": 372, - "scaf_l90": 284, - "scaf_n50": 41856, - "scaf_n90": 110834 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b41" - }, - "id": "nmdc:omprc-11-9pbab972", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-3yjh4z33" - ], - "has_output": [ - "jgi:574fde7c7ded5e3df1ee1419" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127630" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": 
"649b009d6bdd4fd20273c865" - }, - "has_input": [ - "nmdc:0e737a8e36535f70bff074004ee1f9c0" - ], - "part_of": [ - "nmdc:mga09n3g47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", - "nmdc:088fd18cb9169097e739289d2e5ebb13" - ], - "was_informed_by": "gold:Gp0127630", - "input_read_count": 28569382, - "output_read_bases": 4016672570, - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4313976682, - "name": "Read QC Activity for nmdc:mga09n3g47", - "output_read_count": 26868700, - "started_at_time": "2021-10-11T02:26:53Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:54:22+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf27" - }, - "has_input": [ - "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", - "nmdc:e5f1da9ed5be2adcd65763d387387c9f", - "nmdc:db82b41936f37bbbeaa027ffc25b58cd", - "nmdc:2f21fd19f055d1931ab82016ed781a12", - "nmdc:890f494d1dd5e130d6c1688e78f27ff2", - "nmdc:813232a3034ddb9a05efc2f2e9b78cce", - "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", - "nmdc:6a7de24b01ad1c63ba6edb758e25af40", - "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" - ], - "was_informed_by": "gold:Gp0127630", - "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", - "started_at_time": "2021-10-11T02:26:53Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:54:22+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - 
"processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2003194973, - "type": "nmdc:DataObject", - "id": "jgi:574fde5e7ded5e3df1ee1401", - "name": "10533.1.165310.GGACTGT-AACAGTC.fastq.gz" - }, - { - "name": "Gp0127633_Filtered Reads", - "description": "Filtered Reads for Gp0127633", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filtered.fastq.gz", - "md5_checksum": "7cbd497624d8b60ab2a5e7fdbe4730f2", - "id": "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", - "file_size_bytes": 1727224362 - }, - { - "name": "Gp0127633_Filtered Stats", - "description": "Filtered Stats for Gp0127633", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filterStats.txt", - "md5_checksum": "eccf0501d08f920a88b6598d573a8e3e", - "id": "nmdc:eccf0501d08f920a88b6598d573a8e3e", - "file_size_bytes": 280 - }, - { - "name": "Gp0127633_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", - "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", - "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", - "file_size_bytes": 875 - }, - { - "name": "Gp0127633_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", - "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", - "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "file_size_bytes": 578856 - }, - { - "name": "Gp0127633_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127633", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", - "md5_checksum": "f445af1a7774572d156f55a898d26f09", - "id": "nmdc:f445af1a7774572d156f55a898d26f09", - "file_size_bytes": 228067 - }, - { - "name": "Gp0127633_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127633", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", - "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", - "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", - "file_size_bytes": 1646942155 - }, - { - "name": "Gp0127633_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127633", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", - "md5_checksum": "28beb8baabdaf346f2066b40f375a152", - "id": "nmdc:28beb8baabdaf346f2066b40f375a152", - "file_size_bytes": 252735 - }, - { - "name": "Gp0127633_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127633", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", - "md5_checksum": "1f74a43724c4afed5563499d05601e22", - "id": "nmdc:1f74a43724c4afed5563499d05601e22", - "file_size_bytes": 2329168 - }, - { - "name": "Gp0127633_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127633", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", - "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", - "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "file_size_bytes": 
1310443491 - }, - { - "name": "Gp0127633_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127633", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", - "md5_checksum": "275268a6b5aca33c427d11877bcfa674", - "id": "nmdc:275268a6b5aca33c427d11877bcfa674", - "file_size_bytes": 621441 - }, - { - "name": "Gp0127633_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127633", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", - "md5_checksum": "89e810af4915f0e117eaa60550587453", - "id": "nmdc:89e810af4915f0e117eaa60550587453", - "file_size_bytes": 3891844 - }, - { - "name": "Gp0127633_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127633", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_contigs.fna", - "md5_checksum": "ea5ca9478871b3e2600e1df0d748cbef", - "id": "nmdc:ea5ca9478871b3e2600e1df0d748cbef", - "file_size_bytes": 152814586 - }, - { - "name": "Gp0127633_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127633", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_scaffolds.fna", - "md5_checksum": "327e130872e4c5faac2f1c9f8dea2316", - "id": "nmdc:327e130872e4c5faac2f1c9f8dea2316", - "file_size_bytes": 151993436 - }, - { - "name": "Gp0127633_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_covstats.txt", - "md5_checksum": "f61f1e62791a38beae95bd95833a6784", - "id": "nmdc:f61f1e62791a38beae95bd95833a6784", - "file_size_bytes": 21678212 - }, - { - 
"name": "Gp0127633_Assembled AGP file", - "description": "Assembled AGP file for Gp0127633", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_assembly.agp", - "md5_checksum": "416254a3bfc685dd16c11d65a222305f", - "id": "nmdc:416254a3bfc685dd16c11d65a222305f", - "file_size_bytes": 20304047 - }, - { - "name": "Gp0127633_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127633", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_pairedMapped_sorted.bam", - "md5_checksum": "bc054294600fa310924f104484effd3e", - "id": "nmdc:bc054294600fa310924f104484effd3e", - "file_size_bytes": 1959649749 - }, - { - "name": "Gp0127633_Protein FAA", - "description": "Protein FAA for Gp0127633", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_proteins.faa", - "md5_checksum": "8defcf55f08cd56d8b2560e27f490ca5", - "id": "nmdc:8defcf55f08cd56d8b2560e27f490ca5", - "file_size_bytes": 85918779 - }, - { - "name": "Gp0127633_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127633", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_structural_annotation.gff", - "md5_checksum": "a6031c0a101419dd413a0804937425ca", - "id": "nmdc:a6031c0a101419dd413a0804937425ca", - "file_size_bytes": 2527 - }, - { - "name": "Gp0127633_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127633", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_functional_annotation.gff", - "md5_checksum": "43069b1146c84c064b7ff334dc9ff100", - "id": "nmdc:43069b1146c84c064b7ff334dc9ff100", - 
"file_size_bytes": 95647963 - }, - { - "name": "Gp0127633_KO TSV file", - "description": "KO TSV file for Gp0127633", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko.tsv", - "md5_checksum": "acc5a2c445dc6e00668c9a5d50aecdb8", - "id": "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", - "file_size_bytes": 10638485 - }, - { - "name": "Gp0127633_EC TSV file", - "description": "EC TSV file for Gp0127633", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ec.tsv", - "md5_checksum": "ec91d5d7a8af4fb845e22cbe7ab82bde", - "id": "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", - "file_size_bytes": 6991172 - }, - { - "name": "Gp0127633_COG GFF file", - "description": "COG GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cog.gff", - "md5_checksum": "3cd238ff1bb176b7a159aeb34a7c4683", - "id": "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", - "file_size_bytes": 56525933 - }, - { - "name": "Gp0127633_PFAM GFF file", - "description": "PFAM GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_pfam.gff", - "md5_checksum": "5103ea2a481ea3b82f1aa98ab7a36998", - "id": "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", - "file_size_bytes": 43189711 - }, - { - "name": "Gp0127633_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_tigrfam.gff", - "md5_checksum": "8f7429420cbefb9e27bcdbe6252e5288", - "id": "nmdc:8f7429420cbefb9e27bcdbe6252e5288", - "file_size_bytes": 4806086 - }, - { - "name": "Gp0127633_SMART GFF file", - "description": "SMART GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_smart.gff", - "md5_checksum": 
"6d69127dc30609e4861a7b2443b99164", - "id": "nmdc:6d69127dc30609e4861a7b2443b99164", - "file_size_bytes": 12776467 - }, - { - "name": "Gp0127633_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_supfam.gff", - "md5_checksum": "00243bcaf50313d937a7685380a876bb", - "id": "nmdc:00243bcaf50313d937a7685380a876bb", - "file_size_bytes": 70607320 - }, - { - "name": "Gp0127633_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cath_funfam.gff", - "md5_checksum": "ec6ffd40772dee9d48dbec0beb6b3321", - "id": "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", - "file_size_bytes": 53950895 - }, - { - "name": "Gp0127633_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko_ec.gff", - "md5_checksum": "907439e314b4f4623244e2cec8532098", - "id": "nmdc:907439e314b4f4623244e2cec8532098", - "file_size_bytes": 33781965 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127633_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.tooShort.fa", - "md5_checksum": "00415cf72f9a77f907e3467a08b123c5", - "id": "nmdc:00415cf72f9a77f907e3467a08b123c5", - "file_size_bytes": 116930318 - }, - { - 
"name": "Gp0127633_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.unbinned.fa", - "md5_checksum": "83064ec7bfc35a79a1ca76fdd8ad75fd", - "id": "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", - "file_size_bytes": 31883888 - }, - { - "name": "Gp0127633_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127633", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_checkm_qa.out", - "md5_checksum": "3f435d6da551400a4ba4400fa3608e7f", - "id": "nmdc:3f435d6da551400a4ba4400fa3608e7f", - "file_size_bytes": 1590 - }, - { - "name": "Gp0127633_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127633", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_hqmq_bin.zip", - "md5_checksum": "c66f93153962f8b80c8f3d6978b6d802", - "id": "nmdc:c66f93153962f8b80c8f3d6978b6d802", - "file_size_bytes": 460412 - }, - { - "name": "Gp0127633_metabat2 bins", - "description": "metabat2 bins for Gp0127633", - "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_metabat_bin.zip", - "md5_checksum": "ce2a364ec51a1d6311a319509751266e", - "id": "nmdc:ce2a364ec51a1d6311a319509751266e", - "file_size_bytes": 753147 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e6e" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127633", - "url": "https://data.microbiomedata.org/data/1781_100335/assembly/mapping_stats.txt", - "file_size_bytes": 20586724, - "type": "nmdc:DataObject", - "id": "nmdc:262d79d1a7606b75f88468b3b9f80b59", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e6f" - }, 
- "description": "Assembled contigs fasta for gold:Gp0127633", - "url": "https://data.microbiomedata.org/data/1781_100335/assembly/assembly_contigs.fna", - "file_size_bytes": 151723098, - "type": "nmdc:DataObject", - "id": "nmdc:e04c866a9a015bec110f1235db7223dc", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e71" - }, - "description": "Assembled scaffold fasta for gold:Gp0127633", - "url": "https://data.microbiomedata.org/data/1781_100335/assembly/assembly_scaffolds.fna", - "file_size_bytes": 150902924, - "type": "nmdc:DataObject", - "id": "nmdc:bfdd3614128940d958264690470bce14", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e72" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127633", - "url": "https://data.microbiomedata.org/data/1781_100335/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1932857393, - "type": "nmdc:DataObject", - "id": "nmdc:49bbdbc432b3c36c0c9196c53f4b952d", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e73" - }, - "description": "Assembled AGP file for gold:Gp0127633", - "url": "https://data.microbiomedata.org/data/1781_100335/assembly/assembly.agp", - "file_size_bytes": 18119007, - "type": "nmdc:DataObject", - "id": "nmdc:8d617079209f2f0a15a4752fc68f5e81", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b32" - }, - "id": "nmdc:6496a165a51c3500ed2439270887c660", - "name": "1781_100335.krona.html", - "description": "Gold:Gp0127633 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100335/ReadbasedAnalysis/centrifuge/1781_100335.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": 
"649b003d1ae706d7b5b15b3c" - }, - "id": "nmdc:432510ad975787c5c15f94f45f1226c4", - "name": "1781_100335.json", - "description": "Gold:Gp0127633 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100335/ReadbasedAnalysis/1781_100335.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165df" - }, - "id": "nmdc:f4e28c4b7ce3ff07a6a824da9006df87", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127633", - "file_size_bytes": 34864009, - "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e1" - }, - "id": "nmdc:4f3514bc849b503f135d5652ae7d867d", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127633", - "file_size_bytes": 113729207, - "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e3" - }, - "id": "nmdc:07a65c967ab1996f34d016aedd3b0451", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127633", - "file_size_bytes": 1092, - "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e4" - }, - "id": "nmdc:50b7d0804958ce8d87de9374cc46af89", - "name": "gold:Gp0127633.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127633", - "file_size_bytes": 286733, - "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e6" - }, - "id": "nmdc:ca5d7e1b38c31fa6fe1af6931632d74e", - 
"name": "gold:Gp0127633.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127633", - "file_size_bytes": 269570, - "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e8" - }, - "id": "nmdc:cd5403a50c10d21375a7449c9a81d214", - "name": "gold:Gp0127633.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127633", - "file_size_bytes": 229207, - "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ced" - }, - "description": "KO TSV File for gold:Gp0127633", - "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_ko.tsv", - "md5_checksum": "9ef3a52b2d97cc4afb64e37d04e59865", - "file_size_bytes": 3385, - "id": "nmdc:9ef3a52b2d97cc4afb64e37d04e59865", - "name": "gold:Gp0127633_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cf3" - }, - "description": "EC TSV File for gold:Gp0127633", - "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_ec.tsv", - "md5_checksum": "31e2f5b7b055f2959d50a990ebda7ff6", - "file_size_bytes": 3385, - "id": "nmdc:31e2f5b7b055f2959d50a990ebda7ff6", - "name": "gold:Gp0127633_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cf8" - }, - "description": "Structural annotation GFF file for gold:Gp0127633", - "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_structural_annotation.gff", - "md5_checksum": "b18381667b4e7401e1bb58e8aede5d4a", - "file_size_bytes": 3385, - "id": "nmdc:b18381667b4e7401e1bb58e8aede5d4a", - "name": 
"gold:Gp0127633_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cfa" - }, - "description": "Functional annotation GFF file for gold:Gp0127633", - "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_functional_annotation.gff", - "md5_checksum": "740240c975daffee3e63251fc86cfd33", - "file_size_bytes": 3385, - "id": "nmdc:740240c975daffee3e63251fc86cfd33", - "name": "gold:Gp0127633_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cfd" - }, - "description": "Protein FAA for gold:Gp0127633", - "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_proteins.faa", - "md5_checksum": "79fd564d59bf9fe4cfb2c771daa84f29", - "file_size_bytes": 3385, - "id": "nmdc:79fd564d59bf9fe4cfb2c771daa84f29", - "name": "gold:Gp0127633_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3470e" - }, - "has_input": [ - "nmdc:ea5ca9478871b3e2600e1df0d748cbef", - "nmdc:bc054294600fa310924f104484effd3e", - "nmdc:43069b1146c84c064b7ff334dc9ff100" - ], - "too_short_contig_num": 252383, - "part_of": [ - "nmdc:mga05zvf81" - ], - "binned_contig_num": 738, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:00415cf72f9a77f907e3467a08b123c5", - "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", - "nmdc:3f435d6da551400a4ba4400fa3608e7f", - "nmdc:c66f93153962f8b80c8f3d6978b6d802", - "nmdc:ce2a364ec51a1d6311a319509751266e" - ], - "was_informed_by": "gold:Gp0127633", - "input_contig_num": 272872, - "id": 
"nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga05zvf81", - "mags_list": [ - { - "number_of_contig": 83, - "completeness": 0.0, - "bin_name": "bins.1", - "gene_count": 600, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - }, - { - "number_of_contig": 142, - "completeness": 43.03, - "bin_name": "bins.2", - "gene_count": 746, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.72, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 194, - "completeness": 73.62, - "bin_name": "bins.3", - "gene_count": 1844, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 2.43, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 31 - }, - { - "number_of_contig": 91, - "completeness": 10.82, - "bin_name": "bins.4", - "gene_count": 442, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 2 - }, - { - "number_of_contig": 82, - "completeness": 10.97, - "bin_name": "bins.5", - "gene_count": 385, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": 
"", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 3 - }, - { - "number_of_contig": 146, - "completeness": 31.6, - "bin_name": "bins.6", - "gene_count": 800, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.6, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 20 - } - ], - "unbinned_contig_num": 19751, - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:40:06+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b0" - }, - "has_input": [ - "nmdc:ea5ca9478871b3e2600e1df0d748cbef" - ], - "part_of": [ - "nmdc:mga05zvf81" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8defcf55f08cd56d8b2560e27f490ca5", - "nmdc:a6031c0a101419dd413a0804937425ca", - "nmdc:43069b1146c84c064b7ff334dc9ff100", - "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", - "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", - "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", - "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", - "nmdc:8f7429420cbefb9e27bcdbe6252e5288", - "nmdc:6d69127dc30609e4861a7b2443b99164", - "nmdc:00243bcaf50313d937a7685380a876bb", - "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", - "nmdc:907439e314b4f4623244e2cec8532098" - ], - "was_informed_by": "gold:Gp0127633", - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga05zvf81", - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:40:06+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f97" - }, - 
"has_input": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" - ], - "part_of": [ - "nmdc:mga05zvf81" - ], - "ctg_logsum": 378958, - "scaf_logsum": 380592, - "gap_pct": 0.00189, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ea5ca9478871b3e2600e1df0d748cbef", - "nmdc:327e130872e4c5faac2f1c9f8dea2316", - "nmdc:f61f1e62791a38beae95bd95833a6784", - "nmdc:416254a3bfc685dd16c11d65a222305f", - "nmdc:bc054294600fa310924f104484effd3e" - ], - "asm_score": 4.48, - "was_informed_by": "gold:Gp0127633", - "ctg_powsum": 41464, - "scaf_max": 30530, - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "scaf_powsum": 41655, - "execution_resource": "NERSC-Cori", - "contigs": 272879, - "name": "Assembly Activity for nmdc:mga05zvf81", - "ctg_max": 30530, - "gc_std": 0.08353, - "contig_bp": 141974737, - "gc_avg": 0.63381, - "started_at_time": "2021-10-11T02:24:58Z", - "scaf_bp": 141977427, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 272628, - "ended_at_time": "2021-10-11T03:40:06+00:00", - "ctg_l50": 526, - "ctg_l90": 298, - "ctg_n50": 72824, - "ctg_n90": 224178, - "scaf_l50": 527, - "scaf_l90": 298, - "scaf_n50": 72571, - "scaf_n90": 223970 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b42" - }, - "id": "nmdc:omprc-11-0g415160", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-nbgp1x53" - ], - "has_output": [ - "jgi:574fde5e7ded5e3df1ee1401" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": 
"2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127633" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c867" - }, - "has_input": [ - "nmdc:c0b8d6516c48cfe5a0b110abe67ee983" - ], - "part_of": [ - "nmdc:mga05zvf81" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", - "nmdc:eccf0501d08f920a88b6598d573a8e3e" - ], - "was_informed_by": "gold:Gp0127633", - "input_read_count": 23291434, - "output_read_bases": 3367024367, - "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3517006534, - "name": "Read QC Activity for nmdc:mga05zvf81", - "output_read_count": 22556158, - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:40:06+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf29" - }, - "has_input": [ - "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8bd9eb762acabbac5d079c379c28e381", - "nmdc:77351dd18ca40e5552ac1380ba94acbf", - "nmdc:f445af1a7774572d156f55a898d26f09", - "nmdc:e11fcbf66318878c05984fa3d893e3b7", - "nmdc:28beb8baabdaf346f2066b40f375a152", - "nmdc:1f74a43724c4afed5563499d05601e22", - "nmdc:4825177c6d0a8b67db82e6070cfbc35f", - "nmdc:275268a6b5aca33c427d11877bcfa674", - "nmdc:89e810af4915f0e117eaa60550587453" - ], - "was_informed_by": "gold:Gp0127633", - "id": 
"nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", - "started_at_time": "2021-10-11T02:24:58Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:40:06+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1875083220, - "type": "nmdc:DataObject", - "id": "jgi:574fe0967ded5e3df1ee1482", - "name": "10533.3.165334.GCCTTGT-AACAAGG.fastq.gz" - }, - { - "name": "Gp0127627_Filtered Reads", - "description": "Filtered Reads for Gp0127627", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filtered.fastq.gz", - "md5_checksum": "ed95796b3fd964c6bedb141d70737ebf", - "id": "nmdc:ed95796b3fd964c6bedb141d70737ebf", - "file_size_bytes": 1752924191 - }, - { - "name": "Gp0127627_Filtered Stats", - "description": "Filtered Stats for Gp0127627", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filterStats.txt", - "md5_checksum": "308ae373809697291bbc7947a1e4ed2d", - "id": "nmdc:308ae373809697291bbc7947a1e4ed2d", - "file_size_bytes": 281 - }, - { - "name": "Gp0127627_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", - "md5_checksum": "a5ac6665e5d66242b1c885a911236982", - "id": "nmdc:a5ac6665e5d66242b1c885a911236982", - "file_size_bytes": 5530 - }, - { - "name": "Gp0127627_Gottcha2 full TSV 
report", - "description": "Gottcha2 full TSV report for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", - "md5_checksum": "d19478a191693d643157a89c69cc02d1", - "id": "nmdc:d19478a191693d643157a89c69cc02d1", - "file_size_bytes": 825047 - }, - { - "name": "Gp0127627_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127627", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", - "md5_checksum": "679a82699663e88a5e8828ee081fa967", - "id": "nmdc:679a82699663e88a5e8828ee081fa967", - "file_size_bytes": 241114 - }, - { - "name": "Gp0127627_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127627", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", - "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", - "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "file_size_bytes": 1463660267 - }, - { - "name": "Gp0127627_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127627", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", - "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", - "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", - "file_size_bytes": 254347 - }, - { - "name": "Gp0127627_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127627", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", - "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", - "id": 
"nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "file_size_bytes": 2330603 - }, - { - "name": "Gp0127627_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127627", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", - "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", - "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "file_size_bytes": 1177609473 - }, - { - "name": "Gp0127627_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127627", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", - "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", - "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", - "file_size_bytes": 643281 - }, - { - "name": "Gp0127627_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127627", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", - "md5_checksum": "bab24ab64ad432d115f182df7198d46e", - "id": "nmdc:bab24ab64ad432d115f182df7198d46e", - "file_size_bytes": 3926756 - }, - { - "name": "Gp0127627_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127627", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", - "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", - "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", - "file_size_bytes": 19853676 - }, - { - "name": "Gp0127627_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127627", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", - 
"md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", - "id": "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", - "file_size_bytes": 19699986 - }, - { - "name": "Gp0127627_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", - "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", - "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", - "file_size_bytes": 3997845 - }, - { - "name": "Gp0127627_Assembled AGP file", - "description": "Assembled AGP file for Gp0127627", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_assembly.agp", - "md5_checksum": "e6e7f40bb1f1e333904f20dc3c317e37", - "id": "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", - "file_size_bytes": 3715901 - }, - { - "name": "Gp0127627_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127627", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_pairedMapped_sorted.bam", - "md5_checksum": "08f1ba3d3d380a167182c1beb7da304f", - "id": "nmdc:08f1ba3d3d380a167182c1beb7da304f", - "file_size_bytes": 1854522814 - }, - { - "name": "Gp0127627_Protein FAA", - "description": "Protein FAA for Gp0127627", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_proteins.faa", - "md5_checksum": "6003e73aa18ac6aa3cc0f7e020c7170e", - "id": "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", - "file_size_bytes": 12141650 - }, - { - "name": "Gp0127627_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127627", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_structural_annotation.gff", - "md5_checksum": 
"7e5852b8ca5590f81c543ea69398410f", - "id": "nmdc:7e5852b8ca5590f81c543ea69398410f", - "file_size_bytes": 8716031 - }, - { - "name": "Gp0127627_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127627", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_functional_annotation.gff", - "md5_checksum": "cf868630ca2d9037e69e82cfb76a7bd7", - "id": "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", - "file_size_bytes": 14995284 - }, - { - "name": "Gp0127627_KO TSV file", - "description": "KO TSV file for Gp0127627", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko.tsv", - "md5_checksum": "9e52b5a16f0eff5df36bd46038702a52", - "id": "nmdc:9e52b5a16f0eff5df36bd46038702a52", - "file_size_bytes": 1782540 - }, - { - "name": "Gp0127627_EC TSV file", - "description": "EC TSV file for Gp0127627", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ec.tsv", - "md5_checksum": "c44dceb1684f1a4249e7b8e944a2b7cf", - "id": "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", - "file_size_bytes": 1180943 - }, - { - "name": "Gp0127627_COG GFF file", - "description": "COG GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cog.gff", - "md5_checksum": "4a788566d47b89e8bc79eea6e26f2c42", - "id": "nmdc:4a788566d47b89e8bc79eea6e26f2c42", - "file_size_bytes": 8144598 - }, - { - "name": "Gp0127627_PFAM GFF file", - "description": "PFAM GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_pfam.gff", - "md5_checksum": "3d01f11a480f59cefdc67e7b6c7f9fc6", - "id": "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", - "file_size_bytes": 5854816 - }, - { - "name": "Gp0127627_TigrFam GFF file", - 
"description": "TigrFam GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_tigrfam.gff", - "md5_checksum": "196a8e27999a32a6168d23f30d84f37b", - "id": "nmdc:196a8e27999a32a6168d23f30d84f37b", - "file_size_bytes": 549612 - }, - { - "name": "Gp0127627_SMART GFF file", - "description": "SMART GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_smart.gff", - "md5_checksum": "c3040fe67c2c8b2924c6db6c53b268ce", - "id": "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", - "file_size_bytes": 1739035 - }, - { - "name": "Gp0127627_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_supfam.gff", - "md5_checksum": "5594ce118ad4b2f9ec03adc10ebb6267", - "id": "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", - "file_size_bytes": 10326655 - }, - { - "name": "Gp0127627_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cath_funfam.gff", - "md5_checksum": "9d2ac6550f5a1dc3d4b3743e8fe2ceec", - "id": "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", - "file_size_bytes": 7571959 - }, - { - "name": "Gp0127627_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127627", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko_ec.gff", - "md5_checksum": "2c73a261047ff94b898c190418373075", - "id": "nmdc:2c73a261047ff94b898c190418373075", - "file_size_bytes": 5683569 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - 
"id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127627_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127627", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/MAGs/nmdc_mga0daby71_hqmq_bin.zip", - "md5_checksum": "ba468a2c4f4810d87ba95ad9e123483d", - "id": "nmdc:ba468a2c4f4810d87ba95ad9e123483d", - "file_size_bytes": 182 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e58" - }, - "description": "Assembled contigs fasta for gold:Gp0127627", - "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly_contigs.fna", - "file_size_bytes": 19648924, - "type": "nmdc:DataObject", - "id": "nmdc:245e4bf7ae2d630d26223054f851e31c", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e59" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127627", - "url": "https://data.microbiomedata.org/data/1781_100329/assembly/mapping_stats.txt", - "file_size_bytes": 3793093, - "type": "nmdc:DataObject", - "id": "nmdc:53931f648c95c33e09552eb092065622", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e5a" - }, - "description": "Assembled AGP file for gold:Gp0127627", - "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly.agp", - "file_size_bytes": 3306333, - "type": "nmdc:DataObject", - "id": "nmdc:75f11f70792c4e6055068a31d0b8f64b", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e5b" - }, - "description": "Assembled scaffold fasta for gold:Gp0127627", - "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly_scaffolds.fna", - "file_size_bytes": 19495266, - "type": "nmdc:DataObject", - "id": "nmdc:06427cb05246b5573ed4b85f93c0f155", - "name": 
"assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e5f" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127627", - "url": "https://data.microbiomedata.org/data/1781_100329/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1829579569, - "type": "nmdc:DataObject", - "id": "nmdc:7d0f0b73c319579aac90fa171f8d77d2", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15afe" - }, - "id": "nmdc:84280bb9e2ed61950aca03e7a5248bf0", - "name": "1781_100329.krona.html", - "description": "Gold:Gp0127627 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100329/ReadbasedAnalysis/centrifuge/1781_100329.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b01" - }, - "id": "nmdc:1b842adedef085708050a71c63cbccb3", - "name": "1781_100329.json", - "description": "Gold:Gp0127627 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100329/ReadbasedAnalysis/1781_100329.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d0" - }, - "id": "nmdc:0ab7113a0f5362f23f64b4b7cd7abcb8", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127627", - "file_size_bytes": 18589862, - "url": "https://data.microbiomedata.org/data/1781_100329/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d2" - }, - "id": "nmdc:ec364473e457f915d5fe7fb700c210cd", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127627", - "file_size_bytes": 609489, - "url": "https://data.microbiomedata.org/data/1781_100329/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" 
- }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd1" - }, - "description": "EC TSV File for gold:Gp0127627", - "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_ec.tsv", - "md5_checksum": "4c97ec34649fc995f167408bd39c9998", - "file_size_bytes": 3385, - "id": "nmdc:4c97ec34649fc995f167408bd39c9998", - "name": "gold:Gp0127627_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd3" - }, - "description": "Functional annotation GFF file for gold:Gp0127627", - "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_functional_annotation.gff", - "md5_checksum": "6c96999ab72498624aae8bb9b0bfbc66", - "file_size_bytes": 3385, - "id": "nmdc:6c96999ab72498624aae8bb9b0bfbc66", - "name": "gold:Gp0127627_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd5" - }, - "description": "KO TSV File for gold:Gp0127627", - "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_ko.tsv", - "md5_checksum": "874ae45fc2a007a7d5f9ff964fa8117a", - "file_size_bytes": 3385, - "id": "nmdc:874ae45fc2a007a7d5f9ff964fa8117a", - "name": "gold:Gp0127627_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cd6" - }, - "description": "Structural annotation GFF file for gold:Gp0127627", - "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_structural_annotation.gff", - "md5_checksum": "48ab9737528d088ffde37b733e3f728f", - "file_size_bytes": 3385, - "id": "nmdc:48ab9737528d088ffde37b733e3f728f", - "name": "gold:Gp0127627_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16cdd" - }, - "description": 
"Protein FAA for gold:Gp0127627", - "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_proteins.faa", - "md5_checksum": "fec0b3842897bbce9166a628c4c2d7a0", - "file_size_bytes": 3385, - "id": "nmdc:fec0b3842897bbce9166a628c4c2d7a0", - "name": "gold:Gp0127627_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3470b" - }, - "has_input": [ - "nmdc:a7db57faea894bec6603a69abfdfcf7d", - "nmdc:08f1ba3d3d380a167182c1beb7da304f", - "nmdc:cf868630ca2d9037e69e82cfb76a7bd7" - ], - "too_short_contig_num": 50792, - "part_of": [ - "nmdc:mga0daby71" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:ba468a2c4f4810d87ba95ad9e123483d" - ], - "was_informed_by": "gold:Gp0127627", - "input_contig_num": 51188, - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0daby71", - "mags_list": [], - "unbinned_contig_num": 396, - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-11-13T19:08:49+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9ab" - }, - "has_input": [ - "nmdc:a7db57faea894bec6603a69abfdfcf7d" - ], - "part_of": [ - "nmdc:mga0daby71" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", - "nmdc:7e5852b8ca5590f81c543ea69398410f", - "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", - "nmdc:9e52b5a16f0eff5df36bd46038702a52", - "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", - 
"nmdc:4a788566d47b89e8bc79eea6e26f2c42", - "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", - "nmdc:196a8e27999a32a6168d23f30d84f37b", - "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", - "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", - "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", - "nmdc:2c73a261047ff94b898c190418373075" - ], - "was_informed_by": "gold:Gp0127627", - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0daby71", - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-11-13T19:08:49+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f99" - }, - "has_input": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf" - ], - "part_of": [ - "nmdc:mga0daby71" - ], - "ctg_logsum": 6346.305, - "scaf_logsum": 6368.36, - "gap_pct": 0.00044, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a7db57faea894bec6603a69abfdfcf7d", - "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", - "nmdc:0d3200307a90e23525d3fefa7a25f867", - "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", - "nmdc:08f1ba3d3d380a167182c1beb7da304f" - ], - "asm_score": 4.807, - "was_informed_by": "gold:Gp0127627", - "ctg_powsum": 681.483, - "scaf_max": 15604, - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "scaf_powsum": 683.717, - "execution_resource": "NERSC-Cori", - "contigs": 51188, - "name": "Assembly Activity for nmdc:mga0daby71", - "ctg_max": 15604, - "gc_std": 0.11462, - "gc_avg": 0.57328, - "contig_bp": 18008171, - "started_at_time": "2021-11-13T18:47:34Z", - "scaf_bp": 18008251, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 51180, - "ended_at_time": "2021-11-13T19:08:49+00:00", - "ctg_l50": 321, - "ctg_l90": 282, - "ctg_n50": 20415, - "ctg_n90": 44756, - "scaf_l50": 321, - "scaf_l90": 282, - "scaf_n50": 20413, - "scaf_n90": 44748 - } - ], - "metagenome_sequencing_activity_set": [], - 
"metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b43" - }, - "id": "nmdc:omprc-11-z5qv0f24", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-v0q5ak63" - ], - "has_output": [ - "jgi:574fe0967ded5e3df1ee1482" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127627" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c866" - }, - "has_input": [ - "nmdc:45f15cded08bad75a2ef9d7e4b1f42de" - ], - "part_of": [ - "nmdc:mga0daby71" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf", - "nmdc:308ae373809697291bbc7947a1e4ed2d" - ], - "was_informed_by": "gold:Gp0127627", - "input_read_count": 20505370, - "output_read_bases": 2992084693, - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3096310870, - "name": "Read QC Activity for nmdc:mga0daby71", - "output_read_count": 19995028, - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:ReadQCAnalysisActivity", - 
"ended_at_time": "2021-11-13T19:08:49+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf2a" - }, - "has_input": [ - "nmdc:ed95796b3fd964c6bedb141d70737ebf" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a5ac6665e5d66242b1c885a911236982", - "nmdc:d19478a191693d643157a89c69cc02d1", - "nmdc:679a82699663e88a5e8828ee081fa967", - "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", - "nmdc:0380e478962be82e0d97a6339f7f3b91", - "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", - "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", - "nmdc:a2a0029691c04851f4a98003a773fe3f", - "nmdc:bab24ab64ad432d115f182df7198d46e" - ], - "was_informed_by": "gold:Gp0127627", - "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0daby71", - "started_at_time": "2021-11-13T18:47:34Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T19:08:49+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2456584646, - "type": "nmdc:DataObject", - "id": "jgi:574fde807ded5e3df1ee141b", - "name": "10533.2.165322.AGCTAAC-GGTTAGC.fastq.gz" - }, - { - "name": "Gp0127632_Filtered Reads", - "description": "Filtered Reads for Gp0127632", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filtered.fastq.gz", - "md5_checksum": "a43bfb55389206c2fc5ddb53e6aa2bc6", - "id": "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", - "file_size_bytes": 2199178772 - }, - { - 
"name": "Gp0127632_Filtered Stats", - "description": "Filtered Stats for Gp0127632", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filterStats.txt", - "md5_checksum": "919c5aade4fffb76f743a33b035b2839", - "id": "nmdc:919c5aade4fffb76f743a33b035b2839", - "file_size_bytes": 289 - }, - { - "name": "Gp0127632_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", - "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", - "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "file_size_bytes": 2899 - }, - { - "name": "Gp0127632_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", - "md5_checksum": "6c54105711e818c4d8169ab595b05efe", - "id": "nmdc:6c54105711e818c4d8169ab595b05efe", - "file_size_bytes": 769416 - }, - { - "name": "Gp0127632_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127632", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", - "md5_checksum": "adb155cdb656648496484998a62fb96f", - "id": "nmdc:adb155cdb656648496484998a62fb96f", - "file_size_bytes": 235384 - }, - { - "name": "Gp0127632_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127632", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", - "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", - "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "file_size_bytes": 1917130445 - }, - { - "name": "Gp0127632_Centrifuge 
TSV report", - "description": "Centrifuge TSV report for Gp0127632", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", - "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", - "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", - "file_size_bytes": 255290 - }, - { - "name": "Gp0127632_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127632", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", - "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", - "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "file_size_bytes": 2333225 - }, - { - "name": "Gp0127632_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127632", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", - "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", - "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "file_size_bytes": 1537863470 - }, - { - "name": "Gp0127632_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127632", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", - "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", - "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "file_size_bytes": 648597 - }, - { - "name": "Gp0127632_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127632", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", - "md5_checksum": "e350fda9bd0651755171d79b413b8da3", - 
"id": "nmdc:e350fda9bd0651755171d79b413b8da3", - "file_size_bytes": 3959152 - }, - { - "name": "Gp0127632_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127632", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_contigs.fna", - "md5_checksum": "b5094d52c6d48836de0aac261c622868", - "id": "nmdc:b5094d52c6d48836de0aac261c622868", - "file_size_bytes": 59930370 - }, - { - "name": "Gp0127632_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127632", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_scaffolds.fna", - "md5_checksum": "4d9d83ac8db218e6d0bd4f29801c3ce3", - "id": "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", - "file_size_bytes": 59532251 - }, - { - "name": "Gp0127632_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_covstats.txt", - "md5_checksum": "f8fad4cf225943d8fddec3fa3402c53a", - "id": "nmdc:f8fad4cf225943d8fddec3fa3402c53a", - "file_size_bytes": 10428676 - }, - { - "name": "Gp0127632_Assembled AGP file", - "description": "Assembled AGP file for Gp0127632", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_assembly.agp", - "md5_checksum": "52f130d084757d6e27177ed108e9e5bf", - "id": "nmdc:52f130d084757d6e27177ed108e9e5bf", - "file_size_bytes": 9725931 - }, - { - "name": "Gp0127632_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127632", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_pairedMapped_sorted.bam", - "md5_checksum": "9e5deaa9e7ac3f5f90d79b6520d39d53", - "id": "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", 
- "file_size_bytes": 2363431165 - }, - { - "name": "Gp0127632_Protein FAA", - "description": "Protein FAA for Gp0127632", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_proteins.faa", - "md5_checksum": "42989e75458691fbd17e537582c56d5e", - "id": "nmdc:42989e75458691fbd17e537582c56d5e", - "file_size_bytes": 35685584 - }, - { - "name": "Gp0127632_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127632", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_structural_annotation.gff", - "md5_checksum": "09240a6d1afc5f8b965a80a64aa96ef4", - "id": "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", - "file_size_bytes": 2512 - }, - { - "name": "Gp0127632_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127632", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_functional_annotation.gff", - "md5_checksum": "c595237698baaf882fdeeac92f1b02be", - "id": "nmdc:c595237698baaf882fdeeac92f1b02be", - "file_size_bytes": 41979225 - }, - { - "name": "Gp0127632_KO TSV file", - "description": "KO TSV file for Gp0127632", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko.tsv", - "md5_checksum": "cd87df7a80ed03eef7d9923b9e9621e4", - "id": "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", - "file_size_bytes": 4726366 - }, - { - "name": "Gp0127632_EC TSV file", - "description": "EC TSV file for Gp0127632", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ec.tsv", - "md5_checksum": "57053d5594bb80495014664df22b0bb0", - "id": "nmdc:57053d5594bb80495014664df22b0bb0", - 
"file_size_bytes": 3155078 - }, - { - "name": "Gp0127632_COG GFF file", - "description": "COG GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cog.gff", - "md5_checksum": "3c82ee6a19674bd5abd4072cb137d96f", - "id": "nmdc:3c82ee6a19674bd5abd4072cb137d96f", - "file_size_bytes": 23956687 - }, - { - "name": "Gp0127632_PFAM GFF file", - "description": "PFAM GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_pfam.gff", - "md5_checksum": "c9bf48d6c88b3db0f431a08d93873c4a", - "id": "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", - "file_size_bytes": 17333907 - }, - { - "name": "Gp0127632_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_tigrfam.gff", - "md5_checksum": "2475726e21bd8369f76d529f55f21a3f", - "id": "nmdc:2475726e21bd8369f76d529f55f21a3f", - "file_size_bytes": 1771706 - }, - { - "name": "Gp0127632_SMART GFF file", - "description": "SMART GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_smart.gff", - "md5_checksum": "5698830d572ddc4e35a5f6642da7981a", - "id": "nmdc:5698830d572ddc4e35a5f6642da7981a", - "file_size_bytes": 5383998 - }, - { - "name": "Gp0127632_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_supfam.gff", - "md5_checksum": "18cdb0f987a2d417d0a39a685e435729", - "id": "nmdc:18cdb0f987a2d417d0a39a685e435729", - "file_size_bytes": 30162479 - }, - { - "name": "Gp0127632_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cath_funfam.gff", - "md5_checksum": "b34e4d1823bd5cd88aa42832d10b3431", - "id": 
"nmdc:b34e4d1823bd5cd88aa42832d10b3431", - "file_size_bytes": 22459777 - }, - { - "name": "Gp0127632_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko_ec.gff", - "md5_checksum": "dc544f4796d49c520372e1872c5aea49", - "id": "nmdc:dc544f4796d49c520372e1872c5aea49", - "file_size_bytes": 15047897 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127632_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.tooShort.fa", - "md5_checksum": "2941988fcfb708d20ad1e44682c78e22", - "id": "nmdc:2941988fcfb708d20ad1e44682c78e22", - "file_size_bytes": 52475207 - }, - { - "name": "Gp0127632_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.unbinned.fa", - "md5_checksum": "a6bc8d9d5ba5fe9713829aa7aef3c4cd", - "id": "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", - "file_size_bytes": 5473493 - }, - { - "name": "Gp0127632_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127632", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_checkm_qa.out", - "md5_checksum": "2914266e7ac7a8668c6f8d8722466c69", - "id": 
"nmdc:2914266e7ac7a8668c6f8d8722466c69", - "file_size_bytes": 948 - }, - { - "name": "Gp0127632_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127632", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_hqmq_bin.zip", - "md5_checksum": "0fd97ca0ce01d42361ce817d3753a65e", - "id": "nmdc:0fd97ca0ce01d42361ce817d3753a65e", - "file_size_bytes": 497493 - }, - { - "name": "Gp0127632_metabat2 bins", - "description": "metabat2 bins for Gp0127632", - "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_metabat_bin.zip", - "md5_checksum": "8e7832cac0ae99e2b63dfdfa34c24927", - "id": "nmdc:8e7832cac0ae99e2b63dfdfa34c24927", - "file_size_bytes": 108323 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e69" - }, - "description": "Assembled contigs fasta for gold:Gp0127632", - "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly_contigs.fna", - "file_size_bytes": 59400374, - "type": "nmdc:DataObject", - "id": "nmdc:8f8931e086f72961675aa936b1356f86", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e6a" - }, - "description": "Assembled scaffold fasta for gold:Gp0127632", - "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly_scaffolds.fna", - "file_size_bytes": 59002431, - "type": "nmdc:DataObject", - "id": "nmdc:e780311c63e956d852cd3c1bbd957f86", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e6c" - }, - "description": "Assembled AGP file for gold:Gp0127632", - "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly.agp", - "file_size_bytes": 8665587, - "type": "nmdc:DataObject", - "id": "nmdc:9c08a645e240b0861d3b8c912c7eaed0", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - 
"_id": { - "$oid": "649b003d1ae706d7b5b14e6d" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127632", - "url": "https://data.microbiomedata.org/data/1781_100334/assembly/mapping_stats.txt", - "file_size_bytes": 9898680, - "type": "nmdc:DataObject", - "id": "nmdc:0ca761b2a51f8db8f46b694f06c0809d", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e70" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127632", - "url": "https://data.microbiomedata.org/data/1781_100334/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2332493253, - "type": "nmdc:DataObject", - "id": "nmdc:4a26faa9b34a5c9f3bb65815cf2ad5c8", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b29" - }, - "id": "nmdc:a95788f887b5af704f7e2cfd9868e8a4", - "name": "1781_100334.krona.html", - "description": "Gold:Gp0127632 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100334/ReadbasedAnalysis/centrifuge/1781_100334.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b30" - }, - "id": "nmdc:634b959933536776d62c9c66d43ec8ed", - "name": "1781_100334.json", - "description": "Gold:Gp0127632 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100334/ReadbasedAnalysis/1781_100334.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165d6" - }, - "id": "nmdc:e45be81dca6d2cb4a1d7d17ee6d166a4", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127632", - "file_size_bytes": 1413, - "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - 
{ - "_id": { - "$oid": "649b003f1ae706d7b5b165d7" - }, - "id": "nmdc:0aa200afbb24fe1e4c26c79c54c070dd", - "name": "gold:Gp0127632.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0127632", - "file_size_bytes": 273113, - "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165da" - }, - "id": "nmdc:695d01e510529d34d501f4fa62d5c9b8", - "name": "gold:Gp0127632.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127632", - "file_size_bytes": 210794, - "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165db" - }, - "id": "nmdc:86c8e7c9c103a298357f6a2102bd8772", - "name": "gold:Gp0127632.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127632", - "file_size_bytes": 311763, - "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165dd" - }, - "id": "nmdc:97771c73e3e872ea1da72cd758a03453", - "name": "gold:Gp0127632.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0127632", - "file_size_bytes": 236159, - "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165de" - }, - "id": "nmdc:e7e4a297417b55b9714702401da79d96", - "name": "gold:Gp0127632.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127632", - "file_size_bytes": 346669, - "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - 
"data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165eb" - }, - "id": "nmdc:fa90657e9d2a9b11f5cea076316d0a50", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127632", - "file_size_bytes": 6037283, - "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1662f" - }, - "id": "nmdc:800b596b9573c1ddf6a9e357c1eb8d86", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127632", - "file_size_bytes": 50903302, - "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ce9" - }, - "description": "KO TSV File for gold:Gp0127632", - "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_ko.tsv", - "md5_checksum": "aeafeb18adb193b1a3c5c3c2ff9a912e", - "file_size_bytes": 3385, - "id": "nmdc:aeafeb18adb193b1a3c5c3c2ff9a912e", - "name": "gold:Gp0127632_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16ceb" - }, - "description": "Protein FAA for gold:Gp0127632", - "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_proteins.faa", - "md5_checksum": "a89e8af0fc6daf895e7a87f1ff7087f2", - "file_size_bytes": 3385, - "id": "nmdc:a89e8af0fc6daf895e7a87f1ff7087f2", - "name": "gold:Gp0127632_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cee" - }, - "description": "EC TSV File for gold:Gp0127632", - "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_ec.tsv", - "md5_checksum": "b8d886e71031cbe4fb1284f479348740", - "file_size_bytes": 3385, - "id": 
"nmdc:b8d886e71031cbe4fb1284f479348740", - "name": "gold:Gp0127632_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cf2" - }, - "description": "Structural annotation GFF file for gold:Gp0127632", - "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_structural_annotation.gff", - "md5_checksum": "89b895fbf3c13801ddba22ff59bb385a", - "file_size_bytes": 3385, - "id": "nmdc:89b895fbf3c13801ddba22ff59bb385a", - "name": "gold:Gp0127632_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cf5" - }, - "description": "Functional annotation GFF file for gold:Gp0127632", - "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_functional_annotation.gff", - "md5_checksum": "2395040203b3351554a9e3ffb48b0b88", - "file_size_bytes": 3385, - "id": "nmdc:2395040203b3351554a9e3ffb48b0b88", - "name": "gold:Gp0127632_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3470d" - }, - "has_input": [ - "nmdc:b5094d52c6d48836de0aac261c622868", - "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", - "nmdc:c595237698baaf882fdeeac92f1b02be" - ], - "too_short_contig_num": 128818, - "part_of": [ - "nmdc:mga0b6cy30" - ], - "binned_contig_num": 313, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:2941988fcfb708d20ad1e44682c78e22", - "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", - "nmdc:2914266e7ac7a8668c6f8d8722466c69", - "nmdc:0fd97ca0ce01d42361ce817d3753a65e", - "nmdc:8e7832cac0ae99e2b63dfdfa34c24927" - ], - "was_informed_by": 
"gold:Gp0127632", - "input_contig_num": 132499, - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0b6cy30", - "mags_list": [ - { - "number_of_contig": 84, - "completeness": 27.81, - "bin_name": "bins.1", - "gene_count": 437, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.71, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 10 - }, - { - "number_of_contig": 229, - "completeness": 71.45, - "bin_name": "bins.2", - "gene_count": 1997, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 0.97, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 36 - } - ], - "unbinned_contig_num": 3368, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:08:32+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9ae" - }, - "has_input": [ - "nmdc:b5094d52c6d48836de0aac261c622868" - ], - "part_of": [ - "nmdc:mga0b6cy30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:42989e75458691fbd17e537582c56d5e", - "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", - "nmdc:c595237698baaf882fdeeac92f1b02be", - "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", - "nmdc:57053d5594bb80495014664df22b0bb0", - "nmdc:3c82ee6a19674bd5abd4072cb137d96f", - "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", - "nmdc:2475726e21bd8369f76d529f55f21a3f", - 
"nmdc:5698830d572ddc4e35a5f6642da7981a", - "nmdc:18cdb0f987a2d417d0a39a685e435729", - "nmdc:b34e4d1823bd5cd88aa42832d10b3431", - "nmdc:dc544f4796d49c520372e1872c5aea49" - ], - "was_informed_by": "gold:Gp0127632", - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0b6cy30", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:08:32+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f9b" - }, - "has_input": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" - ], - "part_of": [ - "nmdc:mga0b6cy30" - ], - "ctg_logsum": 81568, - "scaf_logsum": 81839, - "gap_pct": 0.00096, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b5094d52c6d48836de0aac261c622868", - "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", - "nmdc:f8fad4cf225943d8fddec3fa3402c53a", - "nmdc:52f130d084757d6e27177ed108e9e5bf", - "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53" - ], - "asm_score": 5.986, - "was_informed_by": "gold:Gp0127632", - "ctg_powsum": 9274.272, - "scaf_max": 23706, - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "scaf_powsum": 9304.689, - "execution_resource": "NERSC-Cori", - "contigs": 132499, - "name": "Assembly Activity for nmdc:mga0b6cy30", - "ctg_max": 23706, - "gc_std": 0.09103, - "contig_bp": 54959738, - "gc_avg": 0.61354, - "started_at_time": "2021-10-11T02:23:42Z", - "scaf_bp": 54960268, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 132455, - "ended_at_time": "2021-10-11T04:08:32+00:00", - "ctg_l50": 372, - "ctg_l90": 285, - "ctg_n50": 43541, - "ctg_n90": 113564, - "scaf_l50": 372, - "scaf_l90": 285, - "scaf_n50": 43524, - "scaf_n90": 113522 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - 
"_id": { - "$oid": "649b009773e8249959349b44" - }, - "id": "nmdc:omprc-11-8qms8262", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-0xprxw22" - ], - "has_output": [ - "jgi:574fde807ded5e3df1ee141b" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127632" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c868" - }, - "has_input": [ - "nmdc:5cbd7ceb39903cbded77b36ae866fe9f" - ], - "part_of": [ - "nmdc:mga0b6cy30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", - "nmdc:919c5aade4fffb76f743a33b035b2839" - ], - "was_informed_by": "gold:Gp0127632", - "input_read_count": 27906294, - "output_read_bases": 3905482172, - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4213850394, - "name": "Read QC Activity for nmdc:mga0b6cy30", - "output_read_count": 26116440, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:08:32+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf31" - }, 
- "has_input": [ - "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3e583cccbbc068e0879ba6618bb6407c", - "nmdc:6c54105711e818c4d8169ab595b05efe", - "nmdc:adb155cdb656648496484998a62fb96f", - "nmdc:0a03ac5737750a3b336e7299e9f01ead", - "nmdc:f345b3a57c37097a860e38d5e83835b8", - "nmdc:c1f4471d943b284720a8becb5a2e32b4", - "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", - "nmdc:a8dd7aa20043510158ad3b2bbe961b42", - "nmdc:e350fda9bd0651755171d79b413b8da3" - ], - "was_informed_by": "gold:Gp0127632", - "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:08:32+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2759159406, - "type": "nmdc:DataObject", - "id": "jgi:574fe09f7ded5e3df1ee1489", - "name": "10533.3.165334.ACAGCAA-GTTGCTG.fastq.gz" - }, - { - "name": "Gp0127636_Filtered Reads", - "description": "Filtered Reads for Gp0127636", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filtered.fastq.gz", - "md5_checksum": "e4f5675c728fd1896682eb669656b5d6", - "id": "nmdc:e4f5675c728fd1896682eb669656b5d6", - "file_size_bytes": 2463342132 - }, - { - "name": "Gp0127636_Filtered Stats", - "description": "Filtered Stats for Gp0127636", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filterStats.txt", - "md5_checksum": "64f455185b1bc610a8d74a84ed12683f", - "id": "nmdc:64f455185b1bc610a8d74a84ed12683f", - "file_size_bytes": 293 - }, - { - "name": "Gp0127636_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", - "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", - "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "file_size_bytes": 5547 - }, - { - "name": "Gp0127636_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", - "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", - "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "file_size_bytes": 965042 - }, - { - "name": "Gp0127636_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127636", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", - "md5_checksum": "827ad863c875ea14473c9903d192fa73", - "id": "nmdc:827ad863c875ea14473c9903d192fa73", - "file_size_bytes": 242495 - }, - { - "name": "Gp0127636_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127636", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", - "md5_checksum": "957074ca49765b22348e27b0133d8ba0", - "id": "nmdc:957074ca49765b22348e27b0133d8ba0", - "file_size_bytes": 2151939041 - }, - { - "name": "Gp0127636_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127636", - "data_object_type": "Centrifuge Classification Report", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", - "md5_checksum": "9253645582296696cb33b11754832574", - "id": "nmdc:9253645582296696cb33b11754832574", - "file_size_bytes": 257932 - }, - { - "name": "Gp0127636_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127636", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", - "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", - "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "file_size_bytes": 2335219 - }, - { - "name": "Gp0127636_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127636", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", - "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", - "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", - "file_size_bytes": 1746049273 - }, - { - "name": "Gp0127636_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127636", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", - "md5_checksum": "b4524a34937893768dbd3752068dee0c", - "id": "nmdc:b4524a34937893768dbd3752068dee0c", - "file_size_bytes": 660975 - }, - { - "name": "Gp0127636_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127636", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", - "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", - "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", - "file_size_bytes": 4020978 - }, - { - "name": "Gp0127636_Assembled contigs 
fasta", - "description": "Assembled contigs fasta for Gp0127636", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_contigs.fna", - "md5_checksum": "36692b7b93756aaabd7f1f6259753c4e", - "id": "nmdc:36692b7b93756aaabd7f1f6259753c4e", - "file_size_bytes": 39062008 - }, - { - "name": "Gp0127636_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127636", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_scaffolds.fna", - "md5_checksum": "8d02adf1319d5b95c2abc6ed5b5c1683", - "id": "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", - "file_size_bytes": 38774844 - }, - { - "name": "Gp0127636_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_covstats.txt", - "md5_checksum": "9830a711accd3a5ed899a2e616d0f4bf", - "id": "nmdc:9830a711accd3a5ed899a2e616d0f4bf", - "file_size_bytes": 7495949 - }, - { - "name": "Gp0127636_Assembled AGP file", - "description": "Assembled AGP file for Gp0127636", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_assembly.agp", - "md5_checksum": "481fbd8cdeacd71e54a45c78d5decb36", - "id": "nmdc:481fbd8cdeacd71e54a45c78d5decb36", - "file_size_bytes": 6962527 - }, - { - "name": "Gp0127636_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127636", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_pairedMapped_sorted.bam", - "md5_checksum": "a24edc9ffd773c30cea8ea709988307a", - "id": "nmdc:a24edc9ffd773c30cea8ea709988307a", - "file_size_bytes": 2624769069 - }, - { - "name": "Gp0127636_Protein FAA", - "description": "Protein FAA for Gp0127636", - 
"data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_proteins.faa", - "md5_checksum": "a5d97f323fe7117cb38a2eea1f2246d2", - "id": "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", - "file_size_bytes": 23469553 - }, - { - "name": "Gp0127636_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127636", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_structural_annotation.gff", - "md5_checksum": "2b791fb3e2964d7808388b32086e0de2", - "id": "nmdc:2b791fb3e2964d7808388b32086e0de2", - "file_size_bytes": 16532352 - }, - { - "name": "Gp0127636_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127636", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_functional_annotation.gff", - "md5_checksum": "f61ed86592491b2d83b5893749e12406", - "id": "nmdc:f61ed86592491b2d83b5893749e12406", - "file_size_bytes": 28432426 - }, - { - "name": "Gp0127636_KO TSV file", - "description": "KO TSV file for Gp0127636", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko.tsv", - "md5_checksum": "e983789bdc08364b00a000684062ed16", - "id": "nmdc:e983789bdc08364b00a000684062ed16", - "file_size_bytes": 3189682 - }, - { - "name": "Gp0127636_EC TSV file", - "description": "EC TSV file for Gp0127636", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ec.tsv", - "md5_checksum": "3cd47d66b6e9006ff683a2eda168285f", - "id": "nmdc:3cd47d66b6e9006ff683a2eda168285f", - "file_size_bytes": 2100535 - }, - { - "name": "Gp0127636_COG GFF file", - "description": "COG GFF file for Gp0127636", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cog.gff", - "md5_checksum": "e056ee666e8001bdb6f790efb3394093", - "id": "nmdc:e056ee666e8001bdb6f790efb3394093", - "file_size_bytes": 15585690 - }, - { - "name": "Gp0127636_PFAM GFF file", - "description": "PFAM GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_pfam.gff", - "md5_checksum": "2b90fcb7628c3ffa9e7a14a32612b7af", - "id": "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", - "file_size_bytes": 11182350 - }, - { - "name": "Gp0127636_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_tigrfam.gff", - "md5_checksum": "4e2f1d4b2d20bfb0209a320a60c4aeac", - "id": "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", - "file_size_bytes": 995758 - }, - { - "name": "Gp0127636_SMART GFF file", - "description": "SMART GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_smart.gff", - "md5_checksum": "dd24a8b0f774555ac91e663416745428", - "id": "nmdc:dd24a8b0f774555ac91e663416745428", - "file_size_bytes": 3256325 - }, - { - "name": "Gp0127636_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_supfam.gff", - "md5_checksum": "2e76b71475b854e2bf2d0aa15a53dd7d", - "id": "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", - "file_size_bytes": 19666317 - }, - { - "name": "Gp0127636_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127636", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cath_funfam.gff", - "md5_checksum": "2f297176cd51b2ede33c313f713b40b1", - "id": "nmdc:2f297176cd51b2ede33c313f713b40b1", - "file_size_bytes": 14458019 - }, - { - "name": "Gp0127636_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127636", 
- "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko_ec.gff", - "md5_checksum": "678a7af05a89d9d4f5f5d598dc2e3013", - "id": "nmdc:678a7af05a89d9d4f5f5d598dc2e3013", - "file_size_bytes": 10187098 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127636_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127636", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/MAGs/nmdc_mga02tph39_hqmq_bin.zip", - "md5_checksum": "2d1e318b8b815a8a5487f23315d0fe02", - "id": "nmdc:2d1e318b8b815a8a5487f23315d0fe02", - "file_size_bytes": 182 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e7c" - }, - "description": "Assembled contigs fasta for gold:Gp0127636", - "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly_contigs.fna", - "file_size_bytes": 38679584, - "type": "nmdc:DataObject", - "id": "nmdc:555541de209f6b5bc8b4e36f9c5a96c1", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e7d" - }, - "description": "Assembled scaffold fasta for gold:Gp0127636", - "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly_scaffolds.fna", - "file_size_bytes": 38392508, - "type": "nmdc:DataObject", - "id": "nmdc:28a962bc24ab2ba5b7d2e486c36cf6b5", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e81" - }, - "description": "Assembled AGP file for 
gold:Gp0127636", - "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly.agp", - "file_size_bytes": 6197503, - "type": "nmdc:DataObject", - "id": "nmdc:e67c00b23e1c74597d0c07f129d35890", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e82" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127636", - "url": "https://data.microbiomedata.org/data/1781_100338/assembly/mapping_stats.txt", - "file_size_bytes": 7113525, - "type": "nmdc:DataObject", - "id": "nmdc:fd1201530245f5e4ef4c5d263b34c0a3", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e85" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127636", - "url": "https://data.microbiomedata.org/data/1781_100338/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2590059661, - "type": "nmdc:DataObject", - "id": "nmdc:937d790bc414e5aa80c09a419b25dfe1", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b57" - }, - "id": "nmdc:ad763856d102ea442c511ce8c2ac7641", - "name": "1781_100338.krona.html", - "description": "Gold:Gp0127636 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100338/ReadbasedAnalysis/centrifuge/1781_100338.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b5f" - }, - "id": "nmdc:6ea65689d5fe9ac44c9e0e415304f4e3", - "name": "1781_100338.json", - "description": "Gold:Gp0127636 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100338/ReadbasedAnalysis/1781_100338.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f8" - }, - "id": "nmdc:09f9ddea688b24a3ffa3f858851f6011", - "name": 
"bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127636", - "file_size_bytes": 3427718, - "url": "https://data.microbiomedata.org/data/1781_100338/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16622" - }, - "id": "nmdc:7c210c1bd10b2654864e4d571336a8b8", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127636", - "file_size_bytes": 34402553, - "url": "https://data.microbiomedata.org/data/1781_100338/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cff" - }, - "description": "EC TSV File for gold:Gp0127636", - "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_ec.tsv", - "md5_checksum": "80ec7d76d2509e6eeab61d092808908b", - "file_size_bytes": 3385, - "id": "nmdc:80ec7d76d2509e6eeab61d092808908b", - "name": "gold:Gp0127636_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d00" - }, - "description": "KO TSV File for gold:Gp0127636", - "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_ko.tsv", - "md5_checksum": "d68e6d4245c33a73666148570aac9c10", - "file_size_bytes": 3385, - "id": "nmdc:d68e6d4245c33a73666148570aac9c10", - "name": "gold:Gp0127636_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d01" - }, - "description": "Protein FAA for gold:Gp0127636", - "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_proteins.faa", - "md5_checksum": "66d9d6751efad0b8019a565488f950a5", - "file_size_bytes": 3385, - "id": "nmdc:66d9d6751efad0b8019a565488f950a5", - "name": "gold:Gp0127636_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": 
{ - "$oid": "649b00401ae706d7b5b16d03" - }, - "description": "Structural annotation GFF file for gold:Gp0127636", - "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_structural_annotation.gff", - "md5_checksum": "e3b57dff7ca37c0da6b7d4bfb4450d9c", - "file_size_bytes": 3385, - "id": "nmdc:e3b57dff7ca37c0da6b7d4bfb4450d9c", - "name": "gold:Gp0127636_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d04" - }, - "description": "Functional annotation GFF file for gold:Gp0127636", - "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_functional_annotation.gff", - "md5_checksum": "31f8346eeca4b929a6c28686bb8b2043", - "file_size_bytes": 3385, - "id": "nmdc:31f8346eeca4b929a6c28686bb8b2043", - "name": "gold:Gp0127636_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34709" - }, - "has_input": [ - "nmdc:36692b7b93756aaabd7f1f6259753c4e", - "nmdc:a24edc9ffd773c30cea8ea709988307a", - "nmdc:f61ed86592491b2d83b5893749e12406" - ], - "too_short_contig_num": 93687, - "part_of": [ - "nmdc:mga02tph39" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:2d1e318b8b815a8a5487f23315d0fe02" - ], - "was_informed_by": "gold:Gp0127636", - "input_contig_num": 95606, - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga02tph39", - "mags_list": [], - "unbinned_contig_num": 1919, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-11-13T18:49:37+00:00" - } - ], - 
"material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9ac" - }, - "has_input": [ - "nmdc:36692b7b93756aaabd7f1f6259753c4e" - ], - "part_of": [ - "nmdc:mga02tph39" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", - "nmdc:2b791fb3e2964d7808388b32086e0de2", - "nmdc:f61ed86592491b2d83b5893749e12406", - "nmdc:e983789bdc08364b00a000684062ed16", - "nmdc:3cd47d66b6e9006ff683a2eda168285f", - "nmdc:e056ee666e8001bdb6f790efb3394093", - "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", - "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", - "nmdc:dd24a8b0f774555ac91e663416745428", - "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", - "nmdc:2f297176cd51b2ede33c313f713b40b1", - "nmdc:678a7af05a89d9d4f5f5d598dc2e3013" - ], - "was_informed_by": "gold:Gp0127636", - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga02tph39", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-11-13T18:49:37+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f91" - }, - "has_input": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6" - ], - "part_of": [ - "nmdc:mga02tph39" - ], - "ctg_logsum": 36469, - "scaf_logsum": 36615, - "gap_pct": 0.00062, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:36692b7b93756aaabd7f1f6259753c4e", - "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", - "nmdc:9830a711accd3a5ed899a2e616d0f4bf", - "nmdc:481fbd8cdeacd71e54a45c78d5decb36", - "nmdc:a24edc9ffd773c30cea8ea709988307a" - ], - "asm_score": 3.618, - "was_informed_by": "gold:Gp0127636", - "ctg_powsum": 3976.058, - "scaf_max": 23067, - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", - "scaf_powsum": 
3993.143, - "execution_resource": "NERSC-Cori", - "contigs": 95606, - "name": "Assembly Activity for nmdc:mga02tph39", - "ctg_max": 23067, - "gc_std": 0.11099, - "gc_avg": 0.57474, - "contig_bp": 35573088, - "started_at_time": "2021-10-11T02:23:42Z", - "scaf_bp": 35573308, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 95584, - "ended_at_time": "2021-11-13T18:49:37+00:00", - "ctg_l50": 329, - "ctg_l90": 282, - "ctg_n50": 35238, - "ctg_n90": 83377, - "scaf_l50": 329, - "scaf_l90": 282, - "scaf_n50": 35220, - "scaf_n90": 83355 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b45" - }, - "id": "nmdc:omprc-11-k675bw84", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-rtf54942" - ], - "has_output": [ - "jgi:574fe09f7ded5e3df1ee1489" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127636" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c864" - }, - "has_input": [ - "nmdc:341830a5735c34968da2304bc27edd2a" - ], - "part_of": [ - "nmdc:mga02tph39" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6", - "nmdc:64f455185b1bc610a8d74a84ed12683f" - ], - "was_informed_by": "gold:Gp0127636", - "input_read_count": 31642056, - "output_read_bases": 4354491393, - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4777950456, - "name": "Read QC Activity for nmdc:mga02tph39", - "output_read_count": 29115818, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-11-13T18:49:37+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf26" - }, - "has_input": [ - "nmdc:e4f5675c728fd1896682eb669656b5d6" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:50d80a30d4ff113e36f6fd64b1f28547", - "nmdc:c2cd20a2011592a76397f49dc3acd6b7", - "nmdc:827ad863c875ea14473c9903d192fa73", - "nmdc:957074ca49765b22348e27b0133d8ba0", - "nmdc:9253645582296696cb33b11754832574", - "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", - "nmdc:75180fce38f38a6307231b47a8d2b23b", - "nmdc:b4524a34937893768dbd3752068dee0c", - "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" - ], - "was_informed_by": "gold:Gp0127636", - "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga02tph39", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:49:37+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw 
sequencer read data", - "file_size_bytes": 2620687542, - "type": "nmdc:DataObject", - "id": "jgi:574fe09c7ded5e3df1ee1487", - "name": "10533.3.165334.ACGGAAC-TGTTCCG.fastq.gz" - }, - { - "name": "Gp0127634_Filtered Reads", - "description": "Filtered Reads for Gp0127634", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", - "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", - "id": "nmdc:ac889627d813c8e34cfbf79a4264c590", - "file_size_bytes": 2316462404 - }, - { - "name": "Gp0127634_Filtered Stats", - "description": "Filtered Stats for Gp0127634", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", - "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", - "id": "nmdc:0dfd55be1779ae7922d80aa22034c9a1", - "file_size_bytes": 291 - }, - { - "name": "Gp0127634_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", - "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", - "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "file_size_bytes": 4224 - }, - { - "name": "Gp0127634_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", - "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", - "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "file_size_bytes": 875501 - }, - { - "name": "Gp0127634_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127634", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", - "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", - "id": 
"nmdc:366ab38bb6de9591f31a086d42ac23d6", - "file_size_bytes": 238755 - }, - { - "name": "Gp0127634_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127634", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", - "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", - "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "file_size_bytes": 2051793471 - }, - { - "name": "Gp0127634_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127634", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", - "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", - "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", - "file_size_bytes": 256560 - }, - { - "name": "Gp0127634_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127634", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", - "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", - "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "file_size_bytes": 2334325 - }, - { - "name": "Gp0127634_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127634", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", - "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", - "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "file_size_bytes": 1649071235 - }, - { - "name": "Gp0127634_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127634", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", - "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "file_size_bytes": 654782 - }, - { - "name": "Gp0127634_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127634", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", - "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", - "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", - "file_size_bytes": 3988988 - }, - { - "name": "Gp0127634_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127634", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_contigs.fna", - "md5_checksum": "2a30cf44cc596923301befc34edf6c0a", - "id": "nmdc:2a30cf44cc596923301befc34edf6c0a", - "file_size_bytes": 84939887 - }, - { - "name": "Gp0127634_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127634", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_scaffolds.fna", - "md5_checksum": "f147264a5a4a7eec4d68f05ab52ecc1d", - "id": "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", - "file_size_bytes": 84411544 - }, - { - "name": "Gp0127634_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_covstats.txt", - "md5_checksum": "9bd1b25df71c0a6f9ca408ddc045ffed", - "id": "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", - "file_size_bytes": 13895509 - }, - { - "name": "Gp0127634_Assembled AGP file", - "description": "Assembled AGP file for Gp0127634", - "data_object_type": "Assembly AGP", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_assembly.agp", - "md5_checksum": "825969095ff134b195b06a40fcc6089a", - "id": "nmdc:825969095ff134b195b06a40fcc6089a", - "file_size_bytes": 12985962 - }, - { - "name": "Gp0127634_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127634", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_pairedMapped_sorted.bam", - "md5_checksum": "356d9ca409747590849dd894998166ee", - "id": "nmdc:356d9ca409747590849dd894998166ee", - "file_size_bytes": 2516463401 - }, - { - "name": "Gp0127634_Protein FAA", - "description": "Protein FAA for Gp0127634", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_proteins.faa", - "md5_checksum": "ca16203099dc1d6bbce00320bb753974", - "id": "nmdc:ca16203099dc1d6bbce00320bb753974", - "file_size_bytes": 49630516 - }, - { - "name": "Gp0127634_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127634", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_structural_annotation.gff", - "md5_checksum": "fffbb7b52a4886755df429e22a152427", - "id": "nmdc:fffbb7b52a4886755df429e22a152427", - "file_size_bytes": 2519 - }, - { - "name": "Gp0127634_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127634", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_functional_annotation.gff", - "md5_checksum": "f63b43e7797845fa94dc6f552ba1ea39", - "id": "nmdc:f63b43e7797845fa94dc6f552ba1ea39", - "file_size_bytes": 57589694 - }, - { - "name": "Gp0127634_KO TSV file", - "description": "KO TSV file for Gp0127634", - "data_object_type": 
"Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko.tsv", - "md5_checksum": "8ab8f39bfc76267daa4ce5a34811bff1", - "id": "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", - "file_size_bytes": 6602379 - }, - { - "name": "Gp0127634_EC TSV file", - "description": "EC TSV file for Gp0127634", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ec.tsv", - "md5_checksum": "d6ff8f2f0d5c77495b2b43a7020e5730", - "id": "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", - "file_size_bytes": 4399755 - }, - { - "name": "Gp0127634_COG GFF file", - "description": "COG GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cog.gff", - "md5_checksum": "763d16c5dbadbeba61ceee91ed5209f3", - "id": "nmdc:763d16c5dbadbeba61ceee91ed5209f3", - "file_size_bytes": 33737036 - }, - { - "name": "Gp0127634_PFAM GFF file", - "description": "PFAM GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_pfam.gff", - "md5_checksum": "52cba722f402eea06fda75ec1e5a5103", - "id": "nmdc:52cba722f402eea06fda75ec1e5a5103", - "file_size_bytes": 24757263 - }, - { - "name": "Gp0127634_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_tigrfam.gff", - "md5_checksum": "ad358ce4b479febc34a2acdd9f249517", - "id": "nmdc:ad358ce4b479febc34a2acdd9f249517", - "file_size_bytes": 2661782 - }, - { - "name": "Gp0127634_SMART GFF file", - "description": "SMART GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_smart.gff", - "md5_checksum": "10a0ca82cf662ac4d9b465f05ed1fb2b", - "id": "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", - "file_size_bytes": 7506881 - }, - { - "name": "Gp0127634_SuperFam GFF 
file", - "description": "SuperFam GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_supfam.gff", - "md5_checksum": "d0e8459e010015e726c31f0f8c18d359", - "id": "nmdc:d0e8459e010015e726c31f0f8c18d359", - "file_size_bytes": 42013513 - }, - { - "name": "Gp0127634_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cath_funfam.gff", - "md5_checksum": "41d7ca149efb4c12bce48e5a19649a84", - "id": "nmdc:41d7ca149efb4c12bce48e5a19649a84", - "file_size_bytes": 31747110 - }, - { - "name": "Gp0127634_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko_ec.gff", - "md5_checksum": "9da1883e60979e17665b0211198c35f0", - "id": "nmdc:9da1883e60979e17665b0211198c35f0", - "file_size_bytes": 20999001 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127634_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.tooShort.fa", - "md5_checksum": "3c8eadbcf4f583090d8f378ea6758799", - "id": "nmdc:3c8eadbcf4f583090d8f378ea6758799", - "file_size_bytes": 71683990 - }, - { - "name": "Gp0127634_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127634", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.unbinned.fa", - "md5_checksum": "1be647dc835ee8fe666fe9893266bd21", - "id": "nmdc:1be647dc835ee8fe666fe9893266bd21", - "file_size_bytes": 11353478 - }, - { - "name": "Gp0127634_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127634", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_checkm_qa.out", - "md5_checksum": "6cc278c455cafc691333c0a74fe6c540", - "id": "nmdc:6cc278c455cafc691333c0a74fe6c540", - "file_size_bytes": 936 - }, - { - "name": "Gp0127634_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127634", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_hqmq_bin.zip", - "md5_checksum": "de4d0180489bdaa5526977508a489b99", - "id": "nmdc:de4d0180489bdaa5526977508a489b99", - "file_size_bytes": 518340 - }, - { - "name": "Gp0127634_metabat2 bins", - "description": "metabat2 bins for Gp0127634", - "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_metabat_bin.zip", - "md5_checksum": "16a08c4a3a6e9c70a5d47209177d0e60", - "id": "nmdc:16a08c4a3a6e9c70a5d47209177d0e60", - "file_size_bytes": 63768 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e74" - }, - "description": "Assembled AGP file for gold:Gp0127634", - "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly.agp", - "file_size_bytes": 11578650, - "type": "nmdc:DataObject", - "id": "nmdc:38fc6fb4189df6c3ba567ce6e9eb0492", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e75" - }, - "description": "Assembled scaffold fasta for gold:Gp0127634", - "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly_scaffolds.fna", - 
"file_size_bytes": 83708608, - "type": "nmdc:DataObject", - "id": "nmdc:3c3519053f5bf24ac5faa2db3b9c258b", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e76" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127634", - "url": "https://data.microbiomedata.org/data/1781_100336/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2483793009, - "type": "nmdc:DataObject", - "id": "nmdc:ac6c2b405bcedaa830f122f55b389245", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e77" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127634", - "url": "https://data.microbiomedata.org/data/1781_100336/assembly/mapping_stats.txt", - "file_size_bytes": 13192221, - "type": "nmdc:DataObject", - "id": "nmdc:6783fd1b5292dc59fa04a4f20725b721", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e78" - }, - "description": "Assembled contigs fasta for gold:Gp0127634", - "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly_contigs.fna", - "file_size_bytes": 84236599, - "type": "nmdc:DataObject", - "id": "nmdc:b502e282cb52690232ce6ec6e1cfd4bc", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b40" - }, - "id": "nmdc:613dbb558d3cf5ece974268c1b0b1243", - "name": "1781_100336.krona.html", - "description": "Gold:Gp0127634 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100336/ReadbasedAnalysis/centrifuge/1781_100336.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b4b" - }, - "id": "nmdc:009de2a4d412df442a83e43028aed210", - "name": "1781_100336.json", - "description": 
"Gold:Gp0127634 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100336/ReadbasedAnalysis/1781_100336.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e5" - }, - "id": "nmdc:941f2a63752dd68925387a6dde7bd88a", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127634", - "file_size_bytes": 69587941, - "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165e9" - }, - "id": "nmdc:77e41ad1f4836947b39a43f7ea971076", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127634", - "file_size_bytes": 12216952, - "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ec" - }, - "id": "nmdc:aba84545a4941088387ffe076be49a4d", - "name": "gold:Gp0127634.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127634", - "file_size_bytes": 367898, - "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ed" - }, - "id": "nmdc:3713d982c02d72fba230a0d408598a1f", - "name": "gold:Gp0127634.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127634", - "file_size_bytes": 272314, - "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ef" - }, - "id": "nmdc:29c82fc14dfa2a0f5dc057d234ac6c5a", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127634", - "file_size_bytes": 
1071, - "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f0" - }, - "id": "nmdc:0e28d59da33c112a00b6793a19b71189", - "name": "gold:Gp0127634.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127634", - "file_size_bytes": 320360, - "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cef" - }, - "description": "Structural annotation GFF file for gold:Gp0127634", - "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_structural_annotation.gff", - "md5_checksum": "1e286398d6b164538bbdefb9cc8a41e9", - "file_size_bytes": 3385, - "id": "nmdc:1e286398d6b164538bbdefb9cc8a41e9", - "name": "gold:Gp0127634_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cf1" - }, - "description": "EC TSV File for gold:Gp0127634", - "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_ec.tsv", - "md5_checksum": "01b078b5b9dde5699e9b9ab02af272df", - "file_size_bytes": 3385, - "id": "nmdc:01b078b5b9dde5699e9b9ab02af272df", - "name": "gold:Gp0127634_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cf4" - }, - "description": "Protein FAA for gold:Gp0127634", - "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_proteins.faa", - "md5_checksum": "3374b8708ae6b77b16cd01ce4f33ee72", - "file_size_bytes": 3385, - "id": "nmdc:3374b8708ae6b77b16cd01ce4f33ee72", - "name": "gold:Gp0127634_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - 
"_id": { - "$oid": "649b00401ae706d7b5b16cf9" - }, - "description": "KO TSV File for gold:Gp0127634", - "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_ko.tsv", - "md5_checksum": "5b2ff10d97d2b516716a67dafb137937", - "file_size_bytes": 3385, - "id": "nmdc:5b2ff10d97d2b516716a67dafb137937", - "name": "gold:Gp0127634_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cfb" - }, - "description": "Functional annotation GFF file for gold:Gp0127634", - "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_functional_annotation.gff", - "md5_checksum": "803451414e1935d4de9f9911963efe8d", - "file_size_bytes": 3385, - "id": "nmdc:803451414e1935d4de9f9911963efe8d", - "name": "gold:Gp0127634_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34713" - }, - "has_input": [ - "nmdc:2a30cf44cc596923301befc34edf6c0a", - "nmdc:356d9ca409747590849dd894998166ee", - "nmdc:f63b43e7797845fa94dc6f552ba1ea39" - ], - "too_short_contig_num": 168596, - "part_of": [ - "nmdc:mga0r0vf18" - ], - "binned_contig_num": 278, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:3c8eadbcf4f583090d8f378ea6758799", - "nmdc:1be647dc835ee8fe666fe9893266bd21", - "nmdc:6cc278c455cafc691333c0a74fe6c540", - "nmdc:de4d0180489bdaa5526977508a489b99", - "nmdc:16a08c4a3a6e9c70a5d47209177d0e60" - ], - "was_informed_by": "gold:Gp0127634", - "input_contig_num": 175822, - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0r0vf18", - "mags_list": [ - { - "number_of_contig": 235, - 
"completeness": 68.28, - "bin_name": "bins.1", - "gene_count": 2056, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 2.91, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 34 - }, - { - "number_of_contig": 43, - "completeness": 10.69, - "bin_name": "bins.2", - "gene_count": 247, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - } - ], - "unbinned_contig_num": 6948, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:49:55+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b3" - }, - "has_input": [ - "nmdc:2a30cf44cc596923301befc34edf6c0a" - ], - "part_of": [ - "nmdc:mga0r0vf18" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ca16203099dc1d6bbce00320bb753974", - "nmdc:fffbb7b52a4886755df429e22a152427", - "nmdc:f63b43e7797845fa94dc6f552ba1ea39", - "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", - "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", - "nmdc:763d16c5dbadbeba61ceee91ed5209f3", - "nmdc:52cba722f402eea06fda75ec1e5a5103", - "nmdc:ad358ce4b479febc34a2acdd9f249517", - "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", - "nmdc:d0e8459e010015e726c31f0f8c18d359", - "nmdc:41d7ca149efb4c12bce48e5a19649a84", - "nmdc:9da1883e60979e17665b0211198c35f0" - ], - "was_informed_by": "gold:Gp0127634", - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - 
"execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0r0vf18", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:49:55+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f9c" - }, - "has_input": [ - "nmdc:ac889627d813c8e34cfbf79a4264c590" - ], - "part_of": [ - "nmdc:mga0r0vf18" - ], - "ctg_logsum": 142091, - "scaf_logsum": 142614, - "gap_pct": 0.00138, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:2a30cf44cc596923301befc34edf6c0a", - "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", - "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", - "nmdc:825969095ff134b195b06a40fcc6089a", - "nmdc:356d9ca409747590849dd894998166ee" - ], - "asm_score": 5.751, - "was_informed_by": "gold:Gp0127634", - "ctg_powsum": 15837, - "scaf_max": 33833, - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "scaf_powsum": 15897, - "execution_resource": "NERSC-Cori", - "contigs": 175824, - "name": "Assembly Activity for nmdc:mga0r0vf18", - "ctg_max": 33833, - "gc_std": 0.09424, - "contig_bp": 78219291, - "gc_avg": 0.62214, - "started_at_time": "2021-10-11T02:23:30Z", - "scaf_bp": 78220371, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 175734, - "ended_at_time": "2021-10-11T04:49:55+00:00", - "ctg_l50": 412, - "ctg_l90": 286, - "ctg_n50": 53340, - "ctg_n90": 150131, - "scaf_l50": 412, - "scaf_l90": 286, - "scaf_n50": 53321, - "scaf_n90": 150048 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b46" - }, - "id": "nmdc:omprc-11-mbv2jc69", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", - "description": "Riverbed sediment samples were 
collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-jdgzjq31" - ], - "has_output": [ - "jgi:574fe09c7ded5e3df1ee1487" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127634" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c86a" - }, - "has_input": [ - "nmdc:2b7712d32a159eca66fc50936de000a5" - ], - "part_of": [ - "nmdc:mga0r0vf18" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ac889627d813c8e34cfbf79a4264c590", - "nmdc:0dfd55be1779ae7922d80aa22034c9a1" - ], - "was_informed_by": "gold:Gp0127634", - "input_read_count": 29872658, - "output_read_bases": 4172764161, - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4510771358, - "name": "Read QC Activity for nmdc:mga0r0vf18", - "output_read_count": 27896694, - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:49:55+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf2d" - }, - "has_input": [ - "nmdc:ac889627d813c8e34cfbf79a4264c590" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0526ea84f6e7893f5b6d62a32f81a199", - "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", - "nmdc:366ab38bb6de9591f31a086d42ac23d6", - 
"nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", - "nmdc:0ca043b630ba304cb80603e8332c78cf", - "nmdc:059ff39ced52c0df45a331c4e9e10fdd", - "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", - "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", - "nmdc:678e7c401a6971629f7d3ada83b307ab" - ], - "was_informed_by": "gold:Gp0127634", - "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", - "started_at_time": "2021-10-11T02:23:30Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:49:55+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2197847748, - "type": "nmdc:DataObject", - "id": "jgi:574fde607ded5e3df1ee1403", - "name": "10533.1.165310.GTTCGGT-AACCGAA.fastq.gz" - }, - { - "name": "Gp0127635_Filtered Reads", - "description": "Filtered Reads for Gp0127635", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filtered.fastq.gz", - "md5_checksum": "f8bc16e232f7ba0f6d6b5ca35a708c36", - "id": "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", - "file_size_bytes": 1951049105 - }, - { - "name": "Gp0127635_Filtered Stats", - "description": "Filtered Stats for Gp0127635", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filterStats.txt", - "md5_checksum": "fbc260443529d6e8067efdac3b58a8c1", - "id": "nmdc:fbc260443529d6e8067efdac3b58a8c1", - "file_size_bytes": 280 - }, - { - "name": "Gp0127635_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for 
Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", - "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", - "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "file_size_bytes": 3696 - }, - { - "name": "Gp0127635_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", - "md5_checksum": "ddec46781153da60da815c65871f5413", - "id": "nmdc:ddec46781153da60da815c65871f5413", - "file_size_bytes": 677459 - }, - { - "name": "Gp0127635_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127635", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", - "md5_checksum": "e626ec18dba4885613240927cbb99d8b", - "id": "nmdc:e626ec18dba4885613240927cbb99d8b", - "file_size_bytes": 236164 - }, - { - "name": "Gp0127635_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127635", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", - "md5_checksum": "f8486e4ee029038a452a3484db10cabc", - "id": "nmdc:f8486e4ee029038a452a3484db10cabc", - "file_size_bytes": 1796179546 - }, - { - "name": "Gp0127635_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127635", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", - "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", - "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "file_size_bytes": 254661 - }, - { - "name": "Gp0127635_Centrifuge Krona HTML report", - 
"description": "Centrifuge Krona HTML report for Gp0127635", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", - "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", - "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "file_size_bytes": 2333534 - }, - { - "name": "Gp0127635_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127635", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", - "md5_checksum": "59807dae5216b11c96df5593a26d9a88", - "id": "nmdc:59807dae5216b11c96df5593a26d9a88", - "file_size_bytes": 1432249556 - }, - { - "name": "Gp0127635_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127635", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", - "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", - "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "file_size_bytes": 639738 - }, - { - "name": "Gp0127635_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127635", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", - "md5_checksum": "6748020214a3d68ad588e3548107208e", - "id": "nmdc:6748020214a3d68ad588e3548107208e", - "file_size_bytes": 3996293 - }, - { - "name": "Gp0127635_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127635", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_contigs.fna", - "md5_checksum": "3d1b5043e0c49ac6062aeba4ebbba910", - "id": "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", - 
"file_size_bytes": 111964628 - }, - { - "name": "Gp0127635_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127635", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_scaffolds.fna", - "md5_checksum": "4d4497f63f95f7d2f8986178dab3ae52", - "id": "nmdc:4d4497f63f95f7d2f8986178dab3ae52", - "file_size_bytes": 111342667 - }, - { - "name": "Gp0127635_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_covstats.txt", - "md5_checksum": "ac98d3d128ec5b045a9ef019a5653b99", - "id": "nmdc:ac98d3d128ec5b045a9ef019a5653b99", - "file_size_bytes": 16397988 - }, - { - "name": "Gp0127635_Assembled AGP file", - "description": "Assembled AGP file for Gp0127635", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_assembly.agp", - "md5_checksum": "1d0302bec371a73f040d052f4b66277c", - "id": "nmdc:1d0302bec371a73f040d052f4b66277c", - "file_size_bytes": 15325341 - }, - { - "name": "Gp0127635_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127635", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_pairedMapped_sorted.bam", - "md5_checksum": "2d8cca230f439e38f1e628666e40e013", - "id": "nmdc:2d8cca230f439e38f1e628666e40e013", - "file_size_bytes": 2159251548 - }, - { - "name": "Gp0127635_Protein FAA", - "description": "Protein FAA for Gp0127635", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_proteins.faa", - "md5_checksum": "bb7eae2b3dbc58168b9122098f078bb5", - "id": "nmdc:bb7eae2b3dbc58168b9122098f078bb5", - "file_size_bytes": 63157189 - }, - { - "name": 
"Gp0127635_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127635", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_structural_annotation.gff", - "md5_checksum": "2af7f6c008858f2f0d47c00fa9758129", - "id": "nmdc:2af7f6c008858f2f0d47c00fa9758129", - "file_size_bytes": 2526 - }, - { - "name": "Gp0127635_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127635", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_functional_annotation.gff", - "md5_checksum": "dd3668477e39a65243179dfb9e4bf26e", - "id": "nmdc:dd3668477e39a65243179dfb9e4bf26e", - "file_size_bytes": 71092075 - }, - { - "name": "Gp0127635_KO TSV file", - "description": "KO TSV file for Gp0127635", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko.tsv", - "md5_checksum": "be0e9a5999ddfd46bf5daac56aa96b86", - "id": "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", - "file_size_bytes": 8023056 - }, - { - "name": "Gp0127635_EC TSV file", - "description": "EC TSV file for Gp0127635", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ec.tsv", - "md5_checksum": "95a6a1f91bf18bc1a781a8890d2e1bc5", - "id": "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", - "file_size_bytes": 5303502 - }, - { - "name": "Gp0127635_COG GFF file", - "description": "COG GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cog.gff", - "md5_checksum": "6960907313875913a789e1fda46ed34e", - "id": "nmdc:6960907313875913a789e1fda46ed34e", - "file_size_bytes": 42106254 - }, - { - "name": "Gp0127635_PFAM GFF file", - "description": "PFAM GFF file for 
Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_pfam.gff", - "md5_checksum": "033da43cdca9f81ed2270a9094fdb065", - "id": "nmdc:033da43cdca9f81ed2270a9094fdb065", - "file_size_bytes": 31806020 - }, - { - "name": "Gp0127635_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_tigrfam.gff", - "md5_checksum": "e9603ffd918db8a21df1310b890315ff", - "id": "nmdc:e9603ffd918db8a21df1310b890315ff", - "file_size_bytes": 3500524 - }, - { - "name": "Gp0127635_SMART GFF file", - "description": "SMART GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_smart.gff", - "md5_checksum": "fd98e0cfe1f4ca7b9e4af833c5ef199c", - "id": "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", - "file_size_bytes": 9346082 - }, - { - "name": "Gp0127635_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_supfam.gff", - "md5_checksum": "03481d99958ae1c9dcccb8fd91c0bbf7", - "id": "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", - "file_size_bytes": 52582333 - }, - { - "name": "Gp0127635_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cath_funfam.gff", - "md5_checksum": "f0a96fb57947358a42053e9fb7134e70", - "id": "nmdc:f0a96fb57947358a42053e9fb7134e70", - "file_size_bytes": 40179818 - }, - { - "name": "Gp0127635_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko_ec.gff", - "md5_checksum": "9737b61f2e6e923ac662e0a1c4f6aaa9", - "id": "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9", - "file_size_bytes": 25482964 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality 
assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127635_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.tooShort.fa", - "md5_checksum": "daed5e3af5201fe510e780f155f90bc3", - "id": "nmdc:daed5e3af5201fe510e780f155f90bc3", - "file_size_bytes": 86476884 - }, - { - "name": "Gp0127635_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.unbinned.fa", - "md5_checksum": "7cdb1c384c8bc63b3c127e5bc434ac6b", - "id": "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", - "file_size_bytes": 22898396 - }, - { - "name": "Gp0127635_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127635", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_checkm_qa.out", - "md5_checksum": "b5ae13756638f09d74fdbe03183b231f", - "id": "nmdc:b5ae13756638f09d74fdbe03183b231f", - "file_size_bytes": 1240 - }, - { - "name": "Gp0127635_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127635", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_hqmq_bin.zip", - "md5_checksum": "1dc5796596177362849da19fc4e50b13", - "id": "nmdc:1dc5796596177362849da19fc4e50b13", - "file_size_bytes": 182 - 
}, - { - "name": "Gp0127635_metabat2 bins", - "description": "metabat2 bins for Gp0127635", - "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_metabat_bin.zip", - "md5_checksum": "fba0bfa144e9ef179edb10b5a941c259", - "id": "nmdc:fba0bfa144e9ef179edb10b5a941c259", - "file_size_bytes": 795127 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e79" - }, - "description": "Assembled contigs fasta for gold:Gp0127635", - "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly_contigs.fna", - "file_size_bytes": 111137612, - "type": "nmdc:DataObject", - "id": "nmdc:178298f959546299f78fb2bff07cd460", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e7a" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127635", - "url": "https://data.microbiomedata.org/data/1781_100337/assembly/mapping_stats.txt", - "file_size_bytes": 15570972, - "type": "nmdc:DataObject", - "id": "nmdc:4fc895272dffb49edac9e03d08684d05", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e7b" - }, - "description": "Assembled AGP file for gold:Gp0127635", - "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly.agp", - "file_size_bytes": 13670021, - "type": "nmdc:DataObject", - "id": "nmdc:b148c0b9078ed2c9dc0ef9d47d6c4273", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e7e" - }, - "description": "Assembled scaffold fasta for gold:Gp0127635", - "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly_scaffolds.fna", - "file_size_bytes": 110516271, - "type": "nmdc:DataObject", - "id": "nmdc:c2ae4e2ecc5f68caf6fb04e4c0da29a8", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e7f" - }, - "description": 
"Metagenome Alignment BAM file for gold:Gp0127635", - "url": "https://data.microbiomedata.org/data/1781_100337/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2128896439, - "type": "nmdc:DataObject", - "id": "nmdc:2e8b9eb5d9a8cbc0e2289bd29ab58bd5", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b49" - }, - "id": "nmdc:1ef5e7e08bb9692d1ce21b338888f92b", - "name": "1781_100337.krona.html", - "description": "Gold:Gp0127635 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100337/ReadbasedAnalysis/centrifuge/1781_100337.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b51" - }, - "id": "nmdc:c9595cd833ad6e651762d7ee3a8e9e5b", - "name": "1781_100337.json", - "description": "Gold:Gp0127635 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100337/ReadbasedAnalysis/1781_100337.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165ee" - }, - "id": "nmdc:5eb1b1b53aab751c8ad74e9547ff8a70", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127635", - "file_size_bytes": 84061517, - "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f2" - }, - "id": "nmdc:1c62a2e67e8af295a7f57e2b4492dc22", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127635", - "file_size_bytes": 24394459, - "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f5" - }, - "id": "nmdc:093a82c4b7951c837e8a281cfae9f128", - "name": "checkm_qa.out", - 
"description": "metabat2 bin checkm quality assessment result for gold:Gp0127635", - "file_size_bytes": 1071, - "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f6" - }, - "id": "nmdc:0f5cac2a54bebd617c1c00bcd7e4ba50", - "name": "gold:Gp0127635.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127635", - "file_size_bytes": 365625, - "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f7" - }, - "id": "nmdc:db607d74248055a9962eda6db70c280e", - "name": "gold:Gp0127635.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127635", - "file_size_bytes": 217636, - "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165fd" - }, - "id": "nmdc:90a132bcef3ba4ebdddc7d4b1297f157", - "name": "gold:Gp0127635.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127635", - "file_size_bytes": 348955, - "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cf0" - }, - "description": "Structural annotation GFF file for gold:Gp0127635", - "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_structural_annotation.gff", - "md5_checksum": "e1cd02b3a92223d8e30e8d7c90837d9a", - "file_size_bytes": 3385, - "id": "nmdc:e1cd02b3a92223d8e30e8d7c90837d9a", - "name": "gold:Gp0127635_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - 
{ - "_id": { - "$oid": "649b00401ae706d7b5b16cf6" - }, - "description": "Protein FAA for gold:Gp0127635", - "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_proteins.faa", - "md5_checksum": "b2ee2639269e6d665f772fc8c4e31d07", - "file_size_bytes": 3385, - "id": "nmdc:b2ee2639269e6d665f772fc8c4e31d07", - "name": "gold:Gp0127635_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cf7" - }, - "description": "Functional annotation GFF file for gold:Gp0127635", - "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_functional_annotation.gff", - "md5_checksum": "4768d5de701a1ac55ed0c2d57a270dd2", - "file_size_bytes": 3385, - "id": "nmdc:4768d5de701a1ac55ed0c2d57a270dd2", - "name": "gold:Gp0127635_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cfc" - }, - "description": "KO TSV File for gold:Gp0127635", - "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_ko.tsv", - "md5_checksum": "4cddc89fb8b405210d66b836825c37ee", - "file_size_bytes": 3385, - "id": "nmdc:4cddc89fb8b405210d66b836825c37ee", - "name": "gold:Gp0127635_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16cfe" - }, - "description": "EC TSV File for gold:Gp0127635", - "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_ec.tsv", - "md5_checksum": "9e0a73962f7014df93613b04fae9f8be", - "file_size_bytes": 3385, - "id": "nmdc:9e0a73962f7014df93613b04fae9f8be", - "name": "gold:Gp0127635_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - 
"_id": { - "$oid": "649b0052ec087f6bbab34708" - }, - "has_input": [ - "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", - "nmdc:2d8cca230f439e38f1e628666e40e013", - "nmdc:dd3668477e39a65243179dfb9e4bf26e" - ], - "too_short_contig_num": 192406, - "part_of": [ - "nmdc:mga0ak4p20" - ], - "binned_contig_num": 502, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:daed5e3af5201fe510e780f155f90bc3", - "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", - "nmdc:b5ae13756638f09d74fdbe03183b231f", - "nmdc:1dc5796596177362849da19fc4e50b13", - "nmdc:fba0bfa144e9ef179edb10b5a941c259" - ], - "was_informed_by": "gold:Gp0127635", - "input_contig_num": 206754, - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0ak4p20", - "mags_list": [ - { - "number_of_contig": 203, - "completeness": 41.91, - "bin_name": "bins.1", - "gene_count": 1456, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 3.88, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 24 - }, - { - "number_of_contig": 171, - "completeness": 8.33, - "bin_name": "bins.2", - "gene_count": 880, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - }, - { - "number_of_contig": 55, - "completeness": 14.66, - "bin_name": "bins.3", - "gene_count": 269, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - 
"num_t_rna": 1 - }, - { - "number_of_contig": 73, - "completeness": 0.0, - "bin_name": "bins.4", - "gene_count": 475, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - } - ], - "unbinned_contig_num": 13846, - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:11:48+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9a7" - }, - "has_input": [ - "nmdc:3d1b5043e0c49ac6062aeba4ebbba910" - ], - "part_of": [ - "nmdc:mga0ak4p20" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:bb7eae2b3dbc58168b9122098f078bb5", - "nmdc:2af7f6c008858f2f0d47c00fa9758129", - "nmdc:dd3668477e39a65243179dfb9e4bf26e", - "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", - "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", - "nmdc:6960907313875913a789e1fda46ed34e", - "nmdc:033da43cdca9f81ed2270a9094fdb065", - "nmdc:e9603ffd918db8a21df1310b890315ff", - "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", - "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", - "nmdc:f0a96fb57947358a42053e9fb7134e70", - "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9" - ], - "was_informed_by": "gold:Gp0127635", - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0ak4p20", - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:11:48+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f90" - }, - "has_input": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" - ], - "part_of": [ - 
"nmdc:mga0ak4p20" - ], - "ctg_logsum": 269360, - "scaf_logsum": 270403, - "gap_pct": 0.00195, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", - "nmdc:4d4497f63f95f7d2f8986178dab3ae52", - "nmdc:ac98d3d128ec5b045a9ef019a5653b99", - "nmdc:1d0302bec371a73f040d052f4b66277c", - "nmdc:2d8cca230f439e38f1e628666e40e013" - ], - "asm_score": 3.934, - "was_informed_by": "gold:Gp0127635", - "ctg_powsum": 29422, - "scaf_max": 23775, - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", - "scaf_powsum": 29544, - "execution_resource": "NERSC-Cori", - "contigs": 206757, - "name": "Assembly Activity for nmdc:mga0ak4p20", - "ctg_max": 23775, - "gc_std": 0.10033, - "contig_bp": 103842002, - "gc_avg": 0.61621, - "started_at_time": "2021-10-11T02:26:59Z", - "scaf_bp": 103844032, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 206599, - "ended_at_time": "2021-10-11T04:11:48+00:00", - "ctg_l50": 496, - "ctg_l90": 290, - "ctg_n50": 55322, - "ctg_n90": 171862, - "scaf_l50": 497, - "scaf_l90": 290, - "scaf_n50": 55067, - "scaf_n90": 171721 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b47" - }, - "id": "nmdc:omprc-11-kc23zq65", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-59xteq78" - ], - "has_output": [ - "jgi:574fde607ded5e3df1ee1403" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no 
vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127635" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c860" - }, - "has_input": [ - "nmdc:1a16fdf096087338922b288165a924b8" - ], - "part_of": [ - "nmdc:mga0ak4p20" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", - "nmdc:fbc260443529d6e8067efdac3b58a8c1" - ], - "was_informed_by": "gold:Gp0127635", - "input_read_count": 25320866, - "output_read_bases": 3673182178, - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3823450766, - "name": "Read QC Activity for nmdc:mga0ak4p20", - "output_read_count": 24600396, - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:11:48+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf25" - }, - "has_input": [ - "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d8a410c52c8f6cf0097b674492cc3926", - "nmdc:ddec46781153da60da815c65871f5413", - "nmdc:e626ec18dba4885613240927cbb99d8b", - "nmdc:f8486e4ee029038a452a3484db10cabc", - "nmdc:4121f2ec52b80b7feb9d9a4749080125", - "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", - "nmdc:59807dae5216b11c96df5593a26d9a88", - "nmdc:a491f6797bd7294dbc5ba301efb3466e", - "nmdc:6748020214a3d68ad588e3548107208e" - ], - "was_informed_by": "gold:Gp0127635", - "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", - "execution_resource": "NERSC-Cori", - "name": "ReadBased 
Analysis Activity for nmdc:mga0ak4p20", - "started_at_time": "2021-10-11T02:26:59Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:11:48+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1954789686, - "type": "nmdc:DataObject", - "id": "jgi:574fde647ded5e3df1ee1406", - "name": "10533.1.165310.CGTAGGT-AACCTAC.fastq.gz" - }, - { - "name": "Gp0127637_Filtered Reads", - "description": "Filtered Reads for Gp0127637", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", - "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", - "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", - "file_size_bytes": 1553219358 - }, - { - "name": "Gp0127637_Filtered Stats", - "description": "Filtered Stats for Gp0127637", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", - "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", - "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", - "file_size_bytes": 289 - }, - { - "name": "Gp0127637_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", - "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", - "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "file_size_bytes": 660 - }, - { - "name": "Gp0127637_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127637", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", - "md5_checksum": "37dd1d73ad47979ee5284830d27df535", - "id": "nmdc:37dd1d73ad47979ee5284830d27df535", - "file_size_bytes": 594054 - }, - { - "name": "Gp0127637_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127637", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", - "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", - "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "file_size_bytes": 227750 - }, - { - "name": "Gp0127637_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127637", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", - "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", - "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "file_size_bytes": 1457058272 - }, - { - "name": "Gp0127637_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127637", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", - "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", - "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "file_size_bytes": 251867 - }, - { - "name": "Gp0127637_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127637", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", - "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", - "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", - "file_size_bytes": 2325282 - }, - { - "name": 
"Gp0127637_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127637", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", - "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", - "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - "file_size_bytes": 1160106364 - }, - { - "name": "Gp0127637_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127637", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", - "md5_checksum": "9a1826f66ee45187d627076d11dc491f", - "id": "nmdc:9a1826f66ee45187d627076d11dc491f", - "file_size_bytes": 613810 - }, - { - "name": "Gp0127637_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127637", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", - "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", - "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", - "file_size_bytes": 3853908 - }, - { - "name": "Gp0127637_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127637", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_contigs.fna", - "md5_checksum": "aee81646e593045bbb32a0012870b88b", - "id": "nmdc:aee81646e593045bbb32a0012870b88b", - "file_size_bytes": 117200777 - }, - { - "name": "Gp0127637_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127637", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_scaffolds.fna", - "md5_checksum": "f1026db242cad285204c9c3d6307c183", - "id": 
"nmdc:f1026db242cad285204c9c3d6307c183", - "file_size_bytes": 116554638 - }, - { - "name": "Gp0127637_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_covstats.txt", - "md5_checksum": "b02b0a0145d14e97a31e6a6f7e4b8dc8", - "id": "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", - "file_size_bytes": 17037754 - }, - { - "name": "Gp0127637_Assembled AGP file", - "description": "Assembled AGP file for Gp0127637", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_assembly.agp", - "md5_checksum": "8afcf1e8b7b3f35edaefee7a0c31e19f", - "id": "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", - "file_size_bytes": 15931363 - }, - { - "name": "Gp0127637_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127637", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_pairedMapped_sorted.bam", - "md5_checksum": "dee5fa37f57a24685b65e00380d6e433", - "id": "nmdc:dee5fa37f57a24685b65e00380d6e433", - "file_size_bytes": 1739825120 - }, - { - "name": "Gp0127637_Protein FAA", - "description": "Protein FAA for Gp0127637", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_proteins.faa", - "md5_checksum": "69603434971f93dbd79860c18dd5c61a", - "id": "nmdc:69603434971f93dbd79860c18dd5c61a", - "file_size_bytes": 66263123 - }, - { - "name": "Gp0127637_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127637", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_structural_annotation.gff", - "md5_checksum": "bf8f822c6730b4cc73715ced3d25c262", - "id": 
"nmdc:bf8f822c6730b4cc73715ced3d25c262", - "file_size_bytes": 2521 - }, - { - "name": "Gp0127637_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127637", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_functional_annotation.gff", - "md5_checksum": "b9ec0754ffaa338c899244703bc91386", - "id": "nmdc:b9ec0754ffaa338c899244703bc91386", - "file_size_bytes": 74459552 - }, - { - "name": "Gp0127637_KO TSV file", - "description": "KO TSV file for Gp0127637", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko.tsv", - "md5_checksum": "22402cc61770feb5a0aaa4f760808366", - "id": "nmdc:22402cc61770feb5a0aaa4f760808366", - "file_size_bytes": 8394894 - }, - { - "name": "Gp0127637_EC TSV file", - "description": "EC TSV file for Gp0127637", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ec.tsv", - "md5_checksum": "8c96f7faa38c361acc247b5a107a6b54", - "id": "nmdc:8c96f7faa38c361acc247b5a107a6b54", - "file_size_bytes": 5556852 - }, - { - "name": "Gp0127637_COG GFF file", - "description": "COG GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cog.gff", - "md5_checksum": "7a28d1eafd3a3c181e95f61eb3d18bf1", - "id": "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", - "file_size_bytes": 44328195 - }, - { - "name": "Gp0127637_PFAM GFF file", - "description": "PFAM GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_pfam.gff", - "md5_checksum": "89a8657f659710b3927baab155917fdf", - "id": "nmdc:89a8657f659710b3927baab155917fdf", - "file_size_bytes": 33562431 - }, - { - "name": "Gp0127637_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127637", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_tigrfam.gff", - "md5_checksum": "9b9ecf34f2f6ef6865d4864f5debfbb7", - "id": "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", - "file_size_bytes": 3752251 - }, - { - "name": "Gp0127637_SMART GFF file", - "description": "SMART GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_smart.gff", - "md5_checksum": "5cae6736713d02ccbe26543d733875cb", - "id": "nmdc:5cae6736713d02ccbe26543d733875cb", - "file_size_bytes": 9871224 - }, - { - "name": "Gp0127637_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_supfam.gff", - "md5_checksum": "a64350eb947c199cc1fbfb087191c0c7", - "id": "nmdc:a64350eb947c199cc1fbfb087191c0c7", - "file_size_bytes": 55329770 - }, - { - "name": "Gp0127637_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cath_funfam.gff", - "md5_checksum": "b8492828a1ad078d9c3192bab4d9a3fa", - "id": "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", - "file_size_bytes": 42052238 - }, - { - "name": "Gp0127637_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko_ec.gff", - "md5_checksum": "2471f27b6cf11b6f93c791c273989731", - "id": "nmdc:2471f27b6cf11b6f93c791c273989731", - "file_size_bytes": 26689447 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - 
"file_size_bytes": 0 - }, - { - "name": "Gp0127637_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.tooShort.fa", - "md5_checksum": "7968c6b88e49f066bd24982b4d54965b", - "id": "nmdc:7968c6b88e49f066bd24982b4d54965b", - "file_size_bytes": 91577123 - }, - { - "name": "Gp0127637_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.unbinned.fa", - "md5_checksum": "120fbaa7439eb628d9a982de573446a8", - "id": "nmdc:120fbaa7439eb628d9a982de573446a8", - "file_size_bytes": 22556841 - }, - { - "name": "Gp0127637_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127637", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_checkm_qa.out", - "md5_checksum": "347a7ee18b37674e031cca9046e92623", - "id": "nmdc:347a7ee18b37674e031cca9046e92623", - "file_size_bytes": 1092 - }, - { - "name": "Gp0127637_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127637", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_hqmq_bin.zip", - "md5_checksum": "de1da5ea4bfdf3131a6c510b79b145c2", - "id": "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", - "file_size_bytes": 504932 - }, - { - "name": "Gp0127637_metabat2 bins", - "description": "metabat2 bins for Gp0127637", - "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_metabat_bin.zip", - "md5_checksum": "382d00338a5e4829285e58a203de153e", - "id": "nmdc:382d00338a5e4829285e58a203de153e", - "file_size_bytes": 432910 - }, - { - "_id": { - 
"$oid": "649b003d1ae706d7b5b14e80" - }, - "description": "Assembled contigs fasta for gold:Gp0127637", - "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly_contigs.fna", - "file_size_bytes": 116341325, - "type": "nmdc:DataObject", - "id": "nmdc:d66bd2d4b3ad1abef6787addfb5aa8b6", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e83" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127637", - "url": "https://data.microbiomedata.org/data/1781_100339/assembly/mapping_stats.txt", - "file_size_bytes": 16178302, - "type": "nmdc:DataObject", - "id": "nmdc:c15461f775c7a7b44ec57af9e2897e17", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e84" - }, - "description": "Assembled scaffold fasta for gold:Gp0127637", - "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly_scaffolds.fna", - "file_size_bytes": 115695690, - "type": "nmdc:DataObject", - "id": "nmdc:cd9b2948e3c873bc1c9a2aebe3401cc5", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e87" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127637", - "url": "https://data.microbiomedata.org/data/1781_100339/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1715892686, - "type": "nmdc:DataObject", - "id": "nmdc:aa0ca68275dfc45fe70cc94e247e1a69", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e88" - }, - "description": "Assembled AGP file for gold:Gp0127637", - "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly.agp", - "file_size_bytes": 14211451, - "type": "nmdc:DataObject", - "id": "nmdc:b152dd521661ba36392ccc535795542e", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { 
- "_id": { - "$oid": "649b003d1ae706d7b5b15b60" - }, - "id": "nmdc:9eeee33a195e10f3f8e95f2049dac406", - "name": "1781_100339.krona.html", - "description": "Gold:Gp0127637 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100339/ReadbasedAnalysis/centrifuge/1781_100339.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b65" - }, - "id": "nmdc:3eca864f3fb90709467d18cc9247709b", - "name": "1781_100339.json", - "description": "Gold:Gp0127637 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100339/ReadbasedAnalysis/1781_100339.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165f9" - }, - "id": "nmdc:51db76a0482441e5ade7e3d99694ffe6", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127637", - "file_size_bytes": 89058634, - "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165fb" - }, - "id": "nmdc:6a3a98a4c86866bcb2c347d35006fc55", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127637", - "file_size_bytes": 22532401, - "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165fc" - }, - "id": "nmdc:6c4325a1e9ba6b106d58e1fa99239c56", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127637", - "file_size_bytes": 1092, - "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b165fe" - }, - "id": 
"nmdc:419c789c7c7458e9775dcfb3e95530b2", - "name": "gtdbtk.ar122.summary.tsv", - "description": "gtdbtk archaea assignment result summary table for gold:Gp0127637", - "file_size_bytes": 1003, - "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16600" - }, - "id": "nmdc:2e3079d5c7c114727c5445f2ed43ed5f", - "name": "gold:Gp0127637.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127637", - "file_size_bytes": 1650635, - "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16601" - }, - "id": "nmdc:004ec11cd626b798ffccba986dd4f129", - "name": "gold:Gp0127637.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127637", - "file_size_bytes": 1046842, - "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16603" - }, - "id": "nmdc:024873d88915f917de41ed83c0b7b98c", - "name": "gold:Gp0127637.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127637", - "file_size_bytes": 230352, - "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16607" - }, - "id": "nmdc:6920af8dfe8dc23b9a267c837184d619", - "name": "gold:Gp0127637.bins.2.fa", - "description": "hqmq binned contig file for gold:Gp0127637", - "file_size_bytes": 1619653, - "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/hqmq-metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": 
"649b00401ae706d7b5b16d02" - }, - "description": "KO TSV File for gold:Gp0127637", - "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_ko.tsv", - "md5_checksum": "ce74f349e03ae28dd49fc5ea4cd1d91d", - "file_size_bytes": 3385, - "id": "nmdc:ce74f349e03ae28dd49fc5ea4cd1d91d", - "name": "gold:Gp0127637_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d05" - }, - "description": "Protein FAA for gold:Gp0127637", - "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_proteins.faa", - "md5_checksum": "c43a6b5a306a8f14aab780d8f1bf9c41", - "file_size_bytes": 3385, - "id": "nmdc:c43a6b5a306a8f14aab780d8f1bf9c41", - "name": "gold:Gp0127637_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d06" - }, - "description": "Structural annotation GFF file for gold:Gp0127637", - "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_structural_annotation.gff", - "md5_checksum": "5f6b287493cde8cf8cb49348a2868aa6", - "file_size_bytes": 3385, - "id": "nmdc:5f6b287493cde8cf8cb49348a2868aa6", - "name": "gold:Gp0127637_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d09" - }, - "description": "EC TSV File for gold:Gp0127637", - "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_ec.tsv", - "md5_checksum": "1549562abe1044734fab8562585ec161", - "file_size_bytes": 3385, - "id": "nmdc:1549562abe1044734fab8562585ec161", - "name": "gold:Gp0127637_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d0f" - }, - "description": "Functional annotation GFF file for gold:Gp0127637", - "url": 
"https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_functional_annotation.gff", - "md5_checksum": "74b2fc3dd196a3d615c7d0d478fa2f90", - "file_size_bytes": 3385, - "id": "nmdc:74b2fc3dd196a3d615c7d0d478fa2f90", - "name": "gold:Gp0127637_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34702" - }, - "has_input": [ - "nmdc:aee81646e593045bbb32a0012870b88b", - "nmdc:dee5fa37f57a24685b65e00380d6e433", - "nmdc:b9ec0754ffaa338c899244703bc91386" - ], - "too_short_contig_num": 200319, - "part_of": [ - "nmdc:mga0sb9b30" - ], - "binned_contig_num": 482, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:7968c6b88e49f066bd24982b4d54965b", - "nmdc:120fbaa7439eb628d9a982de573446a8", - "nmdc:347a7ee18b37674e031cca9046e92623", - "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", - "nmdc:382d00338a5e4829285e58a203de153e" - ], - "was_informed_by": "gold:Gp0127637", - "input_contig_num": 214863, - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0sb9b30", - "mags_list": [ - { - "number_of_contig": 59, - "completeness": 8.33, - "bin_name": "bins.1", - "gene_count": 295, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 0 - }, - { - "number_of_contig": 233, - "completeness": 45.87, - "bin_name": "bins.2", - "gene_count": 1342, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - 
"contamination": 1.28, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 18 - }, - { - "number_of_contig": 190, - "completeness": 75.08, - "bin_name": "bins.3", - "gene_count": 1991, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 1.21, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 37 - } - ], - "unbinned_contig_num": 14062, - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:11:56+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9a6" - }, - "has_input": [ - "nmdc:aee81646e593045bbb32a0012870b88b" - ], - "part_of": [ - "nmdc:mga0sb9b30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:69603434971f93dbd79860c18dd5c61a", - "nmdc:bf8f822c6730b4cc73715ced3d25c262", - "nmdc:b9ec0754ffaa338c899244703bc91386", - "nmdc:22402cc61770feb5a0aaa4f760808366", - "nmdc:8c96f7faa38c361acc247b5a107a6b54", - "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", - "nmdc:89a8657f659710b3927baab155917fdf", - "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", - "nmdc:5cae6736713d02ccbe26543d733875cb", - "nmdc:a64350eb947c199cc1fbfb087191c0c7", - "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", - "nmdc:2471f27b6cf11b6f93c791c273989731" - ], - "was_informed_by": "gold:Gp0127637", - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0sb9b30", - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": 
"2021-10-11T03:11:56+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f93" - }, - "has_input": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" - ], - "part_of": [ - "nmdc:mga0sb9b30" - ], - "ctg_logsum": 271617, - "scaf_logsum": 272416, - "gap_pct": 0.00166, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:aee81646e593045bbb32a0012870b88b", - "nmdc:f1026db242cad285204c9c3d6307c183", - "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", - "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", - "nmdc:dee5fa37f57a24685b65e00380d6e433" - ], - "asm_score": 5.062, - "was_informed_by": "gold:Gp0127637", - "ctg_powsum": 29885, - "scaf_max": 43650, - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "scaf_powsum": 29983, - "execution_resource": "NERSC-Cori", - "contigs": 214863, - "name": "Assembly Activity for nmdc:mga0sb9b30", - "ctg_max": 43650, - "gc_std": 0.08814, - "contig_bp": 108739484, - "gc_avg": 0.63266, - "started_at_time": "2021-10-11T02:24:01Z", - "scaf_bp": 108741284, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 214737, - "ended_at_time": "2021-10-11T03:11:56+00:00", - "ctg_l50": 505, - "ctg_l90": 294, - "ctg_n50": 58474, - "ctg_n90": 177521, - "scaf_l50": 505, - "scaf_l90": 294, - "scaf_n50": 58469, - "scaf_n90": 177412 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b48" - }, - "id": "nmdc:omprc-11-c8dzx197", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-kwfbp795" - ], - "has_output": [ 
- "jgi:574fde647ded5e3df1ee1406" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127637" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c85f" - }, - "has_input": [ - "nmdc:320ac579913ecc4c218607b6b3b915b3" - ], - "part_of": [ - "nmdc:mga0sb9b30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", - "nmdc:611e67df261e050860b1075c6a6a5ff5" - ], - "was_informed_by": "gold:Gp0127637", - "input_read_count": 24239336, - "output_read_bases": 2975652755, - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3660139736, - "name": "Read QC Activity for nmdc:mga0sb9b30", - "output_read_count": 19917090, - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:11:56+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf23" - }, - "has_input": [ - "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", - "nmdc:37dd1d73ad47979ee5284830d27df535", - "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", - "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", - "nmdc:f44a5d59785cdededea0fe4a6a429c30", - "nmdc:81a6efbd082e07bc2db174a88d64a272", - "nmdc:f63856a84bc9afb8954ccdb1803d5fde", - 
"nmdc:9a1826f66ee45187d627076d11dc491f", - "nmdc:67adb9cc2c75251f556a90b1a959ea72" - ], - "was_informed_by": "gold:Gp0127637", - "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", - "started_at_time": "2021-10-11T02:24:01Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:11:56+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1920284821, - "type": "nmdc:DataObject", - "id": "jgi:574fde837ded5e3df1ee141d", - "name": "10533.2.165322.TCATCAC-GGTGATG.fastq.gz" - }, - { - "name": "Gp0127638_Filtered Reads", - "description": "Filtered Reads for Gp0127638", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filtered.fastq.gz", - "md5_checksum": "56ba2416c050decd6c16c618c1e4a752", - "id": "nmdc:56ba2416c050decd6c16c618c1e4a752", - "file_size_bytes": 1649318115 - }, - { - "name": "Gp0127638_Filtered Stats", - "description": "Filtered Stats for Gp0127638", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filterStats.txt", - "md5_checksum": "5c9398042e9ff608befa78e86597bdf0", - "id": "nmdc:5c9398042e9ff608befa78e86597bdf0", - "file_size_bytes": 283 - }, - { - "name": "Gp0127638_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", - "md5_checksum": 
"dbbd6ca6777b71d1fac4aae2cd947deb", - "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "file_size_bytes": 2025 - }, - { - "name": "Gp0127638_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", - "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", - "id": "nmdc:b6de56746a284f8226dd86817c8ae04e", - "file_size_bytes": 655633 - }, - { - "name": "Gp0127638_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127638", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", - "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", - "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", - "file_size_bytes": 232133 - }, - { - "name": "Gp0127638_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127638", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", - "md5_checksum": "e9946f36795474182b7759d3d7532b57", - "id": "nmdc:e9946f36795474182b7759d3d7532b57", - "file_size_bytes": 1448205544 - }, - { - "name": "Gp0127638_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127638", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", - "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", - "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "file_size_bytes": 253872 - }, - { - "name": "Gp0127638_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127638", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", - "md5_checksum": "997a66f49a232750bd7132639f3387e7", - "id": "nmdc:997a66f49a232750bd7132639f3387e7", - "file_size_bytes": 2331772 - }, - { - "name": "Gp0127638_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127638", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", - "md5_checksum": "d3f604a59babf001839d38a617b62931", - "id": "nmdc:d3f604a59babf001839d38a617b62931", - "file_size_bytes": 1157365410 - }, - { - "name": "Gp0127638_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127638", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", - "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", - "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", - "file_size_bytes": 621484 - }, - { - "name": "Gp0127638_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127638", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", - "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", - "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", - "file_size_bytes": 3896830 - }, - { - "name": "Gp0127638_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127638", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_contigs.fna", - "md5_checksum": "5122503797ac0ed9694a6f4feecab955", - "id": "nmdc:5122503797ac0ed9694a6f4feecab955", - "file_size_bytes": 84307064 - }, - { - "name": "Gp0127638_Assembled scaffold fasta", - "description": "Assembled scaffold 
fasta for Gp0127638", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_scaffolds.fna", - "md5_checksum": "d7ee4628101b11bc5fb67d961a4e1a0a", - "id": "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", - "file_size_bytes": 83796938 - }, - { - "name": "Gp0127638_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_covstats.txt", - "md5_checksum": "0944f2c0dd70a751117fb10d9a41fddc", - "id": "nmdc:0944f2c0dd70a751117fb10d9a41fddc", - "file_size_bytes": 13413799 - }, - { - "name": "Gp0127638_Assembled AGP file", - "description": "Assembled AGP file for Gp0127638", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_assembly.agp", - "md5_checksum": "1917dcbbe1efcc2a57c511648a7f332e", - "id": "nmdc:1917dcbbe1efcc2a57c511648a7f332e", - "file_size_bytes": 12526116 - }, - { - "name": "Gp0127638_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127638", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_pairedMapped_sorted.bam", - "md5_checksum": "6420476f7e93425a68aa00b8e09cd6e7", - "id": "nmdc:6420476f7e93425a68aa00b8e09cd6e7", - "file_size_bytes": 1810224630 - }, - { - "name": "Gp0127638_Protein FAA", - "description": "Protein FAA for Gp0127638", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_proteins.faa", - "md5_checksum": "f56690d136c4dafdc1eaa64a21fd9210", - "id": "nmdc:f56690d136c4dafdc1eaa64a21fd9210", - "file_size_bytes": 49236514 - }, - { - "name": "Gp0127638_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127638", - 
"data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_structural_annotation.gff", - "md5_checksum": "8be4e8ac2d00bf1d5b4863c36dc3678c", - "id": "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", - "file_size_bytes": 2519 - }, - { - "name": "Gp0127638_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127638", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_functional_annotation.gff", - "md5_checksum": "41453202313c56e06b0cc00b5ee6c375", - "id": "nmdc:41453202313c56e06b0cc00b5ee6c375", - "file_size_bytes": 56761027 - }, - { - "name": "Gp0127638_KO TSV file", - "description": "KO TSV file for Gp0127638", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko.tsv", - "md5_checksum": "e06bd74dce2e5b839b35ac1012d93ba4", - "id": "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", - "file_size_bytes": 6728487 - }, - { - "name": "Gp0127638_EC TSV file", - "description": "EC TSV file for Gp0127638", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ec.tsv", - "md5_checksum": "f2786d1f8a17bedd0104b01ec06ebfce", - "id": "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", - "file_size_bytes": 4522678 - }, - { - "name": "Gp0127638_COG GFF file", - "description": "COG GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cog.gff", - "md5_checksum": "37cb3fb060da091a84f1baa7ef3743fc", - "id": "nmdc:37cb3fb060da091a84f1baa7ef3743fc", - "file_size_bytes": 33992392 - }, - { - "name": "Gp0127638_PFAM GFF file", - "description": "PFAM GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_pfam.gff", - 
"md5_checksum": "34680897818585cefbef6e69109e7de4", - "id": "nmdc:34680897818585cefbef6e69109e7de4", - "file_size_bytes": 25203872 - }, - { - "name": "Gp0127638_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_tigrfam.gff", - "md5_checksum": "a00404838fbe9f846a704e1dbb14f2b2", - "id": "nmdc:a00404838fbe9f846a704e1dbb14f2b2", - "file_size_bytes": 2852587 - }, - { - "name": "Gp0127638_SMART GFF file", - "description": "SMART GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_smart.gff", - "md5_checksum": "700dd121a0ac41e3fa8077d7330adae7", - "id": "nmdc:700dd121a0ac41e3fa8077d7330adae7", - "file_size_bytes": 7723231 - }, - { - "name": "Gp0127638_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_supfam.gff", - "md5_checksum": "e429651ae53a18b07d99880d09a19b26", - "id": "nmdc:e429651ae53a18b07d99880d09a19b26", - "file_size_bytes": 42064836 - }, - { - "name": "Gp0127638_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cath_funfam.gff", - "md5_checksum": "b22aab3cc1b9231102b23c31b418eff4", - "id": "nmdc:b22aab3cc1b9231102b23c31b418eff4", - "file_size_bytes": 32005228 - }, - { - "name": "Gp0127638_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127638", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko_ec.gff", - "md5_checksum": "ebb5a6a7ad1f14fd8cf2178ec59969ef", - "id": "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef", - "file_size_bytes": 21405596 - }, - { - "name": "Gp0127638_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127638", - "data_object_type": 
"CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_checkm_qa.out", - "md5_checksum": "dcdd7e33e92d3658fe68056f21b57f5d", - "id": "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", - "file_size_bytes": 760 - }, - { - "name": "Gp0127638_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127638", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_hqmq_bin.zip", - "md5_checksum": "8ca8e2250dc68643e937163323f2a826", - "id": "nmdc:8ca8e2250dc68643e937163323f2a826", - "file_size_bytes": 508443 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e86" - }, - "description": "Assembled contigs fasta for gold:Gp0127638", - "url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly_contigs.fna", - "file_size_bytes": 83628276, - "type": "nmdc:DataObject", - "id": "nmdc:ed782cb1e889b9965707363c1324ee22", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e89" - }, - "description": "Assembled scaffold fasta for gold:Gp0127638", - "url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly_scaffolds.fna", - "file_size_bytes": 83118450, - "type": "nmdc:DataObject", - "id": "nmdc:37adb1b2ce1b858809930aa12526e720", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e8a" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127638", - "url": "https://data.microbiomedata.org/data/1781_100340/assembly/mapping_stats.txt", - "file_size_bytes": 12735011, - "type": "nmdc:DataObject", - "id": "nmdc:05e4b13c2533a4969139e6e11ae71984", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e8d" - }, - "description": "Assembled AGP file for gold:Gp0127638", - 
"url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly.agp", - "file_size_bytes": 11167924, - "type": "nmdc:DataObject", - "id": "nmdc:d98e0c56d4ea8b29a62f6ba8cc058c72", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e8e" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127638", - "url": "https://data.microbiomedata.org/data/1781_100340/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1787924636, - "type": "nmdc:DataObject", - "id": "nmdc:0aa8f1c4c591a4080152f9712431f85b", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b6b" - }, - "id": "nmdc:3114c2a7faf5fc63ff1e8be9c15ae9ac", - "name": "1781_100340.krona.html", - "description": "Gold:Gp0127638 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100340/ReadbasedAnalysis/centrifuge/1781_100340.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b6e" - }, - "id": "nmdc:73e4e17fc849b239ced558102cb107de", - "name": "1781_100340.json", - "description": "Gold:Gp0127638 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100340/ReadbasedAnalysis/1781_100340.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16602" - }, - "id": "nmdc:1ed134939eb54d78ba95134f8b11abf0", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127638", - "file_size_bytes": 69018209, - "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16604" - }, - "id": "nmdc:4bf67b94461ec33e4d3bf4f28442c6b5", - "name": "bins.unbinned.fa", - "description": "unbinned fasta 
file from metabat2 for gold:Gp0127638", - "file_size_bytes": 11722895, - "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16605" - }, - "id": "nmdc:28848628e266e83cfb2e9af8e90ae9c0", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127638", - "file_size_bytes": 760, - "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16606" - }, - "id": "nmdc:143af6193b463b4b8e685e999fc0d756", - "name": "gtdbtk.ar122.summary.tsv", - "description": "gtdbtk archaea assignment result summary table for gold:Gp0127638", - "file_size_bytes": 1003, - "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16609" - }, - "id": "nmdc:e4e5dd8c3aaba918e6e98db827fc9d28", - "name": "gold:Gp0127638.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127638", - "file_size_bytes": 1471149, - "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d07" - }, - "description": "EC TSV File for gold:Gp0127638", - "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_ec.tsv", - "md5_checksum": "3bd360103e4e8fc8f89c1df345367776", - "file_size_bytes": 3385, - "id": "nmdc:3bd360103e4e8fc8f89c1df345367776", - "name": "gold:Gp0127638_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d08" - }, - "description": "KO TSV File for gold:Gp0127638", - "url": 
"https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_ko.tsv", - "md5_checksum": "1aba5135d8cddc36da3cd37579be190b", - "file_size_bytes": 3385, - "id": "nmdc:1aba5135d8cddc36da3cd37579be190b", - "name": "gold:Gp0127638_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d0a" - }, - "description": "Functional annotation GFF file for gold:Gp0127638", - "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_functional_annotation.gff", - "md5_checksum": "3da4d2f1c2db68033fa2264f4db7f459", - "file_size_bytes": 3385, - "id": "nmdc:3da4d2f1c2db68033fa2264f4db7f459", - "name": "gold:Gp0127638_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d0b" - }, - "description": "Structural annotation GFF file for gold:Gp0127638", - "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_structural_annotation.gff", - "md5_checksum": "2ca3e1a0ba8007e86dedbec47e85adba", - "file_size_bytes": 3385, - "id": "nmdc:2ca3e1a0ba8007e86dedbec47e85adba", - "name": "gold:Gp0127638_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d0c" - }, - "description": "Protein FAA for gold:Gp0127638", - "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_proteins.faa", - "md5_checksum": "17993d4fcfa7be4fd4488804d23b67c6", - "file_size_bytes": 3385, - "id": "nmdc:17993d4fcfa7be4fd4488804d23b67c6", - "name": "gold:Gp0127638_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3471d" - }, - 
"has_input": [ - "nmdc:5122503797ac0ed9694a6f4feecab955", - "nmdc:6420476f7e93425a68aa00b8e09cd6e7", - "nmdc:41453202313c56e06b0cc00b5ee6c375" - ], - "too_short_contig_num": 162130, - "part_of": [ - "nmdc:mga0hjgc20" - ], - "binned_contig_num": 189, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", - "nmdc:8ca8e2250dc68643e937163323f2a826" - ], - "was_informed_by": "gold:Gp0127638", - "input_contig_num": 169697, - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0hjgc20", - "mags_list": [ - { - "number_of_contig": 189, - "completeness": 73.5, - "bin_name": "bins.1", - "gene_count": 2020, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 0.97, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 37 - } - ], - "unbinned_contig_num": 7378, - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-12-02T20:49:51+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9bc" - }, - "has_input": [ - "nmdc:5122503797ac0ed9694a6f4feecab955" - ], - "part_of": [ - "nmdc:mga0hjgc20" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f56690d136c4dafdc1eaa64a21fd9210", - "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", - "nmdc:41453202313c56e06b0cc00b5ee6c375", - "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", - "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", - "nmdc:37cb3fb060da091a84f1baa7ef3743fc", - "nmdc:34680897818585cefbef6e69109e7de4", - 
"nmdc:a00404838fbe9f846a704e1dbb14f2b2", - "nmdc:700dd121a0ac41e3fa8077d7330adae7", - "nmdc:e429651ae53a18b07d99880d09a19b26", - "nmdc:b22aab3cc1b9231102b23c31b418eff4", - "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef" - ], - "was_informed_by": "gold:Gp0127638", - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0hjgc20", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-12-02T20:49:51+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fac" - }, - "has_input": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752" - ], - "part_of": [ - "nmdc:mga0hjgc20" - ], - "ctg_logsum": 141543, - "scaf_logsum": 141966, - "gap_pct": 0.00109, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5122503797ac0ed9694a6f4feecab955", - "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", - "nmdc:0944f2c0dd70a751117fb10d9a41fddc", - "nmdc:1917dcbbe1efcc2a57c511648a7f332e", - "nmdc:6420476f7e93425a68aa00b8e09cd6e7" - ], - "asm_score": 6.89, - "was_informed_by": "gold:Gp0127638", - "ctg_powsum": 15753, - "scaf_max": 48487, - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", - "scaf_powsum": 15801, - "execution_resource": "NERSC-Cori", - "contigs": 169698, - "name": "Assembly Activity for nmdc:mga0hjgc20", - "ctg_max": 48487, - "gc_std": 0.08917, - "gc_avg": 0.63213, - "contig_bp": 77783768, - "started_at_time": "2021-12-01T21:31:29Z", - "scaf_bp": 77784618, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 169622, - "ended_at_time": "2021-12-02T20:49:51+00:00", - "ctg_l50": 433, - "ctg_l90": 289, - "ctg_n50": 51455, - "ctg_n90": 144304, - "scaf_l50": 433, - "scaf_l90": 289, - "scaf_n50": 51437, - "scaf_n90": 144234 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - 
"omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b49" - }, - "id": "nmdc:omprc-11-tgxmb243", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-pq3zmp51" - ], - "has_output": [ - "jgi:574fde837ded5e3df1ee141d" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127638" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c873" - }, - "has_input": [ - "nmdc:56b2d94789953adf1b4ed35f09f0edd4" - ], - "part_of": [ - "nmdc:mga0hjgc20" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752", - "nmdc:5c9398042e9ff608befa78e86597bdf0" - ], - "was_informed_by": "gold:Gp0127638", - "input_read_count": 21721428, - "output_read_bases": 2949961420, - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3279935628, - "name": "Read QC Activity for nmdc:mga0hjgc20", - "output_read_count": 19723416, - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-12-02T20:49:51+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": 
"649b009bff710ae353f8cf35" - }, - "has_input": [ - "nmdc:56ba2416c050decd6c16c618c1e4a752" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", - "nmdc:b6de56746a284f8226dd86817c8ae04e", - "nmdc:d9572e708af9f0a06e98cfddfb298359", - "nmdc:e9946f36795474182b7759d3d7532b57", - "nmdc:33ff1d85d17d763afc9e21e481cc10d2", - "nmdc:997a66f49a232750bd7132639f3387e7", - "nmdc:d3f604a59babf001839d38a617b62931", - "nmdc:3abfaa434ee1449cbbb69985e48488b4", - "nmdc:70c2fc1a2c7c0032528ff91ad1576465" - ], - "was_informed_by": "gold:Gp0127638", - "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", - "started_at_time": "2021-12-01T21:31:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:49:51+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 3408915289, - "type": "nmdc:DataObject", - "id": "jgi:55d7402a0d8785342fcf7e3b", - "name": "9422.8.132674.CGTACG.fastq.gz" - }, - { - "name": "Gp0115670_Filtered Reads", - "description": "Filtered Reads for Gp0115670", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", - "md5_checksum": "7f6b353300583c60d2d668880b4134cd", - "id": "nmdc:7f6b353300583c60d2d668880b4134cd", - "file_size_bytes": 3012174785 - }, - { - "name": "Gp0115670_Filtered Stats", - "description": "Filtered Stats for Gp0115670", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", - "md5_checksum": "a4f65d101293fa4345cd865f86597464", - "id": "nmdc:a4f65d101293fa4345cd865f86597464", - "file_size_bytes": 291 - }, - { - "name": "Gp0115670_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", - "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", - "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "file_size_bytes": 13758 - }, - { - "name": "Gp0115670_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", - "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", - "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", - "file_size_bytes": 1116084 - }, - { - "name": "Gp0115670_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115670", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", - "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", - "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "file_size_bytes": 268542 - }, - { - "name": "Gp0115670_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115670", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", - "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", - "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "file_size_bytes": 2458475116 - }, - { - "name": "Gp0115670_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115670", - "data_object_type": "Centrifuge Classification Report", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", - "md5_checksum": "c065784bed2b2495d512af93d05967de", - "id": "nmdc:c065784bed2b2495d512af93d05967de", - "file_size_bytes": 261692 - }, - { - "name": "Gp0115670_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115670", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", - "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", - "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "file_size_bytes": 2343355 - }, - { - "name": "Gp0115670_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115670", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", - "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", - "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "file_size_bytes": 2019980511 - }, - { - "name": "Gp0115670_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115670", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", - "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", - "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", - "file_size_bytes": 694029 - }, - { - "name": "Gp0115670_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115670", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", - "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", - "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", - "file_size_bytes": 4190653 - }, - { - "name": "Gp0115670_Assembled contigs 
fasta", - "description": "Assembled contigs fasta for Gp0115670", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_contigs.fna", - "md5_checksum": "975cdb0a18df949be4efb80d1dc4ef0b", - "id": "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", - "file_size_bytes": 85578260 - }, - { - "name": "Gp0115670_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115670", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_scaffolds.fna", - "md5_checksum": "1dfaed4da055c5fd4226abe08bd91db9", - "id": "nmdc:1dfaed4da055c5fd4226abe08bd91db9", - "file_size_bytes": 85115954 - }, - { - "name": "Gp0115670_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_covstats.txt", - "md5_checksum": "8a749340eefc40901a22a0ef603bc803", - "id": "nmdc:8a749340eefc40901a22a0ef603bc803", - "file_size_bytes": 12068883 - }, - { - "name": "Gp0115670_Assembled AGP file", - "description": "Assembled AGP file for Gp0115670", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_assembly.agp", - "md5_checksum": "ad027e4c3ca67907154c03feeebbd97b", - "id": "nmdc:ad027e4c3ca67907154c03feeebbd97b", - "file_size_bytes": 11264235 - }, - { - "name": "Gp0115670_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115670", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_pairedMapped_sorted.bam", - "md5_checksum": "c4f2407273babd894282d4d0f20be5d1", - "id": "nmdc:c4f2407273babd894282d4d0f20be5d1", - "file_size_bytes": 3245960211 - }, - { - "name": "Gp0115670_Protein FAA", - "description": "Protein FAA for Gp0115670", - 
"data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_proteins.faa", - "md5_checksum": "21230aff7bb5b266fb544905f9ac5ce2", - "id": "nmdc:21230aff7bb5b266fb544905f9ac5ce2", - "file_size_bytes": 46061226 - }, - { - "name": "Gp0115670_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115670", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_structural_annotation.gff", - "md5_checksum": "91c5cc265ef61ab83111a5bc9462e8b2", - "id": "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", - "file_size_bytes": 2769 - }, - { - "name": "Gp0115670_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115670", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_functional_annotation.gff", - "md5_checksum": "0bc4d8b8ef11724c3d7e728b0e8e0ea5", - "id": "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", - "file_size_bytes": 50449176 - }, - { - "name": "Gp0115670_KO TSV file", - "description": "KO TSV file for Gp0115670", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko.tsv", - "md5_checksum": "811910b7d8c300befddd039e833b0453", - "id": "nmdc:811910b7d8c300befddd039e833b0453", - "file_size_bytes": 6653168 - }, - { - "name": "Gp0115670_EC TSV file", - "description": "EC TSV file for Gp0115670", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ec.tsv", - "md5_checksum": "9ed55d9535d1592866a66e9d5cd936a2", - "id": "nmdc:9ed55d9535d1592866a66e9d5cd936a2", - "file_size_bytes": 4232890 - }, - { - "name": "Gp0115670_COG GFF file", - "description": "COG GFF file for Gp0115670", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cog.gff", - "md5_checksum": "a127efaa423e6dd6d24d7ab67cc2124a", - "id": "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", - "file_size_bytes": 28376544 - }, - { - "name": "Gp0115670_PFAM GFF file", - "description": "PFAM GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_pfam.gff", - "md5_checksum": "4b56646de8c37278beaaf9797e4ddf2f", - "id": "nmdc:4b56646de8c37278beaaf9797e4ddf2f", - "file_size_bytes": 22850790 - }, - { - "name": "Gp0115670_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_tigrfam.gff", - "md5_checksum": "53a0873376e22fef62f2740f6afead21", - "id": "nmdc:53a0873376e22fef62f2740f6afead21", - "file_size_bytes": 3099434 - }, - { - "name": "Gp0115670_SMART GFF file", - "description": "SMART GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_smart.gff", - "md5_checksum": "36748318682076112ba81283c8bc767a", - "id": "nmdc:36748318682076112ba81283c8bc767a", - "file_size_bytes": 6433811 - }, - { - "name": "Gp0115670_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_supfam.gff", - "md5_checksum": "5dd32385b351847f23ec4eac63eb70ff", - "id": "nmdc:5dd32385b351847f23ec4eac63eb70ff", - "file_size_bytes": 36427587 - }, - { - "name": "Gp0115670_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cath_funfam.gff", - "md5_checksum": "95076052a4d5d57e1ed0c7699e4f5472", - "id": "nmdc:95076052a4d5d57e1ed0c7699e4f5472", - "file_size_bytes": 28909664 - }, - { - "name": "Gp0115670_KO_EC GFF file", - "description": "KO_EC GFF file for 
Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko_ec.gff", - "md5_checksum": "6ae89cc4b2fb7d09614c106d3358be27", - "id": "nmdc:6ae89cc4b2fb7d09614c106d3358be27", - "file_size_bytes": 21214802 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115670_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.tooShort.fa", - "md5_checksum": "fd5fe3f1faaaf3cd8a88d9bbfb016827", - "id": "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", - "file_size_bytes": 61828850 - }, - { - "name": "Gp0115670_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.unbinned.fa", - "md5_checksum": "e27b736ee699ef2a8468a684811aaabd", - "id": "nmdc:e27b736ee699ef2a8468a684811aaabd", - "file_size_bytes": 15075820 - }, - { - "name": "Gp0115670_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115670", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_checkm_qa.out", - "md5_checksum": "b0866d1a944aa27e34dc7a140aeaf336", - "id": "nmdc:b0866d1a944aa27e34dc7a140aeaf336", - "file_size_bytes": 1690 - }, - { - "name": "Gp0115670_high-quality and medium-quality bins", - "description": "high-quality 
and medium-quality bins for Gp0115670", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_hqmq_bin.zip", - "md5_checksum": "0875e5107d03a40832d15e5cf80adbbc", - "id": "nmdc:0875e5107d03a40832d15e5cf80adbbc", - "file_size_bytes": 1944800 - }, - { - "name": "Gp0115670_metabat2 bins", - "description": "metabat2 bins for Gp0115670", - "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_metabat_bin.zip", - "md5_checksum": "9b60c7c905d34e08427781eafbce9b12", - "id": "nmdc:9b60c7c905d34e08427781eafbce9b12", - "file_size_bytes": 658258 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d7e" - }, - "description": "Assembled contigs fasta for gold:Gp0115670", - "url": "https://data.microbiomedata.org/data/1781_86102/assembly/assembly_contigs.fna", - "file_size_bytes": 84815235, - "type": "nmdc:DataObject", - "id": "nmdc:6c7beb91bbdcda84076fd786d59cab20", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d80" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115670", - "url": "https://data.microbiomedata.org/data/1781_86102/assembly/mapping_stats.txt", - "file_size_bytes": 11305858, - "type": "nmdc:DataObject", - "id": "nmdc:5133fdf5c818f740f9e7ca276477f5db", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d84" - }, - "description": "Assembled scaffold fasta for gold:Gp0115670", - "url": "https://data.microbiomedata.org/data/1781_86102/assembly/assembly_scaffolds.fna", - "file_size_bytes": 84354304, - "type": "nmdc:DataObject", - "id": "nmdc:cbd3fb5b5b99d86979e4c481bcd52d91", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d85" - }, - "description": "Assembled AGP file for gold:Gp0115670", - "url": 
"https://data.microbiomedata.org/data/1781_86102/assembly/assembly.agp", - "file_size_bytes": 9735435, - "type": "nmdc:DataObject", - "id": "nmdc:5f92683e40ac788a1bebf1d6e02415ad", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d87" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115670", - "url": "https://data.microbiomedata.org/data/1781_86102/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 3205338070, - "type": "nmdc:DataObject", - "id": "nmdc:bd2f5662f242a2be294876530634afaf", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159b6" - }, - "id": "nmdc:0a93ee11d25618c9207f4c109dd0859d", - "name": "1781_86102.krona.html", - "description": "Gold:Gp0115670 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86102/ReadbasedAnalysis/centrifuge/1781_86102.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159bd" - }, - "id": "nmdc:43f9235ab417dd2dff189967b1a66ac7", - "name": "1781_86102.json", - "description": "Gold:Gp0115670 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86102/ReadbasedAnalysis/1781_86102.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16283" - }, - "id": "nmdc:fc533d14a7bb4e0dc462c4d95818e01e", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115670", - "file_size_bytes": 60065481, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16284" - }, - "id": "nmdc:5b15dd3d951dc863beb945de63d7ec25", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from 
metabat2 for gold:Gp0115670", - "file_size_bytes": 17564113, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16285" - }, - "id": "nmdc:53398d224211bc133f6dce929cae0d72", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115670", - "file_size_bytes": 2158, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16286" - }, - "id": "nmdc:46666048e020a995a98c70df53ac4d9f", - "name": "gold:Gp0115670.bins.9.fa", - "description": "metabat2 binned contig file for gold:Gp0115670", - "file_size_bytes": 412159, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.9.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16287" - }, - "id": "nmdc:667049b22edf3a81a717ccf63fa6021c", - "name": "gold:Gp0115670.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115670", - "file_size_bytes": 1551626, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16288" - }, - "id": "nmdc:1a8477ea8d089e78cb03052c64a35249", - "name": "gold:Gp0115670.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0115670", - "file_size_bytes": 298749, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16289" - }, - "id": "nmdc:d0b11003b25eb91d3759fbc6b7477c37", - "name": "gold:Gp0115670.bins.3.fa", - "description": "metabat2 binned contig file for 
gold:Gp0115670", - "file_size_bytes": 389627, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1628b" - }, - "id": "nmdc:9d6dace191d0fa9b660b12af98402fab", - "name": "gold:Gp0115670.bins.7.fa", - "description": "metabat2 binned contig file for gold:Gp0115670", - "file_size_bytes": 329051, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1628d" - }, - "id": "nmdc:a15523e666fa33f919c66d5cba8bc0f5", - "name": "gold:Gp0115670.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115670", - "file_size_bytes": 570961, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1628e" - }, - "id": "nmdc:4162f320ed86534d75ecb1ccf2763d47", - "name": "gold:Gp0115670.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115670", - "file_size_bytes": 743867, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16290" - }, - "id": "nmdc:9b032e23d57bfbb90069887246064d22", - "name": "gold:Gp0115670.bins.8.fa", - "description": "metabat2 binned contig file for gold:Gp0115670", - "file_size_bytes": 1008688, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.8.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16291" - }, - "id": "nmdc:32d3414866d65ea1c0a43f9fc60004ec", - "name": "gold:Gp0115670.bins.6.fa", - "description": 
"metabat2 binned contig file for gold:Gp0115670", - "file_size_bytes": 771722, - "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d98" - }, - "description": "EC TSV File for gold:Gp0115670", - "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_ec.tsv", - "md5_checksum": "483453952f8e4dc70687e02842b2bfc8", - "file_size_bytes": 3385, - "id": "nmdc:483453952f8e4dc70687e02842b2bfc8", - "name": "gold:Gp0115670_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d99" - }, - "description": "KO TSV File for gold:Gp0115670", - "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_ko.tsv", - "md5_checksum": "4226d30b4f7d4018245613abbb2cc254", - "file_size_bytes": 3385, - "id": "nmdc:4226d30b4f7d4018245613abbb2cc254", - "name": "gold:Gp0115670_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d9b" - }, - "description": "Protein FAA for gold:Gp0115670", - "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_proteins.faa", - "md5_checksum": "7e531f55eba2bd29d5bb4b1af8417b7c", - "file_size_bytes": 3385, - "id": "nmdc:7e531f55eba2bd29d5bb4b1af8417b7c", - "name": "gold:Gp0115670_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d9c" - }, - "description": "Functional annotation GFF file for gold:Gp0115670", - "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_functional_annotation.gff", - "md5_checksum": "75a1e23a29f8b793c0b0abb7778d8661", - "file_size_bytes": 3385, - "id": "nmdc:75a1e23a29f8b793c0b0abb7778d8661", - "name": 
"gold:Gp0115670_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d9d" - }, - "description": "Structural annotation GFF file for gold:Gp0115670", - "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_structural_annotation.gff", - "md5_checksum": "f05ecf0db08d716edb7a3f499582a2b7", - "file_size_bytes": 3385, - "id": "nmdc:f05ecf0db08d716edb7a3f499582a2b7", - "name": "gold:Gp0115670_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3472d" - }, - "has_input": [ - "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", - "nmdc:c4f2407273babd894282d4d0f20be5d1", - "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5" - ], - "too_short_contig_num": 142606, - "part_of": [ - "nmdc:mga0d7pj22" - ], - "binned_contig_num": 1261, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", - "nmdc:e27b736ee699ef2a8468a684811aaabd", - "nmdc:b0866d1a944aa27e34dc7a140aeaf336", - "nmdc:0875e5107d03a40832d15e5cf80adbbc", - "nmdc:9b60c7c905d34e08427781eafbce9b12" - ], - "was_informed_by": "gold:Gp0115670", - "input_contig_num": 152605, - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0d7pj22", - "mags_list": [ - { - "number_of_contig": 118, - "completeness": 23.28, - "bin_name": "bins.1", - "gene_count": 572, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - 
"gtdbtk_genus": "", - "num_t_rna": 5 - }, - { - "number_of_contig": 151, - "completeness": 38.09, - "bin_name": "bins.2", - "gene_count": 725, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 9 - }, - { - "number_of_contig": 100, - "completeness": 99.01, - "bin_name": "bins.3", - "gene_count": 3233, - "bin_quality": "HQ", - "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", - "num_16s": 1, - "gtdbtk_family": "Sphingomonadaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 1.38, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 2, - "gtdbtk_genus": "Novosphingobium", - "num_t_rna": 47 - }, - { - "number_of_contig": 135, - "completeness": 34.24, - "bin_name": "bins.4", - "gene_count": 689, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.91, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 652, - "completeness": 57.14, - "bin_name": "bins.5", - "gene_count": 3635, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 1, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 2.6, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "Rhizobacter", - "num_t_rna": 27 - }, - { - "number_of_contig": 105, - "completeness": 27.22, - "bin_name": "bins.6", - "gene_count": 509, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.19, - "gtdbtk_class": 
"", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 9 - } - ], - "unbinned_contig_num": 8738, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:55:52+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9cf" - }, - "has_input": [ - "nmdc:975cdb0a18df949be4efb80d1dc4ef0b" - ], - "part_of": [ - "nmdc:mga0d7pj22" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:21230aff7bb5b266fb544905f9ac5ce2", - "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", - "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", - "nmdc:811910b7d8c300befddd039e833b0453", - "nmdc:9ed55d9535d1592866a66e9d5cd936a2", - "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", - "nmdc:4b56646de8c37278beaaf9797e4ddf2f", - "nmdc:53a0873376e22fef62f2740f6afead21", - "nmdc:36748318682076112ba81283c8bc767a", - "nmdc:5dd32385b351847f23ec4eac63eb70ff", - "nmdc:95076052a4d5d57e1ed0c7699e4f5472", - "nmdc:6ae89cc4b2fb7d09614c106d3358be27" - ], - "was_informed_by": "gold:Gp0115670", - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0d7pj22", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:55:52+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fc0" - }, - "has_input": [ - "nmdc:7f6b353300583c60d2d668880b4134cd" - ], - "part_of": [ - "nmdc:mga0d7pj22" - ], - "ctg_logsum": 272574, - "scaf_logsum": 274450, - "gap_pct": 0.00346, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", - "nmdc:1dfaed4da055c5fd4226abe08bd91db9", - 
"nmdc:8a749340eefc40901a22a0ef603bc803", - "nmdc:ad027e4c3ca67907154c03feeebbd97b", - "nmdc:c4f2407273babd894282d4d0f20be5d1" - ], - "asm_score": 12.57, - "was_informed_by": "gold:Gp0115670", - "ctg_powsum": 33596, - "scaf_max": 211520, - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", - "scaf_powsum": 33865, - "execution_resource": "NERSC-Cori", - "contigs": 152605, - "name": "Assembly Activity for nmdc:mga0d7pj22", - "ctg_max": 211520, - "gc_std": 0.125, - "contig_bp": 79563543, - "gc_avg": 0.57036, - "started_at_time": "2021-10-11T02:28:43Z", - "scaf_bp": 79566293, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 152330, - "ended_at_time": "2021-10-11T05:55:52+00:00", - "ctg_l50": 492, - "ctg_l90": 290, - "ctg_n50": 35595, - "ctg_n90": 126332, - "scaf_l50": 493, - "scaf_l90": 290, - "scaf_n50": 35340, - "scaf_n90": 126070, - "scaf_l_gt50k": 1744421, - "scaf_n_gt50k": 21, - "scaf_pct_gt50k": 2.192412 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b4a" - }, - "id": "nmdc:omprc-11-t0xjjc50", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-vg9vy382" - ], - "has_output": [ - "jgi:55d7402a0d8785342fcf7e3b" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - 
"gold_sequencing_project_identifiers": [ - "gold:Gp0115670" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c888" - }, - "has_input": [ - "nmdc:aa477a857eb9da284635b774477f3f54" - ], - "part_of": [ - "nmdc:mga0d7pj22" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7f6b353300583c60d2d668880b4134cd", - "nmdc:a4f65d101293fa4345cd865f86597464" - ], - "was_informed_by": "gold:Gp0115670", - "input_read_count": 36554212, - "output_read_bases": 5044444014, - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", - "execution_resource": "NERSC-Cori", - "input_read_bases": 5519686012, - "name": "Read QC Activity for nmdc:mga0d7pj22", - "output_read_count": 33663942, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:55:52+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf4d" - }, - "has_input": [ - "nmdc:7f6b353300583c60d2d668880b4134cd" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e316502f9e7a78c9db3996ef832aa9d7", - "nmdc:1ac2be77491e7d425da1d62f69f1508d", - "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", - "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", - "nmdc:c065784bed2b2495d512af93d05967de", - "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", - "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", - "nmdc:8a26d8496a70f4777be0e1237092e44c", - "nmdc:694b83f0b6f599948d4248dd48dd9ba9" - ], - "was_informed_by": "gold:Gp0115670", - "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:55:52+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - 
"collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2350177247, - "type": "nmdc:DataObject", - "id": "jgi:55d7402c0d8785342fcf7e3e", - "name": "9422.8.132674.GGTAGC.fastq.gz" - }, - { - "name": "Gp0115674_Filtered Reads", - "description": "Filtered Reads for Gp0115674", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filtered.fastq.gz", - "md5_checksum": "538fd5695eb3decd48891e72acebb8ce", - "id": "nmdc:538fd5695eb3decd48891e72acebb8ce", - "file_size_bytes": 2126353222 - }, - { - "name": "Gp0115674_Filtered Stats", - "description": "Filtered Stats for Gp0115674", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filterStats.txt", - "md5_checksum": "dde2b1748e16380e63476430ee27083a", - "id": "nmdc:dde2b1748e16380e63476430ee27083a", - "file_size_bytes": 288 - }, - { - "name": "Gp0115674_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", - "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", - "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "file_size_bytes": 13768 - }, - { - "name": "Gp0115674_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", - "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", - "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "file_size_bytes": 1022858 - }, - { - 
"name": "Gp0115674_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115674", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", - "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", - "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "file_size_bytes": 269166 - }, - { - "name": "Gp0115674_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115674", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", - "md5_checksum": "f8740b1fadbc29aef50d32706c955199", - "id": "nmdc:f8740b1fadbc29aef50d32706c955199", - "file_size_bytes": 1904303690 - }, - { - "name": "Gp0115674_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115674", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", - "md5_checksum": "80abfcc9b09476af4083b2af1760834f", - "id": "nmdc:80abfcc9b09476af4083b2af1760834f", - "file_size_bytes": 258748 - }, - { - "name": "Gp0115674_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115674", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", - "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", - "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "file_size_bytes": 2335000 - }, - { - "name": "Gp0115674_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115674", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", - "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", - "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "file_size_bytes": 1574286150 - }, - { - "name": "Gp0115674_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115674", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", - "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", - "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "file_size_bytes": 671800 - }, - { - "name": "Gp0115674_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115674", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", - "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", - "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", - "file_size_bytes": 4070548 - }, - { - "name": "Gp0115674_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115674", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_contigs.fna", - "md5_checksum": "ed2e4b90c8c2947486cc5c3c5828f949", - "id": "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", - "file_size_bytes": 78686505 - }, - { - "name": "Gp0115674_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115674", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_scaffolds.fna", - "md5_checksum": "e8fa9ae5e04a2969d220d81f1fb752f2", - "id": "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", - "file_size_bytes": 78267725 - }, - { - "name": "Gp0115674_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115674", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_covstats.txt", - "md5_checksum": "5f308ea3cb43a331cda55ac9f91c6a53", - "id": "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", - "file_size_bytes": 10980044 - }, - { - "name": "Gp0115674_Assembled AGP file", - "description": "Assembled AGP file for Gp0115674", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_assembly.agp", - "md5_checksum": "604ed99b7c622082ddf174bb11d2787f", - "id": "nmdc:604ed99b7c622082ddf174bb11d2787f", - "file_size_bytes": 10249514 - }, - { - "name": "Gp0115674_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115674", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_pairedMapped_sorted.bam", - "md5_checksum": "a0263d8b11653306a05f598395ca603a", - "id": "nmdc:a0263d8b11653306a05f598395ca603a", - "file_size_bytes": 2304306876 - }, - { - "name": "Gp0115674_Protein FAA", - "description": "Protein FAA for Gp0115674", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_proteins.faa", - "md5_checksum": "9ae7cb8ba4bee2ce9a46c963d00ba6ba", - "id": "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", - "file_size_bytes": 43650605 - }, - { - "name": "Gp0115674_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115674", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_structural_annotation.gff", - "md5_checksum": "ce90743969776fd717671aeb21d37379", - "id": "nmdc:ce90743969776fd717671aeb21d37379", - "file_size_bytes": 2529 - }, - { - "name": "Gp0115674_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115674", - "data_object_type": 
"Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_functional_annotation.gff", - "md5_checksum": "1a4f5145ccf0838811fe570a93549fdf", - "id": "nmdc:1a4f5145ccf0838811fe570a93549fdf", - "file_size_bytes": 47604509 - }, - { - "name": "Gp0115674_KO TSV file", - "description": "KO TSV file for Gp0115674", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko.tsv", - "md5_checksum": "662dae8ba0ea9dda93637c2ea60c1f4e", - "id": "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", - "file_size_bytes": 6436472 - }, - { - "name": "Gp0115674_EC TSV file", - "description": "EC TSV file for Gp0115674", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ec.tsv", - "md5_checksum": "b5db445feb8edb47022c2a0ee86d828d", - "id": "nmdc:b5db445feb8edb47022c2a0ee86d828d", - "file_size_bytes": 4111562 - }, - { - "name": "Gp0115674_COG GFF file", - "description": "COG GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cog.gff", - "md5_checksum": "157d24f6f63091fbe9ef98cc3090975d", - "id": "nmdc:157d24f6f63091fbe9ef98cc3090975d", - "file_size_bytes": 27373015 - }, - { - "name": "Gp0115674_PFAM GFF file", - "description": "PFAM GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_pfam.gff", - "md5_checksum": "afa217feffb94965aa1839041305237e", - "id": "nmdc:afa217feffb94965aa1839041305237e", - "file_size_bytes": 22153817 - }, - { - "name": "Gp0115674_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_tigrfam.gff", - "md5_checksum": "4a00e0c0bc479b8e6f1139c8de3149d5", - "id": "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", - 
"file_size_bytes": 2995281 - }, - { - "name": "Gp0115674_SMART GFF file", - "description": "SMART GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_smart.gff", - "md5_checksum": "ffcd280a63fab7bcfa5422f34070d87f", - "id": "nmdc:ffcd280a63fab7bcfa5422f34070d87f", - "file_size_bytes": 6393135 - }, - { - "name": "Gp0115674_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_supfam.gff", - "md5_checksum": "9fb334fc9409e6db51aaa1f960b08f4b", - "id": "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", - "file_size_bytes": 35023258 - }, - { - "name": "Gp0115674_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cath_funfam.gff", - "md5_checksum": "d5676c01e67f71559a382850f42c3493", - "id": "nmdc:d5676c01e67f71559a382850f42c3493", - "file_size_bytes": 27788764 - }, - { - "name": "Gp0115674_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko_ec.gff", - "md5_checksum": "121fab4d5bff0dcbb9d1849738a72347", - "id": "nmdc:121fab4d5bff0dcbb9d1849738a72347", - "file_size_bytes": 20542466 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115674_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115674", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.tooShort.fa", - "md5_checksum": "6a03eb0156b154ea68ffff9b473e73a5", - "id": "nmdc:6a03eb0156b154ea68ffff9b473e73a5", - "file_size_bytes": 56345518 - }, - { - "name": "Gp0115674_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.unbinned.fa", - "md5_checksum": "33a477987509b67fcfa5096d20c7c40b", - "id": "nmdc:33a477987509b67fcfa5096d20c7c40b", - "file_size_bytes": 10836032 - }, - { - "name": "Gp0115674_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115674", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_checkm_qa.out", - "md5_checksum": "314c92c3a9458e1aa304e3c474209acf", - "id": "nmdc:314c92c3a9458e1aa304e3c474209acf", - "file_size_bytes": 1360 - }, - { - "name": "Gp0115674_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115674", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_hqmq_bin.zip", - "md5_checksum": "a4f9093efaf84855cab58880b262afd5", - "id": "nmdc:a4f9093efaf84855cab58880b262afd5", - "file_size_bytes": 2974639 - }, - { - "name": "Gp0115674_metabat2 bins", - "description": "metabat2 bins for Gp0115674", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_metabat_bin.zip", - "md5_checksum": "1a29af6f30c21f38b25e4553605f50ef", - "id": "nmdc:1a29af6f30c21f38b25e4553605f50ef", - "file_size_bytes": 469326 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d94" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115674", - "url": "https://data.microbiomedata.org/data/1781_86104/assembly/mapping_stats.txt", - 
"file_size_bytes": 10283424, - "type": "nmdc:DataObject", - "id": "nmdc:dafe01b902d5308bc53a143024f4c0be", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d95" - }, - "description": "Assembled scaffold fasta for gold:Gp0115674", - "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly_scaffolds.fna", - "file_size_bytes": 77571545, - "type": "nmdc:DataObject", - "id": "nmdc:52e5a91a8c71575c66793012fbdc1d38", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d97" - }, - "description": "Assembled contigs fasta for gold:Gp0115674", - "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly_contigs.fna", - "file_size_bytes": 77989885, - "type": "nmdc:DataObject", - "id": "nmdc:1689f2f2e14c55ab5d2af78ad3eb99bd", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d98" - }, - "description": "Assembled AGP file for gold:Gp0115674", - "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly.agp", - "file_size_bytes": 8855354, - "type": "nmdc:DataObject", - "id": "nmdc:e3b48b89ae6f02705022bf443f649bc2", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d99" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115674", - "url": "https://data.microbiomedata.org/data/1781_86104/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2269403358, - "type": "nmdc:DataObject", - "id": "nmdc:0e448fc98b179d70a76f38beb90171cf", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159de" - }, - "id": "nmdc:d9d2c48e8e6cc1e9111eba4cd5aa44ce", - "name": "1781_86104.krona.html", - "description": "Gold:Gp0115674 KRONA plot HTML file", - "url": 
"https://data.microbiomedata.org/1781_86104/ReadbasedAnalysis/centrifuge/1781_86104.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159e3" - }, - "id": "nmdc:ede7eba8751ecdb4bde5cdbded5598a8", - "name": "1781_86104.json", - "description": "Gold:Gp0115674 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86104/ReadbasedAnalysis/1781_86104.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b0" - }, - "id": "nmdc:4164b9671b26c93cf3580eff524af8a6", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115674", - "file_size_bytes": 54726629, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b1" - }, - "id": "nmdc:ecb95407253379e53508c4a5d200ae4e", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115674", - "file_size_bytes": 12697686, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b2" - }, - "id": "nmdc:44451dde40f3facbdb6357985448cb9f", - "name": "gold:Gp0115674.bins.9.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 2583019, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.9.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b3" - }, - "id": "nmdc:36a4a672be5f95492ec7b48b501bc666", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115674", - "file_size_bytes": 2550, - "url": 
"https://data.microbiomedata.org/data/1781_86104/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b4" - }, - "id": "nmdc:ffcb32d9c9e558bdbd8827712761d752", - "name": "gold:Gp0115674.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 897656, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b5" - }, - "id": "nmdc:7c49f6fc2918adadd95fac344eb321f9", - "name": "gold:Gp0115674.bins.11.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 891103, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.11.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b6" - }, - "id": "nmdc:e8abf1d316f52b3d1234bbaa8cf33c82", - "name": "gold:Gp0115674.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 1084295, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b7" - }, - "id": "nmdc:612b7364e691619f08e480035620ddb3", - "name": "gold:Gp0115674.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 221915, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b8" - }, - "id": "nmdc:56823e8ec01d84f84c1f44ba0020cefd", - "name": "gold:Gp0115674.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - 
"file_size_bytes": 1378648, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162b9" - }, - "id": "nmdc:f8f2d349759dfc70e53916dd7e07d796", - "name": "gtdbtk.bac120.summary.tsv", - "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115674", - "file_size_bytes": 4807, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ba" - }, - "id": "nmdc:8e481f201f444f86ce93109f8f25c356", - "name": "gold:Gp0115674.bins.7.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 571743, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162bb" - }, - "id": "nmdc:3d38153edc983c24deb7609306105632", - "name": "gold:Gp0115674.bins.10.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 637469, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.10.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162bc" - }, - "id": "nmdc:2e8fbb22de466ca13ed4441fee49faab", - "name": "gold:Gp0115674.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 589376, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162be" - }, - "id": "nmdc:448e30e675ef810514b1e091992df2fc", - "name": "gold:Gp0115674.bins.8.fa", - "description": "metabat2 
binned contig file for gold:Gp0115674", - "file_size_bytes": 318161, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.8.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162bf" - }, - "id": "nmdc:f7ee01c219b7141044ca7338877ddf5e", - "name": "gold:Gp0115674.bins.6.fa", - "description": "metabat2 binned contig file for gold:Gp0115674", - "file_size_bytes": 412815, - "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da3" - }, - "description": "EC TSV File for gold:Gp0115674", - "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_ec.tsv", - "md5_checksum": "72ede7603b72206d929c03364769021c", - "file_size_bytes": 3385, - "id": "nmdc:72ede7603b72206d929c03364769021c", - "name": "gold:Gp0115674_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da4" - }, - "description": "KO TSV File for gold:Gp0115674", - "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_ko.tsv", - "md5_checksum": "9c248ab2a22c7b49060e544f37b9c798", - "file_size_bytes": 3385, - "id": "nmdc:9c248ab2a22c7b49060e544f37b9c798", - "name": "gold:Gp0115674_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da5" - }, - "description": "Functional annotation GFF file for gold:Gp0115674", - "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_functional_annotation.gff", - "md5_checksum": "876382e7107a83b87a059e4e961bff75", - "file_size_bytes": 3385, - "id": "nmdc:876382e7107a83b87a059e4e961bff75", - "name": "gold:Gp0115674_Functional annotation GFF file", - "data_object_type": "Functional 
Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dac" - }, - "description": "Structural annotation GFF file for gold:Gp0115674", - "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_structural_annotation.gff", - "md5_checksum": "17f2fbdeb3f5891c37f2e9e43a40c7b1", - "file_size_bytes": 3385, - "id": "nmdc:17f2fbdeb3f5891c37f2e9e43a40c7b1", - "name": "gold:Gp0115674_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16db0" - }, - "description": "Protein FAA for gold:Gp0115674", - "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_proteins.faa", - "md5_checksum": "c70d6973abeb3ee231d3e38c3c5dced4", - "file_size_bytes": 3385, - "id": "nmdc:c70d6973abeb3ee231d3e38c3c5dced4", - "name": "gold:Gp0115674_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34732" - }, - "has_input": [ - "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", - "nmdc:a0263d8b11653306a05f598395ca603a", - "nmdc:1a4f5145ccf0838811fe570a93549fdf" - ], - "too_short_contig_num": 131855, - "part_of": [ - "nmdc:mga0cf0450" - ], - "binned_contig_num": 1119, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:6a03eb0156b154ea68ffff9b473e73a5", - "nmdc:33a477987509b67fcfa5096d20c7c40b", - "nmdc:314c92c3a9458e1aa304e3c474209acf", - "nmdc:a4f9093efaf84855cab58880b262afd5", - "nmdc:1a29af6f30c21f38b25e4553605f50ef" - ], - "was_informed_by": "gold:Gp0115674", - "input_contig_num": 139324, - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for 
nmdc:mga0cf0450", - "mags_list": [ - { - "number_of_contig": 198, - "completeness": 100.0, - "bin_name": "bins.1", - "gene_count": 5608, - "bin_quality": "HQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 1, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 1.29, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "Rhizobacter", - "num_t_rna": 46 - }, - { - "number_of_contig": 353, - "completeness": 88.62, - "bin_name": "bins.2", - "gene_count": 3146, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", - "num_16s": 0, - "gtdbtk_family": "Sphingomonadaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 2.0, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Novosphingobium", - "num_t_rna": 40 - }, - { - "number_of_contig": 273, - "completeness": 51.61, - "bin_name": "bins.3", - "gene_count": 1397, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Pseudomonadales", - "num_16s": 0, - "gtdbtk_family": "UBA3067", - "gtdbtk_domain": "Bacteria", - "contamination": 0.8, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA3067", - "num_t_rna": 17 - }, - { - "number_of_contig": 295, - "completeness": 49.14, - "bin_name": "bins.4", - "gene_count": 1695, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 16 - } - ], - "unbinned_contig_num": 6350, - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:21:41+00:00" - } - ], - "material_sample_set": [], - 
"material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9d4" - }, - "has_input": [ - "nmdc:ed2e4b90c8c2947486cc5c3c5828f949" - ], - "part_of": [ - "nmdc:mga0cf0450" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", - "nmdc:ce90743969776fd717671aeb21d37379", - "nmdc:1a4f5145ccf0838811fe570a93549fdf", - "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", - "nmdc:b5db445feb8edb47022c2a0ee86d828d", - "nmdc:157d24f6f63091fbe9ef98cc3090975d", - "nmdc:afa217feffb94965aa1839041305237e", - "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", - "nmdc:ffcd280a63fab7bcfa5422f34070d87f", - "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", - "nmdc:d5676c01e67f71559a382850f42c3493", - "nmdc:121fab4d5bff0dcbb9d1849738a72347" - ], - "was_informed_by": "gold:Gp0115674", - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0cf0450", - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:21:41+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb6" - }, - "has_input": [ - "nmdc:538fd5695eb3decd48891e72acebb8ce" - ], - "part_of": [ - "nmdc:mga0cf0450" - ], - "ctg_logsum": 272042, - "scaf_logsum": 272657, - "gap_pct": 0.00172, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", - "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", - "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", - "nmdc:604ed99b7c622082ddf174bb11d2787f", - "nmdc:a0263d8b11653306a05f598395ca603a" - ], - "asm_score": 18.19, - "was_informed_by": "gold:Gp0115674", - "ctg_powsum": 36133, - "scaf_max": 176505, - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", - "scaf_powsum": 36239, - "execution_resource": 
"NERSC-Cori", - "contigs": 139326, - "name": "Assembly Activity for nmdc:mga0cf0450", - "ctg_max": 176505, - "gc_std": 0.12397, - "contig_bp": 73195425, - "gc_avg": 0.56886, - "started_at_time": "2021-10-11T02:28:52Z", - "scaf_bp": 73196685, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 139236, - "ended_at_time": "2021-10-11T05:21:41+00:00", - "ctg_l50": 481, - "ctg_l90": 290, - "ctg_n50": 30768, - "ctg_n90": 115008, - "scaf_l50": 482, - "scaf_l90": 290, - "scaf_n50": 30582, - "scaf_n90": 114932, - "scaf_l_gt50k": 2506146, - "scaf_n_gt50k": 32, - "scaf_pct_gt50k": 3.4238515 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b4b" - }, - "id": "nmdc:omprc-11-1avd3d16", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-5xjtzc47" - ], - "has_output": [ - "jgi:55d7402c0d8785342fcf7e3e" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115674" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c88a" - }, - "has_input": [ - "nmdc:d94c174a22116c2db7ab8c47619e30aa" - ], - "part_of": [ - "nmdc:mga0cf0450" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:538fd5695eb3decd48891e72acebb8ce", - "nmdc:dde2b1748e16380e63476430ee27083a" - ], - "was_informed_by": "gold:Gp0115674", - "input_read_count": 26546332, - "output_read_bases": 3862169938, - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4008496132, - "name": "Read QC Activity for nmdc:mga0cf0450", - "output_read_count": 25776010, - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:21:41+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf50" - }, - "has_input": [ - "nmdc:538fd5695eb3decd48891e72acebb8ce" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", - "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", - "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", - "nmdc:f8740b1fadbc29aef50d32706c955199", - "nmdc:80abfcc9b09476af4083b2af1760834f", - "nmdc:f189624af50d8d62908f8ddd5f3451ad", - "nmdc:09302fbc8e30758a95fac09ee5cfd449", - "nmdc:e44f717fc6f3458c17b4f5129a5e7920", - "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" - ], - "was_informed_by": "gold:Gp0115674", - "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", - "started_at_time": "2021-10-11T02:28:52Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:21:41+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw 
sequencer read data", - "file_size_bytes": 1698585233, - "type": "nmdc:DataObject", - "id": "jgi:55d817f70d8785342fcf8270", - "name": "9387.2.132031.CTTGTA.fastq.gz" - }, - { - "name": "Gp0115673_Filtered Reads", - "description": "Filtered Reads for Gp0115673", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", - "md5_checksum": "268918f610926421d2af43f175553680", - "id": "nmdc:268918f610926421d2af43f175553680", - "file_size_bytes": 1492820163 - }, - { - "name": "Gp0115673_Filtered Stats", - "description": "Filtered Stats for Gp0115673", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", - "md5_checksum": "4610980cf3558f5a9830797ead97362a", - "id": "nmdc:4610980cf3558f5a9830797ead97362a", - "file_size_bytes": 287 - }, - { - "name": "Gp0115673_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", - "md5_checksum": "c7b24571b61a33018cf118b5424b787f", - "id": "nmdc:c7b24571b61a33018cf118b5424b787f", - "file_size_bytes": 9782 - }, - { - "name": "Gp0115673_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", - "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", - "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", - "file_size_bytes": 856112 - }, - { - "name": "Gp0115673_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115673", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", - "md5_checksum": "7c6b0ef44450c747580826a2e218844b", - "id": 
"nmdc:7c6b0ef44450c747580826a2e218844b", - "file_size_bytes": 255142 - }, - { - "name": "Gp0115673_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115673", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", - "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", - "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", - "file_size_bytes": 1218364738 - }, - { - "name": "Gp0115673_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115673", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", - "md5_checksum": "b5f7a68a94b356001014d1be024231af", - "id": "nmdc:b5f7a68a94b356001014d1be024231af", - "file_size_bytes": 254923 - }, - { - "name": "Gp0115673_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115673", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", - "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", - "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "file_size_bytes": 2323219 - }, - { - "name": "Gp0115673_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115673", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", - "md5_checksum": "35bf579641b2ffb3614098d9811a4968", - "id": "nmdc:35bf579641b2ffb3614098d9811a4968", - "file_size_bytes": 1001134031 - }, - { - "name": "Gp0115673_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115673", - "data_object_type": "Kraken2 Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", - "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", - "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "file_size_bytes": 640671 - }, - { - "name": "Gp0115673_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115673", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", - "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", - "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", - "file_size_bytes": 3995499 - }, - { - "name": "Gp0115673_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115673", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_contigs.fna", - "md5_checksum": "06d4964c0822abd6f94ca883c122f7ce", - "id": "nmdc:06d4964c0822abd6f94ca883c122f7ce", - "file_size_bytes": 49610158 - }, - { - "name": "Gp0115673_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115673", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_scaffolds.fna", - "md5_checksum": "bad916c69afe839097650b0b9526a841", - "id": "nmdc:bad916c69afe839097650b0b9526a841", - "file_size_bytes": 49338957 - }, - { - "name": "Gp0115673_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_covstats.txt", - "md5_checksum": "a187658f262fa495de43707aabcbf480", - "id": "nmdc:a187658f262fa495de43707aabcbf480", - "file_size_bytes": 7048516 - }, - { - "name": "Gp0115673_Assembled AGP file", - "description": "Assembled AGP file for Gp0115673", - "data_object_type": "Assembly AGP", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_assembly.agp", - "md5_checksum": "c525c04f90889be615025c667908370c", - "id": "nmdc:c525c04f90889be615025c667908370c", - "file_size_bytes": 6557406 - }, - { - "name": "Gp0115673_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115673", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_pairedMapped_sorted.bam", - "md5_checksum": "2e293158750df042be7422826125bef2", - "id": "nmdc:2e293158750df042be7422826125bef2", - "file_size_bytes": 1601507411 - }, - { - "name": "Gp0115673_Protein FAA", - "description": "Protein FAA for Gp0115673", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_proteins.faa", - "md5_checksum": "be3b8decbc48f9588daca36ca4c883ab", - "id": "nmdc:be3b8decbc48f9588daca36ca4c883ab", - "file_size_bytes": 27487621 - }, - { - "name": "Gp0115673_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115673", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_structural_annotation.gff", - "md5_checksum": "106c834bb14367ec6154d1b04f2a1021", - "id": "nmdc:106c834bb14367ec6154d1b04f2a1021", - "file_size_bytes": 2505 - }, - { - "name": "Gp0115673_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115673", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_functional_annotation.gff", - "md5_checksum": "dfe3eed1eee6d6764ae22a2c6b0209e5", - "id": "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", - "file_size_bytes": 30665845 - }, - { - "name": "Gp0115673_KO TSV file", - "description": "KO TSV file for Gp0115673", - "data_object_type": 
"Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko.tsv", - "md5_checksum": "84e3913c75d155fc45f04bc04810063a", - "id": "nmdc:84e3913c75d155fc45f04bc04810063a", - "file_size_bytes": 4142989 - }, - { - "name": "Gp0115673_EC TSV file", - "description": "EC TSV file for Gp0115673", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ec.tsv", - "md5_checksum": "418e74fcbe4b97b8d74cb697a3b3feb4", - "id": "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", - "file_size_bytes": 2665975 - }, - { - "name": "Gp0115673_COG GFF file", - "description": "COG GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cog.gff", - "md5_checksum": "2d57dd06178c83c1f9c4bfaecf34b8b4", - "id": "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", - "file_size_bytes": 17716812 - }, - { - "name": "Gp0115673_PFAM GFF file", - "description": "PFAM GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_pfam.gff", - "md5_checksum": "42173701162f4fdb727bc4eded48c2a1", - "id": "nmdc:42173701162f4fdb727bc4eded48c2a1", - "file_size_bytes": 14043787 - }, - { - "name": "Gp0115673_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_tigrfam.gff", - "md5_checksum": "89b8851da4dca184654a76128048e09a", - "id": "nmdc:89b8851da4dca184654a76128048e09a", - "file_size_bytes": 2009579 - }, - { - "name": "Gp0115673_SMART GFF file", - "description": "SMART GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_smart.gff", - "md5_checksum": "e0d0721c6051fb0eebd70635882639c1", - "id": "nmdc:e0d0721c6051fb0eebd70635882639c1", - "file_size_bytes": 3834400 - }, - { - "name": "Gp0115673_SuperFam GFF 
file", - "description": "SuperFam GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_supfam.gff", - "md5_checksum": "e9b0a3709e78dd9dfdba4eff7103c425", - "id": "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", - "file_size_bytes": 22131290 - }, - { - "name": "Gp0115673_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cath_funfam.gff", - "md5_checksum": "e627abd2dfaee1fbf695de11211c6971", - "id": "nmdc:e627abd2dfaee1fbf695de11211c6971", - "file_size_bytes": 17702997 - }, - { - "name": "Gp0115673_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko_ec.gff", - "md5_checksum": "a04e32711e814e733114531a666606c6", - "id": "nmdc:a04e32711e814e733114531a666606c6", - "file_size_bytes": 13225993 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115673_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.tooShort.fa", - "md5_checksum": "c907101a9eb50d1e522d1fc11b4d3164", - "id": "nmdc:c907101a9eb50d1e522d1fc11b4d3164", - "file_size_bytes": 35344893 - }, - { - "name": "Gp0115673_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115673", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.unbinned.fa", - "md5_checksum": "f80fbdbf31ee0ac76353d59e64b789bc", - "id": "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", - "file_size_bytes": 8810307 - }, - { - "name": "Gp0115673_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115673", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_checkm_qa.out", - "md5_checksum": "af15089c0cb19ec9bd65f98e59dc94f1", - "id": "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", - "file_size_bytes": 942 - }, - { - "name": "Gp0115673_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115673", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_hqmq_bin.zip", - "md5_checksum": "70d3f2afd9f32a2bdaa81a6fc547f6fb", - "id": "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", - "file_size_bytes": 182 - }, - { - "name": "Gp0115673_metabat2 bins", - "description": "metabat2 bins for Gp0115673", - "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_metabat_bin.zip", - "md5_checksum": "f40d84a4fc0c87d76c144777f9e8a8ea", - "id": "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea", - "file_size_bytes": 1658458 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d8c" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115673", - "url": "https://data.microbiomedata.org/data/1781_86091/assembly/mapping_stats.txt", - "file_size_bytes": 6599486, - "type": "nmdc:DataObject", - "id": "nmdc:c8b6932baf9efa891ba3ef22cdfc747f", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d90" - }, - "description": "Assembled AGP file for gold:Gp0115673", - "url": 
"https://data.microbiomedata.org/data/1781_86091/assembly/assembly.agp", - "file_size_bytes": 5657846, - "type": "nmdc:DataObject", - "id": "nmdc:8f202f5c73cded42a0ee74842d99d453", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d91" - }, - "description": "Assembled contigs fasta for gold:Gp0115673", - "url": "https://data.microbiomedata.org/data/1781_86091/assembly/assembly_contigs.fna", - "file_size_bytes": 49161128, - "type": "nmdc:DataObject", - "id": "nmdc:b2f2d476b77fca0725cb68b0305ea3b0", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d93" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115673", - "url": "https://data.microbiomedata.org/data/1781_86091/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1579984662, - "type": "nmdc:DataObject", - "id": "nmdc:7a768ecc03a7f9bf2f48e0ff038e286c", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d96" - }, - "description": "Assembled scaffold fasta for gold:Gp0115673", - "url": "https://data.microbiomedata.org/data/1781_86091/assembly/assembly_scaffolds.fna", - "file_size_bytes": 48890657, - "type": "nmdc:DataObject", - "id": "nmdc:af2802220167f0c190a161f58e7140ef", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159d4" - }, - "id": "nmdc:54ab9f23cfb3900421112f1c63981d19", - "name": "1781_86091.krona.html", - "description": "Gold:Gp0115673 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86091/ReadbasedAnalysis/centrifuge/1781_86091.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159da" - }, - "id": "nmdc:b92cba553fb3b7f7488f9cf0153170a4", - "name": 
"1781_86091.json", - "description": "Gold:Gp0115673 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86091/ReadbasedAnalysis/1781_86091.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ab" - }, - "id": "nmdc:64d4b2d627893f1add14860728cce4dd", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115673", - "file_size_bytes": 34259785, - "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ac" - }, - "id": "nmdc:844cbe586fb4d8c7523f5e48bcf269e4", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115673", - "file_size_bytes": 9383451, - "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ad" - }, - "id": "nmdc:51877a97315cae458f13c66d23bb5938", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115673", - "file_size_bytes": 936, - "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ae" - }, - "id": "nmdc:9234a7d807019d0678be49a2b0bf8902", - "name": "gold:Gp0115673.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115673", - "file_size_bytes": 3819274, - "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162af" - }, - "id": "nmdc:99160995b52b9234959f882fee6d2a6b", - "name": "gold:Gp0115673.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115673", 
- "file_size_bytes": 991444, - "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d6b" - }, - "description": "Structural annotation GFF file for gold:Gp0115673", - "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_structural_annotation.gff", - "md5_checksum": "2fba563f11988f4e30d2b4283c3c5487", - "file_size_bytes": 3385, - "id": "nmdc:2fba563f11988f4e30d2b4283c3c5487", - "name": "gold:Gp0115673_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d6d" - }, - "description": "EC TSV File for gold:Gp0115673", - "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_ec.tsv", - "md5_checksum": "da4d331daa6d5965be8e201c3c9ba4d4", - "file_size_bytes": 3385, - "id": "nmdc:da4d331daa6d5965be8e201c3c9ba4d4", - "name": "gold:Gp0115673_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d93" - }, - "description": "Functional annotation GFF file for gold:Gp0115673", - "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_functional_annotation.gff", - "md5_checksum": "b7264d7a1c56fc32c4a0c050fe04208e", - "file_size_bytes": 3385, - "id": "nmdc:b7264d7a1c56fc32c4a0c050fe04208e", - "name": "gold:Gp0115673_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dbb" - }, - "description": "Protein FAA for gold:Gp0115673", - "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_proteins.faa", - "md5_checksum": "d325906b9b82b3bfc2fe8ed7321a828e", - "file_size_bytes": 3385, - "id": "nmdc:d325906b9b82b3bfc2fe8ed7321a828e", - "name": 
"gold:Gp0115673_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dd0" - }, - "description": "KO TSV File for gold:Gp0115673", - "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_ko.tsv", - "md5_checksum": "73cac6bcbfa2627ab291bf230ded9748", - "file_size_bytes": 3385, - "id": "nmdc:73cac6bcbfa2627ab291bf230ded9748", - "name": "gold:Gp0115673_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3471f" - }, - "has_input": [ - "nmdc:06d4964c0822abd6f94ca883c122f7ce", - "nmdc:2e293158750df042be7422826125bef2", - "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5" - ], - "too_short_contig_num": 83787, - "part_of": [ - "nmdc:mga0kpja70" - ], - "binned_contig_num": 890, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:c907101a9eb50d1e522d1fc11b4d3164", - "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", - "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", - "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", - "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea" - ], - "was_informed_by": "gold:Gp0115673", - "input_contig_num": 89806, - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0kpja70", - "mags_list": [ - { - "number_of_contig": 67, - "completeness": 12.5, - "bin_name": "bins.1", - "gene_count": 318, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - }, - { - "number_of_contig": 823, - 
"completeness": 97.81, - "bin_name": "bins.2", - "gene_count": 5828, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 66.19, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 63 - } - ], - "unbinned_contig_num": 5129, - "started_at_time": "2021-10-11T02:28:36Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:32:43+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c0" - }, - "has_input": [ - "nmdc:06d4964c0822abd6f94ca883c122f7ce" - ], - "part_of": [ - "nmdc:mga0kpja70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:be3b8decbc48f9588daca36ca4c883ab", - "nmdc:106c834bb14367ec6154d1b04f2a1021", - "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", - "nmdc:84e3913c75d155fc45f04bc04810063a", - "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", - "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", - "nmdc:42173701162f4fdb727bc4eded48c2a1", - "nmdc:89b8851da4dca184654a76128048e09a", - "nmdc:e0d0721c6051fb0eebd70635882639c1", - "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", - "nmdc:e627abd2dfaee1fbf695de11211c6971", - "nmdc:a04e32711e814e733114531a666606c6" - ], - "was_informed_by": "gold:Gp0115673", - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0kpja70", - "started_at_time": "2021-10-11T02:28:36Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:32:43+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa8" - }, - "has_input": [ - "nmdc:268918f610926421d2af43f175553680" - ], - "part_of": [ - "nmdc:mga0kpja70" - ], - "ctg_logsum": 160283, - 
"scaf_logsum": 161291, - "gap_pct": 0.0036, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:06d4964c0822abd6f94ca883c122f7ce", - "nmdc:bad916c69afe839097650b0b9526a841", - "nmdc:a187658f262fa495de43707aabcbf480", - "nmdc:c525c04f90889be615025c667908370c", - "nmdc:2e293158750df042be7422826125bef2" - ], - "asm_score": 6.419, - "was_informed_by": "gold:Gp0115673", - "ctg_powsum": 18694, - "scaf_max": 39252, - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "scaf_powsum": 18825, - "execution_resource": "NERSC-Cori", - "contigs": 89808, - "name": "Assembly Activity for nmdc:mga0kpja70", - "ctg_max": 39252, - "gc_std": 0.11246, - "contig_bp": 46120517, - "gc_avg": 0.55483, - "started_at_time": "2021-10-11T02:28:36Z", - "scaf_bp": 46122177, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 89660, - "ended_at_time": "2021-10-11T03:32:43+00:00", - "ctg_l50": 493, - "ctg_l90": 286, - "ctg_n50": 19910, - "ctg_n90": 73487, - "scaf_l50": 494, - "scaf_l90": 286, - "scaf_n50": 19797, - "scaf_n90": 73347 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b4c" - }, - "id": "nmdc:omprc-11-hk1bje46", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-5h7px351" - ], - "has_output": [ - "jgi:55d817f70d8785342fcf8270" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - 
"principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115673" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c876" - }, - "has_input": [ - "nmdc:3783bc4ce3716b6d299533bc3f6591b6" - ], - "part_of": [ - "nmdc:mga0kpja70" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:268918f610926421d2af43f175553680", - "nmdc:4610980cf3558f5a9830797ead97362a" - ], - "was_informed_by": "gold:Gp0115673", - "input_read_count": 17796788, - "output_read_bases": 2520029380, - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "execution_resource": "NERSC-Cori", - "input_read_bases": 2687314988, - "name": "Read QC Activity for nmdc:mga0kpja70", - "output_read_count": 16817496, - "started_at_time": "2021-10-11T02:28:36Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:32:43+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf44" - }, - "has_input": [ - "nmdc:268918f610926421d2af43f175553680" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c7b24571b61a33018cf118b5424b787f", - "nmdc:e185734176505343bf4c83c16a0a9fe2", - "nmdc:7c6b0ef44450c747580826a2e218844b", - "nmdc:5b98c377f424d7609f1a09e350cfb837", - "nmdc:b5f7a68a94b356001014d1be024231af", - "nmdc:75bca66cfcdd38331c10edbba03fa0d3", - "nmdc:35bf579641b2ffb3614098d9811a4968", - "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", - "nmdc:a7030fa8e9622e3396c2b96448e90c3b" - ], - "was_informed_by": "gold:Gp0115673", - "id": "nmdc:7ae51c3485db8a27225f08083565b28e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", - "started_at_time": "2021-10-11T02:28:36Z", - "type": "nmdc:ReadbasedAnalysis", - 
"ended_at_time": "2021-10-11T03:32:43+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2065080622, - "type": "nmdc:DataObject", - "id": "jgi:55d817fa0d8785342fcf8272", - "name": "9387.2.132031.ATGTCA.fastq.gz" - }, - { - "name": "Gp0115671_Filtered Reads", - "description": "Filtered Reads for Gp0115671", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filtered.fastq.gz", - "md5_checksum": "445f37bc3019e9fe3b29a2ac5bcbfc9c", - "id": "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", - "file_size_bytes": 1806996776 - }, - { - "name": "Gp0115671_Filtered Stats", - "description": "Filtered Stats for Gp0115671", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filterStats.txt", - "md5_checksum": "24440b4c5534da30eee650b68eccda84", - "id": "nmdc:24440b4c5534da30eee650b68eccda84", - "file_size_bytes": 289 - }, - { - "name": "Gp0115671_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", - "md5_checksum": "358559c32b69eff51758db66ac01021b", - "id": "nmdc:358559c32b69eff51758db66ac01021b", - "file_size_bytes": 11833 - }, - { - "name": "Gp0115671_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", - "md5_checksum": 
"befbd648249c2871bd27999120e50bf7", - "id": "nmdc:befbd648249c2871bd27999120e50bf7", - "file_size_bytes": 888177 - }, - { - "name": "Gp0115671_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115671", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", - "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", - "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "file_size_bytes": 261703 - }, - { - "name": "Gp0115671_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115671", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", - "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", - "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "file_size_bytes": 1474970402 - }, - { - "name": "Gp0115671_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115671", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", - "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", - "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "file_size_bytes": 255777 - }, - { - "name": "Gp0115671_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115671", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", - "md5_checksum": "e0736ff520260ba2097c02b9e767362c", - "id": "nmdc:e0736ff520260ba2097c02b9e767362c", - "file_size_bytes": 2329875 - }, - { - "name": "Gp0115671_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115671", - "data_object_type": "Kraken2 
Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", - "md5_checksum": "a00960655f9e80726fdb0fade1bec958", - "id": "nmdc:a00960655f9e80726fdb0fade1bec958", - "file_size_bytes": 1213240496 - }, - { - "name": "Gp0115671_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115671", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", - "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", - "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "file_size_bytes": 659715 - }, - { - "name": "Gp0115671_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115671", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", - "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", - "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", - "file_size_bytes": 4010701 - }, - { - "name": "Gp0115671_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115671", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_contigs.fna", - "md5_checksum": "0a1ebd847e3bb8f928ef491497f8355b", - "id": "nmdc:0a1ebd847e3bb8f928ef491497f8355b", - "file_size_bytes": 58744710 - }, - { - "name": "Gp0115671_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115671", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_scaffolds.fna", - "md5_checksum": "be4cab04a701bce0ed99605109bd5d6f", - "id": "nmdc:be4cab04a701bce0ed99605109bd5d6f", - "file_size_bytes": 58382380 - }, - { - "name": "Gp0115671_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig 
Coverage Stats for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_covstats.txt", - "md5_checksum": "cc4d3160618a82f81518bdc97ce1f5e2", - "id": "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", - "file_size_bytes": 9464710 - }, - { - "name": "Gp0115671_Assembled AGP file", - "description": "Assembled AGP file for Gp0115671", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_assembly.agp", - "md5_checksum": "473ca208ab97399a644c8e5326e765e5", - "id": "nmdc:473ca208ab97399a644c8e5326e765e5", - "file_size_bytes": 8820452 - }, - { - "name": "Gp0115671_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115671", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_pairedMapped_sorted.bam", - "md5_checksum": "69371e513bebd1069a0ed26cc2c914cb", - "id": "nmdc:69371e513bebd1069a0ed26cc2c914cb", - "file_size_bytes": 1938214126 - }, - { - "name": "Gp0115671_Protein FAA", - "description": "Protein FAA for Gp0115671", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_proteins.faa", - "md5_checksum": "147b97234576ba123a9f3c63eb249ecf", - "id": "nmdc:147b97234576ba123a9f3c63eb249ecf", - "file_size_bytes": 32911597 - }, - { - "name": "Gp0115671_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115671", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_structural_annotation.gff", - "md5_checksum": "3e037f5f744c9f8e4aa355222cc620ae", - "id": "nmdc:3e037f5f744c9f8e4aa355222cc620ae", - "file_size_bytes": 2516 - }, - { - "name": "Gp0115671_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115671", 
- "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_functional_annotation.gff", - "md5_checksum": "10d19849864ecdb722335200d0607bbe", - "id": "nmdc:10d19849864ecdb722335200d0607bbe", - "file_size_bytes": 38009425 - }, - { - "name": "Gp0115671_KO TSV file", - "description": "KO TSV file for Gp0115671", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko.tsv", - "md5_checksum": "0ce9fa5958b6445f7be463538e89e9b1", - "id": "nmdc:0ce9fa5958b6445f7be463538e89e9b1", - "file_size_bytes": 4994549 - }, - { - "name": "Gp0115671_EC TSV file", - "description": "EC TSV file for Gp0115671", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ec.tsv", - "md5_checksum": "a3bc059d9350034f835be4e754486c73", - "id": "nmdc:a3bc059d9350034f835be4e754486c73", - "file_size_bytes": 3207987 - }, - { - "name": "Gp0115671_COG GFF file", - "description": "COG GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cog.gff", - "md5_checksum": "da9866461051130a44f0982b1a65c061", - "id": "nmdc:da9866461051130a44f0982b1a65c061", - "file_size_bytes": 21138081 - }, - { - "name": "Gp0115671_PFAM GFF file", - "description": "PFAM GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_pfam.gff", - "md5_checksum": "676fff23fb641ee8af8a2b948fc5b46e", - "id": "nmdc:676fff23fb641ee8af8a2b948fc5b46e", - "file_size_bytes": 16269399 - }, - { - "name": "Gp0115671_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_tigrfam.gff", - "md5_checksum": "a4aa56158a292b63078eb029ed1d90a9", - "id": 
"nmdc:a4aa56158a292b63078eb029ed1d90a9", - "file_size_bytes": 2189740 - }, - { - "name": "Gp0115671_SMART GFF file", - "description": "SMART GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_smart.gff", - "md5_checksum": "6a28f85e8b5addccb429cc7f8964e496", - "id": "nmdc:6a28f85e8b5addccb429cc7f8964e496", - "file_size_bytes": 4669463 - }, - { - "name": "Gp0115671_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_supfam.gff", - "md5_checksum": "d5b21cce7406ab46611c49dc1ab658ed", - "id": "nmdc:d5b21cce7406ab46611c49dc1ab658ed", - "file_size_bytes": 26589549 - }, - { - "name": "Gp0115671_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cath_funfam.gff", - "md5_checksum": "8ead1ab881fd48527d853b0d0601b4bc", - "id": "nmdc:8ead1ab881fd48527d853b0d0601b4bc", - "file_size_bytes": 20889965 - }, - { - "name": "Gp0115671_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko_ec.gff", - "md5_checksum": "ad206c1031a6f0a7805034dee03ff889", - "id": "nmdc:ad206c1031a6f0a7805034dee03ff889", - "file_size_bytes": 15914575 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115671_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs 
fasta file by metaBat2 for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.tooShort.fa", - "md5_checksum": "57fd559aaca7b976f3b38bb1a3ce362b", - "id": "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", - "file_size_bytes": 48167943 - }, - { - "name": "Gp0115671_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.unbinned.fa", - "md5_checksum": "43a900225e93216944b4eec3a01f7db7", - "id": "nmdc:43a900225e93216944b4eec3a01f7db7", - "file_size_bytes": 9124730 - }, - { - "name": "Gp0115671_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115671", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_checkm_qa.out", - "md5_checksum": "cad0e18a4d2c4067a2724f41e449cb86", - "id": "nmdc:cad0e18a4d2c4067a2724f41e449cb86", - "file_size_bytes": 1014 - }, - { - "name": "Gp0115671_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115671", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_hqmq_bin.zip", - "md5_checksum": "55577aa26faf185b3b3f4c78711e7715", - "id": "nmdc:55577aa26faf185b3b3f4c78711e7715", - "file_size_bytes": 182 - }, - { - "name": "Gp0115671_metabat2 bins", - "description": "metabat2 bins for Gp0115671", - "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_metabat_bin.zip", - "md5_checksum": "c484ee1e530a0c9b47069c0288110e47", - "id": "nmdc:c484ee1e530a0c9b47069c0288110e47", - "file_size_bytes": 444082 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d83" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115671", - "url": 
"https://data.microbiomedata.org/data/1781_86095/assembly/mapping_stats.txt", - "file_size_bytes": 8863080, - "type": "nmdc:DataObject", - "id": "nmdc:b422a9fcf9c3fb738a67d9b007e6e063", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d86" - }, - "description": "Assembled contigs fasta for gold:Gp0115671", - "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly_contigs.fna", - "file_size_bytes": 58143080, - "type": "nmdc:DataObject", - "id": "nmdc:cbbbd9da9ae7fc0d7cd3ad507977a0fe", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d88" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115671", - "url": "https://data.microbiomedata.org/data/1781_86095/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1911271746, - "type": "nmdc:DataObject", - "id": "nmdc:0594950317ff722111f4ffd3a11304ab", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d89" - }, - "description": "Assembled scaffold fasta for gold:Gp0115671", - "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly_scaffolds.fna", - "file_size_bytes": 57781170, - "type": "nmdc:DataObject", - "id": "nmdc:84d2fa8698a27a1b5b5e493494863296", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d8a" - }, - "description": "Assembled AGP file for gold:Gp0115671", - "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly.agp", - "file_size_bytes": 7616352, - "type": "nmdc:DataObject", - "id": "nmdc:49e748d2c3a8f4aaeb65019da319287e", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159c0" - }, - "id": "nmdc:dd2f65c7b8ae6d5b3348968d354fb744", - "name": 
"1781_86095.krona.html", - "description": "Gold:Gp0115671 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86095/ReadbasedAnalysis/centrifuge/1781_86095.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159cd" - }, - "id": "nmdc:60800e393e9c757603261909577320b6", - "name": "1781_86095.json", - "description": "Gold:Gp0115671 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86095/ReadbasedAnalysis/1781_86095.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16292" - }, - "id": "nmdc:0f654b06229134dbe8dca13a709b9575", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115671", - "file_size_bytes": 46779370, - "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16295" - }, - "id": "nmdc:1bf83c5dc0174021cd428b7354033bb8", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115671", - "file_size_bytes": 9883327, - "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16296" - }, - "id": "nmdc:8c12b5ddb4bda80c1c255b2c887afb34", - "name": "gold:Gp0115671.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115671", - "file_size_bytes": 223419, - "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16298" - }, - "id": "nmdc:d27e4b42b83c999df80390a378c2c189", - "name": "gold:Gp0115671.bins.2.fa", - "description": "metabat2 binned contig file for 
gold:Gp0115671", - "file_size_bytes": 405648, - "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1629b" - }, - "id": "nmdc:2de7dcd5c53b16b1f2ea8e6006384dec", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115671", - "file_size_bytes": 930, - "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d73" - }, - "description": "EC TSV File for gold:Gp0115671", - "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_ec.tsv", - "md5_checksum": "75e88ab163c9d092836f9110768c6a52", - "file_size_bytes": 3385, - "id": "nmdc:75e88ab163c9d092836f9110768c6a52", - "name": "gold:Gp0115671_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d76" - }, - "description": "KO TSV File for gold:Gp0115671", - "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_ko.tsv", - "md5_checksum": "9c6c644e821021661d936d374ee9fc1b", - "file_size_bytes": 3385, - "id": "nmdc:9c6c644e821021661d936d374ee9fc1b", - "name": "gold:Gp0115671_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d78" - }, - "description": "Functional annotation GFF file for gold:Gp0115671", - "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_functional_annotation.gff", - "md5_checksum": "8f5a7f2db6790e67282439becd4c04b2", - "file_size_bytes": 3385, - "id": "nmdc:8f5a7f2db6790e67282439becd4c04b2", - "name": "gold:Gp0115671_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": 
"nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d7e" - }, - "description": "Protein FAA for gold:Gp0115671", - "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_proteins.faa", - "md5_checksum": "f5a4336c7ac10e908cfe90a61a991c65", - "file_size_bytes": 3385, - "id": "nmdc:f5a4336c7ac10e908cfe90a61a991c65", - "name": "gold:Gp0115671_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d87" - }, - "description": "Structural annotation GFF file for gold:Gp0115671", - "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_structural_annotation.gff", - "md5_checksum": "ad6e88d469fbad7b0684afb933403a6c", - "file_size_bytes": 3385, - "id": "nmdc:ad6e88d469fbad7b0684afb933403a6c", - "name": "gold:Gp0115671_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34733" - }, - "has_input": [ - "nmdc:0a1ebd847e3bb8f928ef491497f8355b", - "nmdc:69371e513bebd1069a0ed26cc2c914cb", - "nmdc:10d19849864ecdb722335200d0607bbe" - ], - "too_short_contig_num": 114372, - "part_of": [ - "nmdc:mga0rw1351" - ], - "binned_contig_num": 328, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", - "nmdc:43a900225e93216944b4eec3a01f7db7", - "nmdc:cad0e18a4d2c4067a2724f41e449cb86", - "nmdc:55577aa26faf185b3b3f4c78711e7715", - "nmdc:c484ee1e530a0c9b47069c0288110e47" - ], - "was_informed_by": "gold:Gp0115671", - "input_contig_num": 120326, - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0rw1351", - "mags_list": 
[ - { - "number_of_contig": 173, - "completeness": 26.29, - "bin_name": "bins.1", - "gene_count": 875, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.18, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 14 - }, - { - "number_of_contig": 155, - "completeness": 24.1, - "bin_name": "bins.2", - "gene_count": 806, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 9 - } - ], - "unbinned_contig_num": 5626, - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:39:05+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9d3" - }, - "has_input": [ - "nmdc:0a1ebd847e3bb8f928ef491497f8355b" - ], - "part_of": [ - "nmdc:mga0rw1351" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:147b97234576ba123a9f3c63eb249ecf", - "nmdc:3e037f5f744c9f8e4aa355222cc620ae", - "nmdc:10d19849864ecdb722335200d0607bbe", - "nmdc:0ce9fa5958b6445f7be463538e89e9b1", - "nmdc:a3bc059d9350034f835be4e754486c73", - "nmdc:da9866461051130a44f0982b1a65c061", - "nmdc:676fff23fb641ee8af8a2b948fc5b46e", - "nmdc:a4aa56158a292b63078eb029ed1d90a9", - "nmdc:6a28f85e8b5addccb429cc7f8964e496", - "nmdc:d5b21cce7406ab46611c49dc1ab658ed", - "nmdc:8ead1ab881fd48527d853b0d0601b4bc", - "nmdc:ad206c1031a6f0a7805034dee03ff889" - ], - "was_informed_by": "gold:Gp0115671", - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "execution_resource": "NERSC-Cori", - "name": 
"Annotation Activity for nmdc:mga0rw1351", - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:39:05+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fc1" - }, - "has_input": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" - ], - "part_of": [ - "nmdc:mga0rw1351" - ], - "ctg_logsum": 111611, - "scaf_logsum": 112140, - "gap_pct": 0.00155, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0a1ebd847e3bb8f928ef491497f8355b", - "nmdc:be4cab04a701bce0ed99605109bd5d6f", - "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", - "nmdc:473ca208ab97399a644c8e5326e765e5", - "nmdc:69371e513bebd1069a0ed26cc2c914cb" - ], - "asm_score": 3.588, - "was_informed_by": "gold:Gp0115671", - "ctg_powsum": 12152, - "scaf_max": 16504, - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "scaf_powsum": 12215, - "execution_resource": "NERSC-Cori", - "contigs": 120326, - "name": "Assembly Activity for nmdc:mga0rw1351", - "ctg_max": 16504, - "gc_std": 0.11331, - "contig_bp": 54171370, - "gc_avg": 0.54451, - "started_at_time": "2021-10-11T02:27:50Z", - "scaf_bp": 54172210, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 120242, - "ended_at_time": "2021-10-11T03:39:05+00:00", - "ctg_l50": 421, - "ctg_l90": 285, - "ctg_n50": 34725, - "ctg_n90": 101428, - "scaf_l50": 421, - "scaf_l90": 285, - "scaf_n50": 34687, - "scaf_n90": 101345 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b4d" - }, - "id": "nmdc:omprc-11-qtje8r57", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point 
T4.", - "has_input": [ - "nmdc:bsm-11-wzdqhh45" - ], - "has_output": [ - "jgi:55d817fa0d8785342fcf8272" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115671" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c891" - }, - "has_input": [ - "nmdc:57d2e9b1a32e13f859c8b6e450ac3402" - ], - "part_of": [ - "nmdc:mga0rw1351" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", - "nmdc:24440b4c5534da30eee650b68eccda84" - ], - "was_informed_by": "gold:Gp0115671", - "input_read_count": 22298982, - "output_read_bases": 3062549086, - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3367146282, - "name": "Read QC Activity for nmdc:mga0rw1351", - "output_read_count": 20445042, - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:39:05+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf56" - }, - "has_input": [ - "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:358559c32b69eff51758db66ac01021b", - "nmdc:befbd648249c2871bd27999120e50bf7", - "nmdc:cacb8f623a808d0cae094d46f2801dd3", - "nmdc:1b15ffb745e320a9bf0cac7e672e974b", - "nmdc:90b77c7118bf6ec1f99836a50d562a7f", - "nmdc:e0736ff520260ba2097c02b9e767362c", 
- "nmdc:a00960655f9e80726fdb0fade1bec958", - "nmdc:366bf195f71d2c35a9b47c0f29381e85", - "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" - ], - "was_informed_by": "gold:Gp0115671", - "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", - "started_at_time": "2021-10-11T02:27:50Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:39:05+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 3492714581, - "type": "nmdc:DataObject", - "id": "jgi:55d740220d8785342fcf7e35", - "name": "9422.8.132674.GTGAAA.fastq.gz" - }, - { - "name": "Gp0115676_Filtered Reads", - "description": "Filtered Reads for Gp0115676", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", - "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", - "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", - "file_size_bytes": 3113249122 - }, - { - "name": "Gp0115676_Filtered Stats", - "description": "Filtered Stats for Gp0115676", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", - "md5_checksum": "79815495339053b7935b55dbde02b2ff", - "id": "nmdc:79815495339053b7935b55dbde02b2ff", - "file_size_bytes": 292 - }, - { - "name": "Gp0115676_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", - 
"md5_checksum": "13343b2533892633bcc3655a1ebe788f", - "id": "nmdc:13343b2533892633bcc3655a1ebe788f", - "file_size_bytes": 13659 - }, - { - "name": "Gp0115676_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", - "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", - "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "file_size_bytes": 1168924 - }, - { - "name": "Gp0115676_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115676", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", - "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", - "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", - "file_size_bytes": 267660 - }, - { - "name": "Gp0115676_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115676", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", - "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", - "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "file_size_bytes": 2721808152 - }, - { - "name": "Gp0115676_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115676", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", - "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", - "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "file_size_bytes": 263207 - }, - { - "name": "Gp0115676_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115676", - "data_object_type": "Centrifuge Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", - "md5_checksum": "6e1e28773094884d35c04072309e285a", - "id": "nmdc:6e1e28773094884d35c04072309e285a", - "file_size_bytes": 2347912 - }, - { - "name": "Gp0115676_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115676", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", - "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", - "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "file_size_bytes": 2224468607 - }, - { - "name": "Gp0115676_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115676", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", - "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", - "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "file_size_bytes": 701128 - }, - { - "name": "Gp0115676_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115676", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", - "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", - "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", - "file_size_bytes": 4217185 - }, - { - "name": "Gp0115676_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115676", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_contigs.fna", - "md5_checksum": "19987e32391f846db382edabf14ba43e", - "id": "nmdc:19987e32391f846db382edabf14ba43e", - "file_size_bytes": 105010680 - }, - { - "name": "Gp0115676_Assembled scaffold fasta", - "description": "Assembled 
scaffold fasta for Gp0115676", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_scaffolds.fna", - "md5_checksum": "1a4c5ace6c1b54e057d282031e8bc2c6", - "id": "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", - "file_size_bytes": 104445982 - }, - { - "name": "Gp0115676_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_covstats.txt", - "md5_checksum": "af7a38646011c9e6d0ad2b1ebd7f47c9", - "id": "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", - "file_size_bytes": 14811778 - }, - { - "name": "Gp0115676_Assembled AGP file", - "description": "Assembled AGP file for Gp0115676", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_assembly.agp", - "md5_checksum": "1b665fb0fbbf40a13122100c927b398b", - "id": "nmdc:1b665fb0fbbf40a13122100c927b398b", - "file_size_bytes": 13854137 - }, - { - "name": "Gp0115676_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115676", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_pairedMapped_sorted.bam", - "md5_checksum": "7c1232ff8d861d2e2c111a1dc4a70480", - "id": "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", - "file_size_bytes": 3366223347 - }, - { - "name": "Gp0115676_Protein FAA", - "description": "Protein FAA for Gp0115676", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_proteins.faa", - "md5_checksum": "35adf26b13c97c40147af2f067e0c9be", - "id": "nmdc:35adf26b13c97c40147af2f067e0c9be", - "file_size_bytes": 59120149 - }, - { - "name": "Gp0115676_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115676", - 
"data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_structural_annotation.gff", - "md5_checksum": "3de29d8dede94769e7753f0aaee86691", - "id": "nmdc:3de29d8dede94769e7753f0aaee86691", - "file_size_bytes": 2524 - }, - { - "name": "Gp0115676_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115676", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_functional_annotation.gff", - "md5_checksum": "6fa3d1e5fae636b4199ff57b4776a51c", - "id": "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", - "file_size_bytes": 65284624 - }, - { - "name": "Gp0115676_KO TSV file", - "description": "KO TSV file for Gp0115676", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko.tsv", - "md5_checksum": "b865dcd9976c90dbc8459ec7ccc72d45", - "id": "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", - "file_size_bytes": 9219020 - }, - { - "name": "Gp0115676_EC TSV file", - "description": "EC TSV file for Gp0115676", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ec.tsv", - "md5_checksum": "98b9ea6588dc9ff918298c4a7c567edf", - "id": "nmdc:98b9ea6588dc9ff918298c4a7c567edf", - "file_size_bytes": 5972063 - }, - { - "name": "Gp0115676_COG GFF file", - "description": "COG GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cog.gff", - "md5_checksum": "d8fbe8d24c00eee2ef163e3bb428b718", - "id": "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", - "file_size_bytes": 39290017 - }, - { - "name": "Gp0115676_PFAM GFF file", - "description": "PFAM GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_pfam.gff", - 
"md5_checksum": "ed68f1e7fd4873f1ea756d0c58a9c550", - "id": "nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", - "file_size_bytes": 31343624 - }, - { - "name": "Gp0115676_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_tigrfam.gff", - "md5_checksum": "4d0469ae5b27dd4045d637d2493ccba9", - "id": "nmdc:4d0469ae5b27dd4045d637d2493ccba9", - "file_size_bytes": 4260344 - }, - { - "name": "Gp0115676_SMART GFF file", - "description": "SMART GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_smart.gff", - "md5_checksum": "a893783f6886e31b6bca5b6baede9f66", - "id": "nmdc:a893783f6886e31b6bca5b6baede9f66", - "file_size_bytes": 8240017 - }, - { - "name": "Gp0115676_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_supfam.gff", - "md5_checksum": "2225c723ccf0fd5ea309cfb5ca90d536", - "id": "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", - "file_size_bytes": 48186264 - }, - { - "name": "Gp0115676_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cath_funfam.gff", - "md5_checksum": "1abd69f8096f98174d95d9a3a13c2a3b", - "id": "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", - "file_size_bytes": 38259823 - }, - { - "name": "Gp0115676_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko_ec.gff", - "md5_checksum": "83647c3e1ed96fda36f7c119a3e98182", - "id": "nmdc:83647c3e1ed96fda36f7c119a3e98182", - "file_size_bytes": 29337291 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - 
"data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115676_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.tooShort.fa", - "md5_checksum": "71667f3b8ee0cb5acadc541fa6914022", - "id": "nmdc:71667f3b8ee0cb5acadc541fa6914022", - "file_size_bytes": 75793492 - }, - { - "name": "Gp0115676_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115676", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.unbinned.fa", - "md5_checksum": "0141a64077e0f18adc42cb1915a00fa2", - "id": "nmdc:0141a64077e0f18adc42cb1915a00fa2", - "file_size_bytes": 17366889 - }, - { - "name": "Gp0115676_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115676", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_checkm_qa.out", - "md5_checksum": "982b47616dde63a388400fcc57d7c5b0", - "id": "nmdc:982b47616dde63a388400fcc57d7c5b0", - "file_size_bytes": 1700 - }, - { - "name": "Gp0115676_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115676", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_hqmq_bin.zip", - "md5_checksum": "313eb61bc7577e272eca6332e923f9c4", - "id": "nmdc:313eb61bc7577e272eca6332e923f9c4", - "file_size_bytes": 677741 - }, - { - "name": "Gp0115676_metabat2 bins", - "description": "metabat2 bins for Gp0115676", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_metabat_bin.zip", - "md5_checksum": "763eb40a8905e9b0d459c45222f1b05e", - "id": "nmdc:763eb40a8905e9b0d459c45222f1b05e", - "file_size_bytes": 2885722 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d9e" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115676", - "url": "https://data.microbiomedata.org/data/1781_86099/assembly/mapping_stats.txt", - "file_size_bytes": 13876163, - "type": "nmdc:DataObject", - "id": "nmdc:96941ca922d1e71c5651c276dae2951e", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da0" - }, - "description": "Assembled scaffold fasta for gold:Gp0115676", - "url": "https://data.microbiomedata.org/data/1781_86099/assembly/assembly_scaffolds.fna", - "file_size_bytes": 103511507, - "type": "nmdc:DataObject", - "id": "nmdc:f6a39ee8aee7ca6e8d4b3a351af5097e", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da1" - }, - "description": "Assembled AGP file for gold:Gp0115676", - "url": "https://data.microbiomedata.org/data/1781_86099/assembly/assembly.agp", - "file_size_bytes": 11980587, - "type": "nmdc:DataObject", - "id": "nmdc:665e21a0e5c6dc4de9165db7fc04944b", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da2" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115676", - "url": "https://data.microbiomedata.org/data/1781_86099/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 3317390706, - "type": "nmdc:DataObject", - "id": "nmdc:7b206a8925a1ea97bf5cfbbafd4c1331", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da4" - }, - "description": "Assembled contigs fasta for gold:Gp0115676", - "url": 
"https://data.microbiomedata.org/data/1781_86099/assembly/assembly_contigs.fna", - "file_size_bytes": 104075065, - "type": "nmdc:DataObject", - "id": "nmdc:6525bd7de120f6ed4dd75069d597f261", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159f1" - }, - "id": "nmdc:7bfbfbfea6176042739cd5079cda14bd", - "name": "1781_86099.krona.html", - "description": "Gold:Gp0115676 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86099/ReadbasedAnalysis/centrifuge/1781_86099.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159f5" - }, - "id": "nmdc:e98001eea268f1373182f7b83d43ab1f", - "name": "1781_86099.json", - "description": "Gold:Gp0115676 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86099/ReadbasedAnalysis/1781_86099.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c5" - }, - "id": "nmdc:b80b655f568ca46cf4789674ac6a83cc", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115676", - "file_size_bytes": 1881, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c6" - }, - "id": "nmdc:4c9a69a0c2311a6cdaf5a476cc8c9d42", - "name": "gtdbtk.bac120.summary.tsv", - "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115676", - "file_size_bytes": 815, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c7" - }, - "id": "nmdc:956841108fcf5c6634a57da8f2b7fac7", - "name": "bins.unbinned.fa", - 
"description": "unbinned fasta file from metabat2 for gold:Gp0115676", - "file_size_bytes": 19565106, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c8" - }, - "id": "nmdc:e91e2f12beabcf429ee849748b76801f", - "name": "gold:Gp0115676.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115676", - "file_size_bytes": 2056638, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c9" - }, - "id": "nmdc:31bc3893618af40d8f63e24e2dad6772", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115676", - "file_size_bytes": 73605331, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ca" - }, - "id": "nmdc:ccc55d14e487d71a93085ff56130b44a", - "name": "gold:Gp0115676.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0115676", - "file_size_bytes": 3283805, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162cb" - }, - "id": "nmdc:3b5521b99ccb093a2984e693c20cef84", - "name": "gold:Gp0115676.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115676", - "file_size_bytes": 642694, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162cc" - }, - "id": "nmdc:b0793c3769f2849c8a6a1496bfc555bd", - "name": "gold:Gp0115676.bins.7.fa", - "description": "metabat2 
binned contig file for gold:Gp0115676", - "file_size_bytes": 537417, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162cd" - }, - "id": "nmdc:bd0dc950e63b986b1585aa25c81c0a52", - "name": "gold:Gp0115676.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115676", - "file_size_bytes": 2597982, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d0" - }, - "id": "nmdc:b151fd8de6fd473abe671917580a23d9", - "name": "gold:Gp0115676.bins.6.fa", - "description": "metabat2 binned contig file for gold:Gp0115676", - "file_size_bytes": 249502, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d2" - }, - "id": "nmdc:575b0300e32da8f7a051c5019b4ccfee", - "name": "gold:Gp0115676.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115676", - "file_size_bytes": 228256, - "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d92" - }, - "description": "Protein FAA for gold:Gp0115676", - "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_proteins.faa", - "md5_checksum": "5193d8fa7e151b96396afa8d61851af8", - "file_size_bytes": 3385, - "id": "nmdc:5193d8fa7e151b96396afa8d61851af8", - "name": "gold:Gp0115676_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d96" - }, - "description": "KO TSV File 
for gold:Gp0115676", - "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_ko.tsv", - "md5_checksum": "23762ea8dc5ce375c3827aded41ae2c0", - "file_size_bytes": 3385, - "id": "nmdc:23762ea8dc5ce375c3827aded41ae2c0", - "name": "gold:Gp0115676_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d9a" - }, - "description": "EC TSV File for gold:Gp0115676", - "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_ec.tsv", - "md5_checksum": "bc4755bf8b2c0b7c384eb4ffd8e9e017", - "file_size_bytes": 3385, - "id": "nmdc:bc4755bf8b2c0b7c384eb4ffd8e9e017", - "name": "gold:Gp0115676_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da7" - }, - "description": "Structural annotation GFF file for gold:Gp0115676", - "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_structural_annotation.gff", - "md5_checksum": "e3b04bb85be48814ca078ee871a9296b", - "file_size_bytes": 3385, - "id": "nmdc:e3b04bb85be48814ca078ee871a9296b", - "name": "gold:Gp0115676_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da9" - }, - "description": "Functional annotation GFF file for gold:Gp0115676", - "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_functional_annotation.gff", - "md5_checksum": "d429e7a9bb0344196ed7bcca6131e3c0", - "file_size_bytes": 3385, - "id": "nmdc:d429e7a9bb0344196ed7bcca6131e3c0", - "name": "gold:Gp0115676_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": 
"649b0052ec087f6bbab3472c" - }, - "has_input": [ - "nmdc:19987e32391f846db382edabf14ba43e", - "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", - "nmdc:6fa3d1e5fae636b4199ff57b4776a51c" - ], - "too_short_contig_num": 175121, - "part_of": [ - "nmdc:mga0w3a067" - ], - "binned_contig_num": 1550, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:71667f3b8ee0cb5acadc541fa6914022", - "nmdc:0141a64077e0f18adc42cb1915a00fa2", - "nmdc:982b47616dde63a388400fcc57d7c5b0", - "nmdc:313eb61bc7577e272eca6332e923f9c4", - "nmdc:763eb40a8905e9b0d459c45222f1b05e" - ], - "was_informed_by": "gold:Gp0115676", - "input_contig_num": 187123, - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0w3a067", - "mags_list": [ - { - "number_of_contig": 457, - "completeness": 95.14, - "bin_name": "bins.1", - "gene_count": 6260, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 2, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 76.52, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 85 - }, - { - "number_of_contig": 24, - "completeness": 4.17, - "bin_name": "bins.2", - "gene_count": 246, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 5 - }, - { - "number_of_contig": 175, - "completeness": 36.21, - "bin_name": "bins.3", - "gene_count": 937, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 12 - }, - 
{ - "number_of_contig": 485, - "completeness": 43.26, - "bin_name": "bins.4", - "gene_count": 2590, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.55, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 29 - }, - { - "number_of_contig": 339, - "completeness": 79.0, - "bin_name": "bins.5", - "gene_count": 2464, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 0, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 3.71, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 32 - }, - { - "number_of_contig": 70, - "completeness": 0.0, - "bin_name": "bins.6", - "gene_count": 298, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 1 - } - ], - "unbinned_contig_num": 10452, - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:40:05+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9cb" - }, - "has_input": [ - "nmdc:19987e32391f846db382edabf14ba43e" - ], - "part_of": [ - "nmdc:mga0w3a067" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:35adf26b13c97c40147af2f067e0c9be", - "nmdc:3de29d8dede94769e7753f0aaee86691", - "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", - "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", - "nmdc:98b9ea6588dc9ff918298c4a7c567edf", - 
"nmdc:d8fbe8d24c00eee2ef163e3bb428b718", - "nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", - "nmdc:4d0469ae5b27dd4045d637d2493ccba9", - "nmdc:a893783f6886e31b6bca5b6baede9f66", - "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", - "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", - "nmdc:83647c3e1ed96fda36f7c119a3e98182" - ], - "was_informed_by": "gold:Gp0115676", - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0w3a067", - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:40:05+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fbe" - }, - "has_input": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" - ], - "part_of": [ - "nmdc:mga0w3a067" - ], - "ctg_logsum": 335229, - "scaf_logsum": 337025, - "gap_pct": 0.00236, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:19987e32391f846db382edabf14ba43e", - "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", - "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", - "nmdc:1b665fb0fbbf40a13122100c927b398b", - "nmdc:7c1232ff8d861d2e2c111a1dc4a70480" - ], - "asm_score": 10.939, - "was_informed_by": "gold:Gp0115676", - "ctg_powsum": 40696, - "scaf_max": 163197, - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", - "scaf_powsum": 40973, - "execution_resource": "NERSC-Cori", - "contigs": 187125, - "name": "Assembly Activity for nmdc:mga0w3a067", - "ctg_max": 163197, - "gc_std": 0.10616, - "contig_bp": 97611209, - "gc_avg": 0.5929, - "started_at_time": "2021-10-11T02:26:37Z", - "scaf_bp": 97613509, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 186895, - "ended_at_time": "2021-10-11T05:40:05+00:00", - "ctg_l50": 499, - "ctg_l90": 288, - "ctg_n50": 42676, - "ctg_n90": 155670, - "scaf_l50": 499, - "scaf_l90": 288, - "scaf_n50": 42593, - "scaf_n90": 155449, - "scaf_l_gt50k": 743033, - "scaf_n_gt50k": 11, - "scaf_pct_gt50k": 0.7611989 - } - 
], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b4e" - }, - "id": "nmdc:omprc-11-7ey2jr63", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-pkgtg048" - ], - "has_output": [ - "jgi:55d740220d8785342fcf7e35" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115676" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c883" - }, - "has_input": [ - "nmdc:5672111f6f33b8aff5f65e69ebb41c5e" - ], - "part_of": [ - "nmdc:mga0w3a067" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", - "nmdc:79815495339053b7935b55dbde02b2ff" - ], - "was_informed_by": "gold:Gp0115676", - "input_read_count": 39069214, - "output_read_bases": 5550744725, - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", - "execution_resource": "NERSC-Cori", - "input_read_bases": 5899451314, - "name": "Read QC Activity for nmdc:mga0w3a067", - "output_read_count": 37037822, - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:40:05+00:00" - } - ], 
- "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf4c" - }, - "has_input": [ - "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:13343b2533892633bcc3655a1ebe788f", - "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", - "nmdc:95a2de8be672fd50bf542215194dc4d4", - "nmdc:6cd0210b345d6908ad8ab683b1a11572", - "nmdc:5049a65d2a42d73c5d47373e990b70f7", - "nmdc:6e1e28773094884d35c04072309e285a", - "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", - "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", - "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" - ], - "was_informed_by": "gold:Gp0115676", - "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", - "started_at_time": "2021-10-11T02:26:37Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:40:05+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 6700067822, - "type": "nmdc:DataObject", - "id": "jgi:55a9caff0d87852b2150891e", - "name": "9289.1.128215.TCCTGAG-TATCCTC.fastq.gz" - }, - { - "name": "Gp0115677_Filtered Reads", - "description": "Filtered Reads for Gp0115677", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filtered.fastq.gz", - "md5_checksum": "63c857b3011dec61a08044d518291f23", - "id": "nmdc:63c857b3011dec61a08044d518291f23", - "file_size_bytes": 5307348388 - }, - { - "name": "Gp0115677_Filtered Stats", - "description": 
"Filtered Stats for Gp0115677", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filterStats.txt", - "md5_checksum": "2a79d7978caecf9b08fb2029fa42c9b3", - "id": "nmdc:2a79d7978caecf9b08fb2029fa42c9b3", - "file_size_bytes": 279 - }, - { - "name": "Gp0115677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", - "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", - "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "file_size_bytes": 17895 - }, - { - "name": "Gp0115677_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", - "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", - "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", - "file_size_bytes": 1182538 - }, - { - "name": "Gp0115677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", - "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", - "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "file_size_bytes": 276802 - }, - { - "name": "Gp0115677_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115677", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", - "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", - "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "file_size_bytes": 4716470614 - }, - { - "name": "Gp0115677_Centrifuge TSV report", - "description": "Centrifuge TSV report 
for Gp0115677", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", - "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", - "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "file_size_bytes": 267231 - }, - { - "name": "Gp0115677_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115677", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", - "md5_checksum": "229017cdb1832bb718d22dc27db44125", - "id": "nmdc:229017cdb1832bb718d22dc27db44125", - "file_size_bytes": 2356003 - }, - { - "name": "Gp0115677_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115677", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", - "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", - "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "file_size_bytes": 3857487871 - }, - { - "name": "Gp0115677_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115677", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", - "md5_checksum": "bdd701b44e67929ec8bbe279697da937", - "id": "nmdc:bdd701b44e67929ec8bbe279697da937", - "file_size_bytes": 708598 - }, - { - "name": "Gp0115677_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115677", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", - "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", - "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", - 
"file_size_bytes": 4250180 - }, - { - "name": "Gp0115677_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115677", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_contigs.fna", - "md5_checksum": "3d9e14d6f7a854042a7d71def080409b", - "id": "nmdc:3d9e14d6f7a854042a7d71def080409b", - "file_size_bytes": 250747283 - }, - { - "name": "Gp0115677_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115677", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_scaffolds.fna", - "md5_checksum": "26d0d64ca7c850f0e04a4c33690bd178", - "id": "nmdc:26d0d64ca7c850f0e04a4c33690bd178", - "file_size_bytes": 249006954 - }, - { - "name": "Gp0115677_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_covstats.txt", - "md5_checksum": "8f8a0622cfe39054bd20f11116c78402", - "id": "nmdc:8f8a0622cfe39054bd20f11116c78402", - "file_size_bytes": 43716675 - }, - { - "name": "Gp0115677_Assembled AGP file", - "description": "Assembled AGP file for Gp0115677", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_assembly.agp", - "md5_checksum": "623aa370c44897cf30844647c2f5bd94", - "id": "nmdc:623aa370c44897cf30844647c2f5bd94", - "file_size_bytes": 41409581 - }, - { - "name": "Gp0115677_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115677", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_pairedMapped_sorted.bam", - "md5_checksum": "f4a1cf24281f14a666a1bfc9afc0aab5", - "id": "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", - "file_size_bytes": 5828772757 - }, - { - 
"name": "Gp0115677_Protein FAA", - "description": "Protein FAA for Gp0115677", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_proteins.faa", - "md5_checksum": "4f9d82516561ee307b1ab4841255aff0", - "id": "nmdc:4f9d82516561ee307b1ab4841255aff0", - "file_size_bytes": 144603933 - }, - { - "name": "Gp0115677_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115677", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_structural_annotation.gff", - "md5_checksum": "a658e9045fde900cdc78d0578446b960", - "id": "nmdc:a658e9045fde900cdc78d0578446b960", - "file_size_bytes": 2546 - }, - { - "name": "Gp0115677_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115677", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_functional_annotation.gff", - "md5_checksum": "075c3477b8874aa8d6c4dbc1360a2b38", - "id": "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", - "file_size_bytes": 167984752 - }, - { - "name": "Gp0115677_KO TSV file", - "description": "KO TSV file for Gp0115677", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko.tsv", - "md5_checksum": "9a338a51c6ca2ec4e0da4e15903be407", - "id": "nmdc:9a338a51c6ca2ec4e0da4e15903be407", - "file_size_bytes": 19341535 - }, - { - "name": "Gp0115677_EC TSV file", - "description": "EC TSV file for Gp0115677", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ec.tsv", - "md5_checksum": "0f9e627ace8d9b8420e957bcd033244a", - "id": "nmdc:0f9e627ace8d9b8420e957bcd033244a", - "file_size_bytes": 12533246 - }, - { - "name": 
"Gp0115677_COG GFF file", - "description": "COG GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cog.gff", - "md5_checksum": "144a997b22098f5fe748d2fa069cdc71", - "id": "nmdc:144a997b22098f5fe748d2fa069cdc71", - "file_size_bytes": 85841510 - }, - { - "name": "Gp0115677_PFAM GFF file", - "description": "PFAM GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_pfam.gff", - "md5_checksum": "82dc44c196f4b6b5552e8360f21f93a0", - "id": "nmdc:82dc44c196f4b6b5552e8360f21f93a0", - "file_size_bytes": 64139943 - }, - { - "name": "Gp0115677_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_tigrfam.gff", - "md5_checksum": "9238a5420065e1da9da31c270c90268a", - "id": "nmdc:9238a5420065e1da9da31c270c90268a", - "file_size_bytes": 7585101 - }, - { - "name": "Gp0115677_SMART GFF file", - "description": "SMART GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_smart.gff", - "md5_checksum": "ce31f29ff8fed6d0a973d61157af7220", - "id": "nmdc:ce31f29ff8fed6d0a973d61157af7220", - "file_size_bytes": 18353478 - }, - { - "name": "Gp0115677_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_supfam.gff", - "md5_checksum": "016cbd549e03d896ed746ab91771b21a", - "id": "nmdc:016cbd549e03d896ed746ab91771b21a", - "file_size_bytes": 107179327 - }, - { - "name": "Gp0115677_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cath_funfam.gff", - "md5_checksum": "7ef0abcd7fba705f6e9e26dcb8b1da8d", - "id": "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", - "file_size_bytes": 85056001 - }, - { - 
"name": "Gp0115677_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko_ec.gff", - "md5_checksum": "c935ce264779684a01c9a7777e506d02", - "id": "nmdc:c935ce264779684a01c9a7777e506d02", - "file_size_bytes": 61547317 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115677_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.tooShort.fa", - "md5_checksum": "603009bd6294d2318d929a57b5d3e5d3", - "id": "nmdc:603009bd6294d2318d929a57b5d3e5d3", - "file_size_bytes": 215021876 - }, - { - "name": "Gp0115677_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.unbinned.fa", - "md5_checksum": "c5334a4e305f78c294c304c3c0526826", - "id": "nmdc:c5334a4e305f78c294c304c3c0526826", - "file_size_bytes": 26658018 - }, - { - "name": "Gp0115677_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115677", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_checkm_qa.out", - "md5_checksum": "6adacc1ba06e5e451f3636c394c71ae8", - "id": "nmdc:6adacc1ba06e5e451f3636c394c71ae8", - "file_size_bytes": 1859 - }, - { - "name": 
"Gp0115677_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115677", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_hqmq_bin.zip", - "md5_checksum": "77d4e2a7f358b9ac1d53b69d7e8c45e1", - "id": "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", - "file_size_bytes": 2309404 - }, - { - "name": "Gp0115677_metabat2 bins", - "description": "metabat2 bins for Gp0115677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_metabat_bin.zip", - "md5_checksum": "42c3fb9a3906f6b413f99e3276bb7550", - "id": "nmdc:42c3fb9a3906f6b413f99e3276bb7550", - "file_size_bytes": 450699 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da3" - }, - "description": "Assembled contigs fasta for gold:Gp0115677", - "url": "https://data.microbiomedata.org/data/1781_86092/assembly/assembly_contigs.fna", - "file_size_bytes": 248003503, - "type": "nmdc:DataObject", - "id": "nmdc:9ca27b985234aaed07e3f6659e0416d0", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14da6" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115677", - "url": "https://data.microbiomedata.org/data/1781_86092/assembly/mapping_stats.txt", - "file_size_bytes": 40972895, - "type": "nmdc:DataObject", - "id": "nmdc:26847612e684ef73baf6d1daf75ba042", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14db1" - }, - "description": "Assembled scaffold fasta for gold:Gp0115677", - "url": "https://data.microbiomedata.org/data/1781_86092/assembly/assembly_scaffolds.fna", - "file_size_bytes": 246291939, - "type": "nmdc:DataObject", - "id": "nmdc:fcac84657291d9e28e15e83d656ce7fd", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14db2" - }, - 
"description": "Metagenome Alignment BAM file for gold:Gp0115677", - "url": "https://data.microbiomedata.org/data/1781_86092/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 5769674213, - "type": "nmdc:DataObject", - "id": "nmdc:32366af15429dcf03ef716a44fed367e", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14db4" - }, - "description": "Assembled AGP file for gold:Gp0115677", - "url": "https://data.microbiomedata.org/data/1781_86092/assembly/assembly.agp", - "file_size_bytes": 35864331, - "type": "nmdc:DataObject", - "id": "nmdc:4027ab07eb8931ae9c5a17b480d238b3", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159f6" - }, - "id": "nmdc:9921b494b07bde6a76e1c3e9e4da11ce", - "name": "1781_86092.krona.html", - "description": "Gold:Gp0115677 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86092/ReadbasedAnalysis/centrifuge/1781_86092.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159fe" - }, - "id": "nmdc:a99cc1fb22427c72f4d6b67cec82948e", - "name": "1781_86092.json", - "description": "Gold:Gp0115677 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86092/ReadbasedAnalysis/1781_86092.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162ce" - }, - "id": "nmdc:7085e0d349fac196f897eadc405b081a", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115677", - "file_size_bytes": 208193586, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162cf" - }, - "id": "nmdc:20faf8ed13a8cde73b0522cb954acf0e", - "name": 
"bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115677", - "file_size_bytes": 28797768, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d1" - }, - "id": "nmdc:963a658cc3e72ac4966f260064cf3c4e", - "name": "gtdbtk.bac120.summary.tsv", - "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115677", - "file_size_bytes": 832, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d3" - }, - "id": "nmdc:d5296a7efb592e30a0f2439b3a6aad40", - "name": "gold:Gp0115677.bins.10.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 756006, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.10.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d4" - }, - "id": "nmdc:e371db754cc99ab772212052997f6e12", - "name": "gold:Gp0115677.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 557597, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d5" - }, - "id": "nmdc:82753f458e12c2c99121f4aec2f62b03", - "name": "gold:Gp0115677.bins.9.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 314985, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.9.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d6" - }, - "id": "nmdc:e795c55b78d5504a3c9da194492bd8f4", - "name": 
"gold:Gp0115677.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 1087308, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d7" - }, - "id": "nmdc:5f7c596adbc713a159c13ac5e8d88e2f", - "name": "gold:Gp0115677.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 621780, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d8" - }, - "id": "nmdc:56ebd3517fb82d228d679991b7b9cfb8", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115677", - "file_size_bytes": 2535, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162d9" - }, - "id": "nmdc:f81840dde4202f7feef24e36df596446", - "name": "gold:Gp0115677.bins.11.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 291445, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.11.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162da" - }, - "id": "nmdc:edae13d8586f04ad81f447fa27adc7a9", - "name": "gold:Gp0115677.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 1196625, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162db" - }, - "id": 
"nmdc:ff320d478c10b7118c4da20ce49793c9", - "name": "gold:Gp0115677.bins.8.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 225889, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.8.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162dc" - }, - "id": "nmdc:ca9221f7ba635008e04c92c7111633e9", - "name": "gold:Gp0115677.bins.7.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 642978, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162dd" - }, - "id": "nmdc:2b9b32a7151436cea05cbddc6ec0dded", - "name": "gold:Gp0115677.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 723505, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162e0" - }, - "id": "nmdc:ee41bb68b0e1e5c7d4724b00c79e5649", - "name": "gold:Gp0115677.bins.6.fa", - "description": "metabat2 binned contig file for gold:Gp0115677", - "file_size_bytes": 323769, - "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d6c" - }, - "description": "KO TSV File for gold:Gp0115677", - "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_ko.tsv", - "md5_checksum": "34d53203f08e6c25c8f85f6e04d6df24", - "file_size_bytes": 3385, - "id": "nmdc:34d53203f08e6c25c8f85f6e04d6df24", - "name": "gold:Gp0115677_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": 
"nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d6f" - }, - "description": "EC TSV File for gold:Gp0115677", - "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_ec.tsv", - "md5_checksum": "8a39e09943350e563b00e23a146c3ec1", - "file_size_bytes": 3385, - "id": "nmdc:8a39e09943350e563b00e23a146c3ec1", - "name": "gold:Gp0115677_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d70" - }, - "description": "Functional annotation GFF file for gold:Gp0115677", - "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_functional_annotation.gff", - "md5_checksum": "e7df895e1a7776ba16b6d77fdc9b077d", - "file_size_bytes": 3385, - "id": "nmdc:e7df895e1a7776ba16b6d77fdc9b077d", - "name": "gold:Gp0115677_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d71" - }, - "description": "Protein FAA for gold:Gp0115677", - "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_proteins.faa", - "md5_checksum": "c0365d39cb481d6e0f729b587dac10c8", - "file_size_bytes": 3385, - "id": "nmdc:c0365d39cb481d6e0f729b587dac10c8", - "name": "gold:Gp0115677_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d77" - }, - "description": "Structural annotation GFF file for gold:Gp0115677", - "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_structural_annotation.gff", - "md5_checksum": "bfbd1bd1ad70307dd01b699ecc4ffb2a", - "file_size_bytes": 3385, - "id": "nmdc:bfbd1bd1ad70307dd01b699ecc4ffb2a", - "name": "gold:Gp0115677_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - 
"functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34724" - }, - "has_input": [ - "nmdc:3d9e14d6f7a854042a7d71def080409b", - "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", - "nmdc:075c3477b8874aa8d6c4dbc1360a2b38" - ], - "too_short_contig_num": 532333, - "part_of": [ - "nmdc:mga0zb0766" - ], - "binned_contig_num": 969, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:603009bd6294d2318d929a57b5d3e5d3", - "nmdc:c5334a4e305f78c294c304c3c0526826", - "nmdc:6adacc1ba06e5e451f3636c394c71ae8", - "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", - "nmdc:42c3fb9a3906f6b413f99e3276bb7550" - ], - "was_informed_by": "gold:Gp0115677", - "input_contig_num": 548756, - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0zb0766", - "mags_list": [ - { - "number_of_contig": 68, - "completeness": 3.17, - "bin_name": "bins.1", - "gene_count": 329, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 2 - }, - { - "number_of_contig": 282, - "completeness": 59.56, - "bin_name": "bins.2", - "gene_count": 1735, - "bin_quality": "MQ", - "gtdbtk_species": "UBA5335 sp002862435", - "gtdbtk_order": "UBA5335", - "num_16s": 0, - "gtdbtk_family": "UBA5335", - "gtdbtk_domain": "Bacteria", - "contamination": 0.0, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA5335", - "num_t_rna": 26 - }, - { - "number_of_contig": 3, - "completeness": 54.6, - "bin_name": "bins.3", - "gene_count": 751, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA9983_A", - "num_16s": 
1, - "gtdbtk_family": "UBA2163", - "gtdbtk_domain": "Bacteria", - "contamination": 1.72, - "gtdbtk_class": "Paceibacteria", - "gtdbtk_phylum": "Patescibacteria", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "1-14-0-10-47-16", - "num_t_rna": 22 - }, - { - "number_of_contig": 90, - "completeness": 98.7, - "bin_name": "bins.4", - "gene_count": 3042, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 0, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 0.0, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 2, - "num_23s": 2, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 46 - }, - { - "number_of_contig": 325, - "completeness": 73.34, - "bin_name": "bins.5", - "gene_count": 2576, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 1, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 0.91, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 35 - }, - { - "number_of_contig": 199, - "completeness": 49.14, - "bin_name": "bins.6", - "gene_count": 1046, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 2, - "num_23s": 2, - "gtdbtk_genus": "", - "num_t_rna": 21 - }, - { - "number_of_contig": 2, - "completeness": 24.32, - "bin_name": "bins.7", - "gene_count": 329, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 16 - } - ], - "unbinned_contig_num": 15454, - "started_at_time": "2021-10-11T02:24:49Z", - "type": 
"nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T06:26:42+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c1" - }, - "has_input": [ - "nmdc:3d9e14d6f7a854042a7d71def080409b" - ], - "part_of": [ - "nmdc:mga0zb0766" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:4f9d82516561ee307b1ab4841255aff0", - "nmdc:a658e9045fde900cdc78d0578446b960", - "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", - "nmdc:9a338a51c6ca2ec4e0da4e15903be407", - "nmdc:0f9e627ace8d9b8420e957bcd033244a", - "nmdc:144a997b22098f5fe748d2fa069cdc71", - "nmdc:82dc44c196f4b6b5552e8360f21f93a0", - "nmdc:9238a5420065e1da9da31c270c90268a", - "nmdc:ce31f29ff8fed6d0a973d61157af7220", - "nmdc:016cbd549e03d896ed746ab91771b21a", - "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", - "nmdc:c935ce264779684a01c9a7777e506d02" - ], - "was_informed_by": "gold:Gp0115677", - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0zb0766", - "started_at_time": "2021-10-11T02:24:49Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T06:26:42+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fad" - }, - "has_input": [ - "nmdc:63c857b3011dec61a08044d518291f23" - ], - "part_of": [ - "nmdc:mga0zb0766" - ], - "ctg_logsum": 407938, - "scaf_logsum": 442802, - "gap_pct": 0.02562, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3d9e14d6f7a854042a7d71def080409b", - "nmdc:26d0d64ca7c850f0e04a4c33690bd178", - "nmdc:8f8a0622cfe39054bd20f11116c78402", - "nmdc:623aa370c44897cf30844647c2f5bd94", - "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5" - ], - "asm_score": 13.853, - "was_informed_by": "gold:Gp0115677", - "ctg_powsum": 50872, - 
"scaf_max": 582605, - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "scaf_powsum": 55815, - "execution_resource": "NERSC-Cori", - "contigs": 548764, - "name": "Assembly Activity for nmdc:mga0zb0766", - "ctg_max": 464697, - "gc_std": 0.11035, - "contig_bp": 229799767, - "gc_avg": 0.55184, - "started_at_time": "2021-10-11T02:24:49Z", - "scaf_bp": 229858665, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 543003, - "ended_at_time": "2021-10-11T06:26:42+00:00", - "ctg_l50": 375, - "ctg_l90": 283, - "ctg_n50": 171281, - "ctg_n90": 471697, - "scaf_l50": 378, - "scaf_l90": 283, - "scaf_n50": 164840, - "scaf_n90": 466121, - "scaf_l_gt50k": 2790937, - "scaf_n_gt50k": 23, - "scaf_pct_gt50k": 1.2141969 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b4f" - }, - "id": "nmdc:omprc-11-qngh7497", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-8362vs44" - ], - "has_output": [ - "jgi:55a9caff0d87852b2150891e" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115677" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c87a" - }, - "has_input": [ - 
"nmdc:80ca2cf2e3edcac29eb62b43f62e25c3" - ], - "part_of": [ - "nmdc:mga0zb0766" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:63c857b3011dec61a08044d518291f23", - "nmdc:2a79d7978caecf9b08fb2029fa42c9b3" - ], - "was_informed_by": "gold:Gp0115677", - "input_read_count": 65434428, - "output_read_bases": 9483843059, - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "execution_resource": "NERSC-Cori", - "input_read_bases": 9880598628, - "name": "Read QC Activity for nmdc:mga0zb0766", - "output_read_count": 64887080, - "started_at_time": "2021-10-11T02:24:49Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T06:26:42+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf3c" - }, - "has_input": [ - "nmdc:63c857b3011dec61a08044d518291f23" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ba32f20b0cc5143783e00c5d1ba15223", - "nmdc:c1730daf5e6017219fd9fc079e42c132", - "nmdc:55b6c047c48f5bf9fb156f139992e4d8", - "nmdc:1c2e2dff881b35a25b4622bbc66c3140", - "nmdc:50f771c7bc17a0b184c2a10a24013f08", - "nmdc:229017cdb1832bb718d22dc27db44125", - "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", - "nmdc:bdd701b44e67929ec8bbe279697da937", - "nmdc:d35583a5ed45df5a58bf084fc67bf988" - ], - "was_informed_by": "gold:Gp0115677", - "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", - "started_at_time": "2021-10-11T02:24:49Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:26:42+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - 
"activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1777604881, - "type": "nmdc:DataObject", - "id": "jgi:55d817f30d8785342fcf826d", - "name": "9387.2.132031.GGCTAC.fastq.gz" - }, - { - "name": "Gp0115675_Filtered Reads", - "description": "Filtered Reads for Gp0115675", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", - "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", - "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", - "file_size_bytes": 1533239347 - }, - { - "name": "Gp0115675_Filtered Stats", - "description": "Filtered Stats for Gp0115675", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", - "md5_checksum": "2507e3f107100ce0c72c57191d450818", - "id": "nmdc:2507e3f107100ce0c72c57191d450818", - "file_size_bytes": 287 - }, - { - "name": "Gp0115675_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", - "md5_checksum": "60d673988c4f4447feb5985e8501e914", - "id": "nmdc:60d673988c4f4447feb5985e8501e914", - "file_size_bytes": 8921 - }, - { - "name": "Gp0115675_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", - "md5_checksum": "a8f93ed13033eb949109b4e83980a893", - "id": "nmdc:a8f93ed13033eb949109b4e83980a893", - "file_size_bytes": 871109 - }, - { - "name": "Gp0115675_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115675", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", - "md5_checksum": "31dd6eb616f1e9815778453ab1601195", - "id": "nmdc:31dd6eb616f1e9815778453ab1601195", - "file_size_bytes": 252578 - }, - { - "name": "Gp0115675_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115675", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", - "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", - "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "file_size_bytes": 1218767711 - }, - { - "name": "Gp0115675_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115675", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", - "md5_checksum": "0aaac507db0e29827e1c87df47324932", - "id": "nmdc:0aaac507db0e29827e1c87df47324932", - "file_size_bytes": 254260 - }, - { - "name": "Gp0115675_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115675", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", - "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", - "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "file_size_bytes": 2324387 - }, - { - "name": "Gp0115675_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115675", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", - "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", - "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "file_size_bytes": 
1001846607 - }, - { - "name": "Gp0115675_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115675", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", - "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", - "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", - "file_size_bytes": 635541 - }, - { - "name": "Gp0115675_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115675", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", - "md5_checksum": "242a1c60f6cb14ba8430375171fda436", - "id": "nmdc:242a1c60f6cb14ba8430375171fda436", - "file_size_bytes": 3968420 - }, - { - "name": "Gp0115675_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115675", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_contigs.fna", - "md5_checksum": "dd5cad9348fc41cb18ac989185fed0b5", - "id": "nmdc:dd5cad9348fc41cb18ac989185fed0b5", - "file_size_bytes": 41662357 - }, - { - "name": "Gp0115675_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115675", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_scaffolds.fna", - "md5_checksum": "6d02084941141ac9a1876c621a50aef0", - "id": "nmdc:6d02084941141ac9a1876c621a50aef0", - "file_size_bytes": 41417652 - }, - { - "name": "Gp0115675_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_covstats.txt", - "md5_checksum": "cc8faed3494579d793c08ede54cb5b3a", - "id": "nmdc:cc8faed3494579d793c08ede54cb5b3a", - "file_size_bytes": 6338871 - }, - { - 
"name": "Gp0115675_Assembled AGP file", - "description": "Assembled AGP file for Gp0115675", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_assembly.agp", - "md5_checksum": "8891e46c9766f2b84d45fd6e46078a64", - "id": "nmdc:8891e46c9766f2b84d45fd6e46078a64", - "file_size_bytes": 5901316 - }, - { - "name": "Gp0115675_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115675", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_pairedMapped_sorted.bam", - "md5_checksum": "80470769e7531b46c709d12c65487ffe", - "id": "nmdc:80470769e7531b46c709d12c65487ffe", - "file_size_bytes": 1635169657 - }, - { - "name": "Gp0115675_Protein FAA", - "description": "Protein FAA for Gp0115675", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_proteins.faa", - "md5_checksum": "93ea50ce57263b498b781240c04dbf46", - "id": "nmdc:93ea50ce57263b498b781240c04dbf46", - "file_size_bytes": 23383485 - }, - { - "name": "Gp0115675_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115675", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_structural_annotation.gff", - "md5_checksum": "71195b9bc697bf29cd865718a689eb1b", - "id": "nmdc:71195b9bc697bf29cd865718a689eb1b", - "file_size_bytes": 2508 - }, - { - "name": "Gp0115675_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115675", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_functional_annotation.gff", - "md5_checksum": "d8cccd9c5cd237c238e5ba443c477db5", - "id": "nmdc:d8cccd9c5cd237c238e5ba443c477db5", - 
"file_size_bytes": 26575202 - }, - { - "name": "Gp0115675_KO TSV file", - "description": "KO TSV file for Gp0115675", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko.tsv", - "md5_checksum": "1cb17c4c7681345f53a7f4ef5c319fba", - "id": "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", - "file_size_bytes": 3577030 - }, - { - "name": "Gp0115675_EC TSV file", - "description": "EC TSV file for Gp0115675", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ec.tsv", - "md5_checksum": "17e386be26f52833c463a89733ef2e34", - "id": "nmdc:17e386be26f52833c463a89733ef2e34", - "file_size_bytes": 2294485 - }, - { - "name": "Gp0115675_COG GFF file", - "description": "COG GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cog.gff", - "md5_checksum": "3e9b2fd11f2f5c16f9f25560e3b6fc55", - "id": "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", - "file_size_bytes": 15181628 - }, - { - "name": "Gp0115675_PFAM GFF file", - "description": "PFAM GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_pfam.gff", - "md5_checksum": "b11e36753299e36fa92670cf75165698", - "id": "nmdc:b11e36753299e36fa92670cf75165698", - "file_size_bytes": 11905020 - }, - { - "name": "Gp0115675_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_tigrfam.gff", - "md5_checksum": "70ac1de5fbc6cc835d5a0d1855f7a28a", - "id": "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", - "file_size_bytes": 1629352 - }, - { - "name": "Gp0115675_SMART GFF file", - "description": "SMART GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_smart.gff", - "md5_checksum": 
"b9e3eb74fa7fee0fac886f8a436b9ecf", - "id": "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", - "file_size_bytes": 3360419 - }, - { - "name": "Gp0115675_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_supfam.gff", - "md5_checksum": "faa27c2be6dc56e66f739dbffcbb6bef", - "id": "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", - "file_size_bytes": 19134944 - }, - { - "name": "Gp0115675_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cath_funfam.gff", - "md5_checksum": "b080e9d168c0c1330fda64814afe335b", - "id": "nmdc:b080e9d168c0c1330fda64814afe335b", - "file_size_bytes": 15037016 - }, - { - "name": "Gp0115675_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko_ec.gff", - "md5_checksum": "4ea799de0bc051409b7231801eea0129", - "id": "nmdc:4ea799de0bc051409b7231801eea0129", - "file_size_bytes": 11398449 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115675_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.tooShort.fa", - "md5_checksum": "826503b4204b77c319c0bb353d69818e", - "id": "nmdc:826503b4204b77c319c0bb353d69818e", - "file_size_bytes": 31246547 - }, - { - 
"name": "Gp0115675_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.unbinned.fa", - "md5_checksum": "9a02c2954014bb8dcd62800609dd3ec5", - "id": "nmdc:9a02c2954014bb8dcd62800609dd3ec5", - "file_size_bytes": 6258719 - }, - { - "name": "Gp0115675_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115675", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_checkm_qa.out", - "md5_checksum": "d15ed915946e095d045d73f4b4de019d", - "id": "nmdc:d15ed915946e095d045d73f4b4de019d", - "file_size_bytes": 1092 - }, - { - "name": "Gp0115675_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115675", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_hqmq_bin.zip", - "md5_checksum": "8de4404b1a6601bae7d7d5fd51bd131a", - "id": "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", - "file_size_bytes": 182 - }, - { - "name": "Gp0115675_metabat2 bins", - "description": "metabat2 bins for Gp0115675", - "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_metabat_bin.zip", - "md5_checksum": "55f66520d821205e80dcd303cc2793bc", - "id": "nmdc:55f66520d821205e80dcd303cc2793bc", - "file_size_bytes": 1259160 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d9a" - }, - "description": "Assembled contigs fasta for gold:Gp0115675", - "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly_contigs.fna", - "file_size_bytes": 41258072, - "type": "nmdc:DataObject", - "id": "nmdc:333b8256818eefecf0581f31a45719f9", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d9b" - }, - 
"description": "Assembled AGP file for gold:Gp0115675", - "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly.agp", - "file_size_bytes": 5091186, - "type": "nmdc:DataObject", - "id": "nmdc:a153a87ca330ba427510d800ac847c95", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d9c" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115675", - "url": "https://data.microbiomedata.org/data/1781_86090/assembly/mapping_stats.txt", - "file_size_bytes": 5934586, - "type": "nmdc:DataObject", - "id": "nmdc:6eca425a70ac889b1d110b88f7907b74", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d9d" - }, - "description": "Assembled scaffold fasta for gold:Gp0115675", - "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly_scaffolds.fna", - "file_size_bytes": 41014137, - "type": "nmdc:DataObject", - "id": "nmdc:b6558fa3c0fcd24593d86fc5c63ab5b5", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14d9f" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115675", - "url": "https://data.microbiomedata.org/data/1781_86090/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1614431528, - "type": "nmdc:DataObject", - "id": "nmdc:c7cce636f2a9bd54e8f62742da37f5cc", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159e6" - }, - "id": "nmdc:f4335a3fd80dc97fbf2ce8bc5b64f0a4", - "name": "1781_86090.krona.html", - "description": "Gold:Gp0115675 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86090/ReadbasedAnalysis/centrifuge/1781_86090.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": 
"649b003d1ae706d7b5b159ea" - }, - "id": "nmdc:05966e29e6f087d77b9e766b5fb9c64f", - "name": "1781_86090.json", - "description": "Gold:Gp0115675 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86090/ReadbasedAnalysis/1781_86090.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162bd" - }, - "id": "nmdc:360d6a6ecfa44731fe4d69f778f11285", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115675", - "file_size_bytes": 6936846, - "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c0" - }, - "id": "nmdc:5aa5000cbfc131f8162b1cd1bc37698e", - "name": "gold:Gp0115675.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115675", - "file_size_bytes": 2739890, - "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c1" - }, - "id": "nmdc:aaaee9ac7ea2ec601b554f01e38e2a4c", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115675", - "file_size_bytes": 1176, - "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c2" - }, - "id": "nmdc:d2c69965d41ba1023d9422e40e3366cc", - "name": "gold:Gp0115675.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115675", - "file_size_bytes": 221316, - "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c3" - }, - "id": 
"nmdc:bec36f95050a4825f0e8eec250dec56a", - "name": "gold:Gp0115675.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115675", - "file_size_bytes": 464857, - "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162c4" - }, - "id": "nmdc:83f09a72cb190961374eae70d64af121", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115675", - "file_size_bytes": 30259643, - "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d66" - }, - "description": "EC TSV File for gold:Gp0115675", - "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_ec.tsv", - "md5_checksum": "b30bdfcd025588bd80ebb3bcdad2cdc8", - "file_size_bytes": 3385, - "id": "nmdc:b30bdfcd025588bd80ebb3bcdad2cdc8", - "name": "gold:Gp0115675_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d69" - }, - "description": "Functional annotation GFF file for gold:Gp0115675", - "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_functional_annotation.gff", - "md5_checksum": "e745ff0c0a95c89393f8789cd8c409e9", - "file_size_bytes": 3385, - "id": "nmdc:e745ff0c0a95c89393f8789cd8c409e9", - "name": "gold:Gp0115675_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d6a" - }, - "description": "KO TSV File for gold:Gp0115675", - "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_ko.tsv", - "md5_checksum": "7ab72f45de20843e167ee1e595bb752d", - "file_size_bytes": 3385, - "id": 
"nmdc:7ab72f45de20843e167ee1e595bb752d", - "name": "gold:Gp0115675_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dab" - }, - "description": "Structural annotation GFF file for gold:Gp0115675", - "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_structural_annotation.gff", - "md5_checksum": "dcb8211231f718d57e22f8dea1efc6d0", - "file_size_bytes": 3385, - "id": "nmdc:dcb8211231f718d57e22f8dea1efc6d0", - "name": "gold:Gp0115675_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dc7" - }, - "description": "Protein FAA for gold:Gp0115675", - "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_proteins.faa", - "md5_checksum": "51f3c008db6a106ee14e160f35f7d9f3", - "file_size_bytes": 3385, - "id": "nmdc:51f3c008db6a106ee14e160f35f7d9f3", - "name": "gold:Gp0115675_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34722" - }, - "has_input": [ - "nmdc:dd5cad9348fc41cb18ac989185fed0b5", - "nmdc:80470769e7531b46c709d12c65487ffe", - "nmdc:d8cccd9c5cd237c238e5ba443c477db5" - ], - "too_short_contig_num": 76352, - "part_of": [ - "nmdc:mga0vf2h47" - ], - "binned_contig_num": 846, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:826503b4204b77c319c0bb353d69818e", - "nmdc:9a02c2954014bb8dcd62800609dd3ec5", - "nmdc:d15ed915946e095d045d73f4b4de019d", - "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", - "nmdc:55f66520d821205e80dcd303cc2793bc" - ], - "was_informed_by": "gold:Gp0115675", - "input_contig_num": 80857, - "id": 
"nmdc:4cfac32b9cd7a8dd01d49117e1078a79", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0vf2h47", - "mags_list": [ - { - "number_of_contig": 579, - "completeness": 73.87, - "bin_name": "bins.1", - "gene_count": 3274, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 25.78, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 37 - }, - { - "number_of_contig": 199, - "completeness": 36.21, - "bin_name": "bins.2", - "gene_count": 1070, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 16 - }, - { - "number_of_contig": 68, - "completeness": 4.17, - "bin_name": "bins.3", - "gene_count": 480, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 4.17, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - } - ], - "unbinned_contig_num": 3659, - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:25:21+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c2" - }, - "has_input": [ - "nmdc:dd5cad9348fc41cb18ac989185fed0b5" - ], - "part_of": [ - "nmdc:mga0vf2h47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:93ea50ce57263b498b781240c04dbf46", - "nmdc:71195b9bc697bf29cd865718a689eb1b", - "nmdc:d8cccd9c5cd237c238e5ba443c477db5", - 
"nmdc:1cb17c4c7681345f53a7f4ef5c319fba", - "nmdc:17e386be26f52833c463a89733ef2e34", - "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", - "nmdc:b11e36753299e36fa92670cf75165698", - "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", - "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", - "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", - "nmdc:b080e9d168c0c1330fda64814afe335b", - "nmdc:4ea799de0bc051409b7231801eea0129" - ], - "was_informed_by": "gold:Gp0115675", - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0vf2h47", - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:25:21+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139faf" - }, - "has_input": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" - ], - "part_of": [ - "nmdc:mga0vf2h47" - ], - "ctg_logsum": 115425, - "scaf_logsum": 116377, - "gap_pct": 0.00425, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:dd5cad9348fc41cb18ac989185fed0b5", - "nmdc:6d02084941141ac9a1876c621a50aef0", - "nmdc:cc8faed3494579d793c08ede54cb5b3a", - "nmdc:8891e46c9766f2b84d45fd6e46078a64", - "nmdc:80470769e7531b46c709d12c65487ffe" - ], - "asm_score": 4.718, - "was_informed_by": "gold:Gp0115675", - "ctg_powsum": 13174, - "scaf_max": 25635, - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", - "scaf_powsum": 13311, - "execution_resource": "NERSC-Cori", - "contigs": 80858, - "name": "Assembly Activity for nmdc:mga0vf2h47", - "ctg_max": 25635, - "gc_std": 0.10716, - "contig_bp": 38571486, - "gc_avg": 0.56103, - "started_at_time": "2021-10-11T02:28:05Z", - "scaf_bp": 38573126, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 80703, - "ended_at_time": "2021-10-11T03:25:21+00:00", - "ctg_l50": 435, - "ctg_l90": 284, - "ctg_n50": 19932, - "ctg_n90": 68422, - "scaf_l50": 436, - "scaf_l90": 284, - "scaf_n50": 19754, - "scaf_n90": 68272 - } - 
], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b50" - }, - "id": "nmdc:omprc-11-jk7zjz92", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-a5d23e19" - ], - "has_output": [ - "jgi:55d817f30d8785342fcf826d" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115675" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c87d" - }, - "has_input": [ - "nmdc:4a9a0183b794a98c57e5b5ce959a3f65" - ], - "part_of": [ - "nmdc:mga0vf2h47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", - "nmdc:2507e3f107100ce0c72c57191d450818" - ], - "was_informed_by": "gold:Gp0115675", - "input_read_count": 18827380, - "output_read_bases": 2508839784, - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", - "execution_resource": "NERSC-Cori", - "input_read_bases": 2842934380, - "name": "Read QC Activity for nmdc:mga0vf2h47", - "output_read_count": 16749572, - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:25:21+00:00" - } - ], - 
"read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf41" - }, - "has_input": [ - "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:60d673988c4f4447feb5985e8501e914", - "nmdc:a8f93ed13033eb949109b4e83980a893", - "nmdc:31dd6eb616f1e9815778453ab1601195", - "nmdc:6d7a930d79f220b06cde8fbf8339e744", - "nmdc:0aaac507db0e29827e1c87df47324932", - "nmdc:6aec8677139ed24ef9cfe0c75b30056f", - "nmdc:d39369f32ada967d7cf52cb503fccf4a", - "nmdc:1ec0247d86889fcef13f39a58a92b066", - "nmdc:242a1c60f6cb14ba8430375171fda436" - ], - "was_informed_by": "gold:Gp0115675", - "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", - "started_at_time": "2021-10-11T02:28:05Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:25:21+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 4637325661, - "type": "nmdc:DataObject", - "id": "jgi:55f23d820d8785306f964980", - "name": "9491.1.134352.AGTTCC.fastq.gz" - }, - { - "name": "Gp0115665_Filtered Reads", - "description": "Filtered Reads for Gp0115665", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filtered.fastq.gz", - "md5_checksum": "b0462e18cf9dafc9d2207a58bf085530", - "id": "nmdc:b0462e18cf9dafc9d2207a58bf085530", - "file_size_bytes": 4096192298 - }, - { - "name": "Gp0115665_Filtered Stats", - "description": "Filtered 
Stats for Gp0115665", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filterStats.txt", - "md5_checksum": "f0e1b9004b0e9aafb06c444444a522c7", - "id": "nmdc:f0e1b9004b0e9aafb06c444444a522c7", - "file_size_bytes": 291 - }, - { - "name": "Gp0115665_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", - "md5_checksum": "432fedddcbacb4e69c0350354ab44080", - "id": "nmdc:432fedddcbacb4e69c0350354ab44080", - "file_size_bytes": 18015 - }, - { - "name": "Gp0115665_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", - "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", - "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "file_size_bytes": 1283220 - }, - { - "name": "Gp0115665_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115665", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", - "md5_checksum": "e3d7339ba5c7677be13854f391462474", - "id": "nmdc:e3d7339ba5c7677be13854f391462474", - "file_size_bytes": 281366 - }, - { - "name": "Gp0115665_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115665", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", - "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", - "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "file_size_bytes": 3481369185 - }, - { - "name": "Gp0115665_Centrifuge TSV report", - "description": "Centrifuge TSV report for 
Gp0115665", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", - "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", - "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "file_size_bytes": 263480 - }, - { - "name": "Gp0115665_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115665", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", - "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", - "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "file_size_bytes": 2347079 - }, - { - "name": "Gp0115665_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115665", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", - "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", - "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "file_size_bytes": 2866138771 - }, - { - "name": "Gp0115665_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115665", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", - "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", - "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "file_size_bytes": 728030 - }, - { - "name": "Gp0115665_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115665", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", - "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", - "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", - 
"file_size_bytes": 4374689 - }, - { - "name": "Gp0115665_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115665", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_contigs.fna", - "md5_checksum": "9704e757dc537a7f06c6f83fc633cf64", - "id": "nmdc:9704e757dc537a7f06c6f83fc633cf64", - "file_size_bytes": 185880663 - }, - { - "name": "Gp0115665_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115665", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_scaffolds.fna", - "md5_checksum": "2674db4e7e6171864fa47f0b3b5a9603", - "id": "nmdc:2674db4e7e6171864fa47f0b3b5a9603", - "file_size_bytes": 184819604 - }, - { - "name": "Gp0115665_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_covstats.txt", - "md5_checksum": "ab6c496a5e3ab895fee3812fd992e1e7", - "id": "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", - "file_size_bytes": 27961807 - }, - { - "name": "Gp0115665_Assembled AGP file", - "description": "Assembled AGP file for Gp0115665", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_assembly.agp", - "md5_checksum": "5a1240fa0a6bf92c95e852c0352e5839", - "id": "nmdc:5a1240fa0a6bf92c95e852c0352e5839", - "file_size_bytes": 26248242 - }, - { - "name": "Gp0115665_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115665", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_pairedMapped_sorted.bam", - "md5_checksum": "e28c85b50e0b654626e655755165aff5", - "id": "nmdc:e28c85b50e0b654626e655755165aff5", - "file_size_bytes": 4460978045 - }, - { - 
"name": "Gp0115665_Protein FAA", - "description": "Protein FAA for Gp0115665", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_proteins.faa", - "md5_checksum": "2d23b05bda1c60f2ef6d54c8fe5fb5e7", - "id": "nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", - "file_size_bytes": 100719814 - }, - { - "name": "Gp0115665_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115665", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_structural_annotation.gff", - "md5_checksum": "6c55ce2e0d6e74d217d850b273c4f0c4", - "id": "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", - "file_size_bytes": 2534 - }, - { - "name": "Gp0115665_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115665", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_functional_annotation.gff", - "md5_checksum": "b3add25cdb76a537e70617ac6a1d1fc5", - "id": "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", - "file_size_bytes": 110405026 - }, - { - "name": "Gp0115665_KO TSV file", - "description": "KO TSV file for Gp0115665", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko.tsv", - "md5_checksum": "b782707ae2cf5676596ca99800deea26", - "id": "nmdc:b782707ae2cf5676596ca99800deea26", - "file_size_bytes": 12963636 - }, - { - "name": "Gp0115665_EC TSV file", - "description": "EC TSV file for Gp0115665", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ec.tsv", - "md5_checksum": "6a8565bf52f70efa03c755a9f0b82d7d", - "id": "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", - "file_size_bytes": 8371381 - }, - { - "name": 
"Gp0115665_COG GFF file", - "description": "COG GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cog.gff", - "md5_checksum": "f5d79b4c69825e0b66153e7582cb489b", - "id": "nmdc:f5d79b4c69825e0b66153e7582cb489b", - "file_size_bytes": 56948501 - }, - { - "name": "Gp0115665_PFAM GFF file", - "description": "PFAM GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_pfam.gff", - "md5_checksum": "f66a0eaa9432ef5a2dd390214f47eed5", - "id": "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", - "file_size_bytes": 45618277 - }, - { - "name": "Gp0115665_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_tigrfam.gff", - "md5_checksum": "26cc0a40aab6bfc64d24afa760b43102", - "id": "nmdc:26cc0a40aab6bfc64d24afa760b43102", - "file_size_bytes": 5245489 - }, - { - "name": "Gp0115665_SMART GFF file", - "description": "SMART GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_smart.gff", - "md5_checksum": "83785a6e8f7658dc2354b9bad1b86d01", - "id": "nmdc:83785a6e8f7658dc2354b9bad1b86d01", - "file_size_bytes": 15993417 - }, - { - "name": "Gp0115665_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_supfam.gff", - "md5_checksum": "0f03207aa38d1aec8afdbf2bec1e4990", - "id": "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", - "file_size_bytes": 76926960 - }, - { - "name": "Gp0115665_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cath_funfam.gff", - "md5_checksum": "4876eed2bee3b3b7b2ac827857410be6", - "id": "nmdc:4876eed2bee3b3b7b2ac827857410be6", - "file_size_bytes": 61571084 - }, - { - 
"name": "Gp0115665_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko_ec.gff", - "md5_checksum": "bb5b62735a896d189c9a274c6e091bab", - "id": "nmdc:bb5b62735a896d189c9a274c6e091bab", - "file_size_bytes": 41244685 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115665_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.tooShort.fa", - "md5_checksum": "79794b0497c1a4a292778ddb94504f7a", - "id": "nmdc:79794b0497c1a4a292778ddb94504f7a", - "file_size_bytes": 146322768 - }, - { - "name": "Gp0115665_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.unbinned.fa", - "md5_checksum": "e26dc245e491a521a94fbb9ab1b4293d", - "id": "nmdc:e26dc245e491a521a94fbb9ab1b4293d", - "file_size_bytes": 30116585 - }, - { - "name": "Gp0115665_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115665", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_checkm_qa.out", - "md5_checksum": "45cb473694eb3cfa8abc7768e87ef303", - "id": "nmdc:45cb473694eb3cfa8abc7768e87ef303", - "file_size_bytes": 1700 - }, - { - "name": 
"Gp0115665_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115665", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_hqmq_bin.zip", - "md5_checksum": "e344d87dbac42a645fd3c7d5b9d0a1a5", - "id": "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", - "file_size_bytes": 2294379 - }, - { - "name": "Gp0115665_metabat2 bins", - "description": "metabat2 bins for Gp0115665", - "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_metabat_bin.zip", - "md5_checksum": "1098bd9921c6ab8f52aca786e3b7bf1d", - "id": "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d", - "file_size_bytes": 534425 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d68" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115665", - "url": "https://data.microbiomedata.org/data/1781_86094/assembly/mapping_stats.txt", - "file_size_bytes": 26201542, - "type": "nmdc:DataObject", - "id": "nmdc:0b1c4ab81deba76f53eb5b266566cc4e", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d69" - }, - "description": "Assembled scaffold fasta for gold:Gp0115665", - "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly_scaffolds.fna", - "file_size_bytes": 183060964, - "type": "nmdc:DataObject", - "id": "nmdc:4a666a393dc8497e61a35c6842a369be", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d6a" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115665", - "url": "https://data.microbiomedata.org/data/1781_86094/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 4399182435, - "type": "nmdc:DataObject", - "id": "nmdc:292511a07ffb1791b7546b4db9843a07", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": 
"649b003c1ae706d7b5b14d6b" - }, - "description": "Assembled AGP file for gold:Gp0115665", - "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly.agp", - "file_size_bytes": 22724422, - "type": "nmdc:DataObject", - "id": "nmdc:a54a1ab841896539024d2748e1b66131", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d70" - }, - "description": "Assembled contigs fasta for gold:Gp0115665", - "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly_contigs.fna", - "file_size_bytes": 184120398, - "type": "nmdc:DataObject", - "id": "nmdc:2235febcd5329a40beb86d8d8411e0c1", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b1598e" - }, - "id": "nmdc:79966acbb43ba7d6f0044b54b7c44a6b", - "name": "1781_86094.krona.html", - "description": "Gold:Gp0115665 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86094/ReadbasedAnalysis/centrifuge/1781_86094.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159a4" - }, - "id": "nmdc:7a86a0c36f9ef12596ff3796cd9277d9", - "name": "1781_86094.json", - "description": "Gold:Gp0115665 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86094/ReadbasedAnalysis/1781_86094.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1625e" - }, - "id": "nmdc:f9db08f4e245f63eede42dedcdbb4def", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115665", - "file_size_bytes": 2145, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16260" - }, - "id": 
"nmdc:97218a09f8e0949fea768a5c616191b0", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115665", - "file_size_bytes": 33177668, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16262" - }, - "id": "nmdc:7ecd6e3a6a8cb6fe9365b57becf82216", - "name": "gold:Gp0115665.bins.5.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 1121208, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16263" - }, - "id": "nmdc:7596d700873fd3e46d0f78284fe0c0f5", - "name": "gold:Gp0115665.bins.9.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 298154, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.9.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16264" - }, - "id": "nmdc:b44f6446e22ea134f1f306f0412f0f5f", - "name": "gold:Gp0115665.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 591265, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16265" - }, - "id": "nmdc:fcb189e3ab45040fe52458c90dd29f6c", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115665", - "file_size_bytes": 142092787, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16266" - }, - "id": 
"nmdc:ee01c153a5253dfe9e8de1bbbf9480fe", - "name": "gold:Gp0115665.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 1178929, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16268" - }, - "id": "nmdc:e08dcba67c8844436094e8c77b0f16f9", - "name": "gold:Gp0115665.bins.6.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 964743, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16269" - }, - "id": "nmdc:54d4d93e1b2c00805f59308f47c950dd", - "name": "gold:Gp0115665.bins.8.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 377968, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.8.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1626a" - }, - "id": "nmdc:b7d7c5484e7902cd4fd2e5b946467326", - "name": "gold:Gp0115665.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 221952, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1626c" - }, - "id": "nmdc:ec4e307b82b0fc7da2a123df702d2202", - "name": "gold:Gp0115665.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 1185129, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": 
"649b003f1ae706d7b5b1626f" - }, - "id": "nmdc:43913ff26d7b2fd7315e4468948fccac", - "name": "gold:Gp0115665.bins.7.fa", - "description": "metabat2 binned contig file for gold:Gp0115665", - "file_size_bytes": 205826, - "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d72" - }, - "description": "Protein FAA for gold:Gp0115665", - "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_proteins.faa", - "md5_checksum": "1b5b79d300bb60afffec76da4cda7f14", - "file_size_bytes": 3385, - "id": "nmdc:1b5b79d300bb60afffec76da4cda7f14", - "name": "gold:Gp0115665_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d74" - }, - "description": "KO TSV File for gold:Gp0115665", - "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_ko.tsv", - "md5_checksum": "e28746f79f2d58d71fd5f42dff8b6dd5", - "file_size_bytes": 3385, - "id": "nmdc:e28746f79f2d58d71fd5f42dff8b6dd5", - "name": "gold:Gp0115665_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d79" - }, - "description": "Functional annotation GFF file for gold:Gp0115665", - "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_functional_annotation.gff", - "md5_checksum": "dceabe03f9758a72038b9824794337e1", - "file_size_bytes": 3385, - "id": "nmdc:dceabe03f9758a72038b9824794337e1", - "name": "gold:Gp0115665_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d7a" - }, - "description": "Structural annotation GFF file for gold:Gp0115665", - "url": 
"https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_structural_annotation.gff", - "md5_checksum": "431860b46c896880c1d8d779fb2645ec", - "file_size_bytes": 3385, - "id": "nmdc:431860b46c896880c1d8d779fb2645ec", - "name": "gold:Gp0115665_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16dc6" - }, - "description": "EC TSV File for gold:Gp0115665", - "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_ec.tsv", - "md5_checksum": "b4a623a8d9418c04567b5712889fcdfd", - "file_size_bytes": 3385, - "id": "nmdc:b4a623a8d9418c04567b5712889fcdfd", - "name": "gold:Gp0115665_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34738" - }, - "has_input": [ - "nmdc:9704e757dc537a7f06c6f83fc633cf64", - "nmdc:e28c85b50e0b654626e655755165aff5", - "nmdc:b3add25cdb76a537e70617ac6a1d1fc5" - ], - "too_short_contig_num": 331533, - "part_of": [ - "nmdc:mga06n7k74" - ], - "binned_contig_num": 1636, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:79794b0497c1a4a292778ddb94504f7a", - "nmdc:e26dc245e491a521a94fbb9ab1b4293d", - "nmdc:45cb473694eb3cfa8abc7768e87ef303", - "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", - "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d" - ], - "was_informed_by": "gold:Gp0115665", - "input_contig_num": 352053, - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga06n7k74", - "mags_list": [ - { - "number_of_contig": 211, - "completeness": 44.36, - "bin_name": "bins.1", - "gene_count": 1029, - "bin_quality": "LQ", - "gtdbtk_species": "", - 
"gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.49, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 21 - }, - { - "number_of_contig": 564, - "completeness": 79.11, - "bin_name": "bins.2", - "gene_count": 4164, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 0, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 3.11, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Aquabacterium", - "num_t_rna": 33 - }, - { - "number_of_contig": 646, - "completeness": 72.48, - "bin_name": "bins.3", - "gene_count": 4108, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Burkholderiales", - "num_16s": 1, - "gtdbtk_family": "Burkholderiaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 2.62, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Rhizobacter", - "num_t_rna": 28 - }, - { - "number_of_contig": 67, - "completeness": 1.97, - "bin_name": "bins.4", - "gene_count": 257, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 3 - }, - { - "number_of_contig": 64, - "completeness": 7.47, - "bin_name": "bins.5", - "gene_count": 259, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - }, - { - "number_of_contig": 84, - "completeness": 3.88, - "bin_name": "bins.6", - "gene_count": 
313, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 0 - } - ], - "unbinned_contig_num": 18884, - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T06:19:29+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9d2" - }, - "has_input": [ - "nmdc:9704e757dc537a7f06c6f83fc633cf64" - ], - "part_of": [ - "nmdc:mga06n7k74" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", - "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", - "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", - "nmdc:b782707ae2cf5676596ca99800deea26", - "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", - "nmdc:f5d79b4c69825e0b66153e7582cb489b", - "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", - "nmdc:26cc0a40aab6bfc64d24afa760b43102", - "nmdc:83785a6e8f7658dc2354b9bad1b86d01", - "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", - "nmdc:4876eed2bee3b3b7b2ac827857410be6", - "nmdc:bb5b62735a896d189c9a274c6e091bab" - ], - "was_informed_by": "gold:Gp0115665", - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga06n7k74", - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T06:19:29+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fc2" - }, - "has_input": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530" - ], - "part_of": [ - "nmdc:mga06n7k74" - ], - "ctg_logsum": 427633, - "scaf_logsum": 429769, - "gap_pct": 0.00206, - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9704e757dc537a7f06c6f83fc633cf64", - "nmdc:2674db4e7e6171864fa47f0b3b5a9603", - "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", - "nmdc:5a1240fa0a6bf92c95e852c0352e5839", - "nmdc:e28c85b50e0b654626e655755165aff5" - ], - "asm_score": 5.768, - "was_informed_by": "gold:Gp0115665", - "ctg_powsum": 48025, - "scaf_max": 44931, - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "scaf_powsum": 48321, - "execution_resource": "NERSC-Cori", - "contigs": 352055, - "name": "Assembly Activity for nmdc:mga06n7k74", - "ctg_max": 44931, - "gc_std": 0.13027, - "contig_bp": 172051088, - "gc_avg": 0.51918, - "started_at_time": "2021-10-11T02:28:54Z", - "scaf_bp": 172054628, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 351728, - "ended_at_time": "2021-10-11T06:19:29+00:00", - "ctg_l50": 468, - "ctg_l90": 289, - "ctg_n50": 95561, - "ctg_n90": 294969, - "scaf_l50": 468, - "scaf_l90": 289, - "scaf_n50": 95446, - "scaf_n90": 294658 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b51" - }, - "id": "nmdc:omprc-11-2jt0jk84", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", - "has_input": [ - "nmdc:bsm-11-qjtgh002" - ], - "has_output": [ - "jgi:55f23d820d8785306f964980" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" 
- }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115665" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c88e" - }, - "has_input": [ - "nmdc:0d39aafcd16496457fbb3be0f785b67f" - ], - "part_of": [ - "nmdc:mga06n7k74" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530", - "nmdc:f0e1b9004b0e9aafb06c444444a522c7" - ], - "was_informed_by": "gold:Gp0115665", - "input_read_count": 50719572, - "output_read_bases": 7175148255, - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "execution_resource": "NERSC-Cori", - "input_read_bases": 7658655372, - "name": "Read QC Activity for nmdc:mga06n7k74", - "output_read_count": 47896142, - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T06:19:29+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf57" - }, - "has_input": [ - "nmdc:b0462e18cf9dafc9d2207a58bf085530" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:432fedddcbacb4e69c0350354ab44080", - "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", - "nmdc:e3d7339ba5c7677be13854f391462474", - "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", - "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", - "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", - "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", - "nmdc:64459bec7843953a70f8ea2b09a7e9de", - "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" - ], - "was_informed_by": "gold:Gp0115665", - "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", - "started_at_time": "2021-10-11T02:28:54Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T06:19:29+00:00" - } - ], - 
"study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1988838112, - "type": "nmdc:DataObject", - "id": "jgi:55d817fe0d8785342fcf8276", - "name": "9387.2.132031.GTAGAG.fastq.gz" - }, - { - "name": "Gp0115669_Filtered Reads", - "description": "Filtered Reads for Gp0115669", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", - "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", - "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", - "file_size_bytes": 1806935637 - }, - { - "name": "Gp0115669_Filtered Stats", - "description": "Filtered Stats for Gp0115669", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", - "md5_checksum": "58fde3e96dbb28af9133bede850a2653", - "id": "nmdc:58fde3e96dbb28af9133bede850a2653", - "file_size_bytes": 286 - }, - { - "name": "Gp0115669_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", - "md5_checksum": "05933784d02331b60b2531e2025cd3b7", - "id": "nmdc:05933784d02331b60b2531e2025cd3b7", - "file_size_bytes": 11362 - }, - { - "name": "Gp0115669_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", - "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", - "id": 
"nmdc:50fc279637cb7048aaaeec9b223d0286", - "file_size_bytes": 909325 - }, - { - "name": "Gp0115669_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115669", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", - "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", - "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "file_size_bytes": 261412 - }, - { - "name": "Gp0115669_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115669", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", - "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", - "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "file_size_bytes": 1481087410 - }, - { - "name": "Gp0115669_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115669", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", - "md5_checksum": "de45d70cc01749e9b5691dc24674545d", - "id": "nmdc:de45d70cc01749e9b5691dc24674545d", - "file_size_bytes": 256139 - }, - { - "name": "Gp0115669_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115669", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", - "md5_checksum": "534f97f3792b74385c4da305196a1b1d", - "id": "nmdc:534f97f3792b74385c4da305196a1b1d", - "file_size_bytes": 2323658 - }, - { - "name": "Gp0115669_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115669", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", - "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", - "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "file_size_bytes": 1220980345 - }, - { - "name": "Gp0115669_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115669", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", - "md5_checksum": "07b6457a094fab96563168ed287dc59f", - "id": "nmdc:07b6457a094fab96563168ed287dc59f", - "file_size_bytes": 651795 - }, - { - "name": "Gp0115669_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115669", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", - "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", - "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", - "file_size_bytes": 3963303 - }, - { - "name": "Gp0115669_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115669", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_contigs.fna", - "md5_checksum": "03eb095e55df50d639fab237d06c14ac", - "id": "nmdc:03eb095e55df50d639fab237d06c14ac", - "file_size_bytes": 58951440 - }, - { - "name": "Gp0115669_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115669", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_scaffolds.fna", - "md5_checksum": "569cb5da239e82dce1b40bfa7e2fd518", - "id": "nmdc:569cb5da239e82dce1b40bfa7e2fd518", - "file_size_bytes": 58607757 - }, - { - "name": "Gp0115669_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115669", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_covstats.txt", - "md5_checksum": "b77ef3014c80797cc88509adf02be002", - "id": "nmdc:b77ef3014c80797cc88509adf02be002", - "file_size_bytes": 8978635 - }, - { - "name": "Gp0115669_Assembled AGP file", - "description": "Assembled AGP file for Gp0115669", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_assembly.agp", - "md5_checksum": "62d08517e0ba0f991f2d8bbd66061d78", - "id": "nmdc:62d08517e0ba0f991f2d8bbd66061d78", - "file_size_bytes": 8358006 - }, - { - "name": "Gp0115669_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0115669", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_pairedMapped_sorted.bam", - "md5_checksum": "568b82cb6038fec5df04c30cbd874098", - "id": "nmdc:568b82cb6038fec5df04c30cbd874098", - "file_size_bytes": 1940308720 - }, - { - "name": "Gp0115669_Protein FAA", - "description": "Protein FAA for Gp0115669", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_proteins.faa", - "md5_checksum": "8a5f288604c61556ff3e827725864fd1", - "id": "nmdc:8a5f288604c61556ff3e827725864fd1", - "file_size_bytes": 32524652 - }, - { - "name": "Gp0115669_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115669", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_structural_annotation.gff", - "md5_checksum": "0180998d6f3a3021638f04d9c0b35019", - "id": "nmdc:0180998d6f3a3021638f04d9c0b35019", - "file_size_bytes": 2514 - }, - { - "name": "Gp0115669_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115669", - "data_object_type": 
"Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_functional_annotation.gff", - "md5_checksum": "950b8c4ebd1da50e2ca079273540f3af", - "id": "nmdc:950b8c4ebd1da50e2ca079273540f3af", - "file_size_bytes": 36685287 - }, - { - "name": "Gp0115669_KO TSV file", - "description": "KO TSV file for Gp0115669", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko.tsv", - "md5_checksum": "96ec49c6124cf4f8f3e7da3525348477", - "id": "nmdc:96ec49c6124cf4f8f3e7da3525348477", - "file_size_bytes": 4815732 - }, - { - "name": "Gp0115669_EC TSV file", - "description": "EC TSV file for Gp0115669", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ec.tsv", - "md5_checksum": "12ca374a58bf899e42ed2c191a239e71", - "id": "nmdc:12ca374a58bf899e42ed2c191a239e71", - "file_size_bytes": 3090911 - }, - { - "name": "Gp0115669_COG GFF file", - "description": "COG GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cog.gff", - "md5_checksum": "b8ae2993aa29c8e04c00580dfdb82650", - "id": "nmdc:b8ae2993aa29c8e04c00580dfdb82650", - "file_size_bytes": 20357759 - }, - { - "name": "Gp0115669_PFAM GFF file", - "description": "PFAM GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_pfam.gff", - "md5_checksum": "7901c83b5a41e54854c96ab0b081ebd6", - "id": "nmdc:7901c83b5a41e54854c96ab0b081ebd6", - "file_size_bytes": 15876941 - }, - { - "name": "Gp0115669_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_tigrfam.gff", - "md5_checksum": "762fe35b733dd82f89f5dce44fa54ed1", - "id": "nmdc:762fe35b733dd82f89f5dce44fa54ed1", - 
"file_size_bytes": 2104873 - }, - { - "name": "Gp0115669_SMART GFF file", - "description": "SMART GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_smart.gff", - "md5_checksum": "661b70d6f41a44fcc1913b101f79d86a", - "id": "nmdc:661b70d6f41a44fcc1913b101f79d86a", - "file_size_bytes": 4523437 - }, - { - "name": "Gp0115669_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_supfam.gff", - "md5_checksum": "e1843a865023d75edd3139c14b8c355e", - "id": "nmdc:e1843a865023d75edd3139c14b8c355e", - "file_size_bytes": 25872277 - }, - { - "name": "Gp0115669_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cath_funfam.gff", - "md5_checksum": "a21449989b0b0884901602528b3f423e", - "id": "nmdc:a21449989b0b0884901602528b3f423e", - "file_size_bytes": 20254021 - }, - { - "name": "Gp0115669_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko_ec.gff", - "md5_checksum": "7f52547663f4eeea33de1e437012981e", - "id": "nmdc:7f52547663f4eeea33de1e437012981e", - "file_size_bytes": 15397038 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115669_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115669", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.tooShort.fa", - "md5_checksum": "420b015f88d0b88ab582805f39ed2b47", - "id": "nmdc:420b015f88d0b88ab582805f39ed2b47", - "file_size_bytes": 44979790 - }, - { - "name": "Gp0115669_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.unbinned.fa", - "md5_checksum": "ee8a556be3a57008c1c05ff9fe83437e", - "id": "nmdc:ee8a556be3a57008c1c05ff9fe83437e", - "file_size_bytes": 10530111 - }, - { - "name": "Gp0115669_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0115669", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_checkm_qa.out", - "md5_checksum": "6fd5dfbd1500a60620194b5b9a4aab8a", - "id": "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", - "file_size_bytes": 1190 - }, - { - "name": "Gp0115669_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115669", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_hqmq_bin.zip", - "md5_checksum": "6a7eb248822ec0994ddeffe8b5aae7b1", - "id": "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", - "file_size_bytes": 681479 - }, - { - "name": "Gp0115669_metabat2 bins", - "description": "metabat2 bins for Gp0115669", - "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_metabat_bin.zip", - "md5_checksum": "6a80769f6812a45615890cc2b03e9abf", - "id": "nmdc:6a80769f6812a45615890cc2b03e9abf", - "file_size_bytes": 359752 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d78" - }, - "description": "Assembled contigs fasta for gold:Gp0115669", - "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly_contigs.fna", - 
"file_size_bytes": 58380875, - "type": "nmdc:DataObject", - "id": "nmdc:17cff5e222ad522c357863eb39418117", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d7a" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115669", - "url": "https://data.microbiomedata.org/data/1781_86097/assembly/mapping_stats.txt", - "file_size_bytes": 8408070, - "type": "nmdc:DataObject", - "id": "nmdc:3f087e100be127e3b95dae0eeff2cb95", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d7c" - }, - "description": "Assembled scaffold fasta for gold:Gp0115669", - "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly_scaffolds.fna", - "file_size_bytes": 58037702, - "type": "nmdc:DataObject", - "id": "nmdc:72840aa9e6a9a5b8e1ca113008cf44b1", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d7f" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115669", - "url": "https://data.microbiomedata.org/data/1781_86097/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1913779393, - "type": "nmdc:DataObject", - "id": "nmdc:3eea7321716d25a836521bbd70da488b", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d81" - }, - "description": "Assembled AGP file for gold:Gp0115669", - "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly.agp", - "file_size_bytes": 7215836, - "type": "nmdc:DataObject", - "id": "nmdc:bcab2f9486464ccf89e94e63626cfc5c", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159af" - }, - "id": "nmdc:398b32d16246f98d91f1a6952a26feba", - "name": "1781_86097.krona.html", - "description": "Gold:Gp0115669 KRONA plot HTML file", - "url": 
"https://data.microbiomedata.org/1781_86097/ReadbasedAnalysis/centrifuge/1781_86097.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159b3" - }, - "id": "nmdc:9b7edfd57bb79efa110f07d6a03bb2f4", - "name": "1781_86097.json", - "description": "Gold:Gp0115669 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86097/ReadbasedAnalysis/1781_86097.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1627b" - }, - "id": "nmdc:f0f1a3e612de5a76d0e0517864378138", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115669", - "file_size_bytes": 43682660, - "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1627c" - }, - "id": "nmdc:2b7abbacb5fbbb936d9421e78e9116c9", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115669", - "file_size_bytes": 11095337, - "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1627d" - }, - "id": "nmdc:ed20a53339faee3206f7eacf9031fa26", - "name": "gold:Gp0115669.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115669", - "file_size_bytes": 2141471, - "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1627e" - }, - "id": "nmdc:45d197f727234e5dd5756bc48f88bf0f", - "name": "gtdbtk.bac120.summary.tsv", - "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115669", - "file_size_bytes": 4804, - "url": 
"https://data.microbiomedata.org/data/1781_86097/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1627f" - }, - "id": "nmdc:c397d43fa00df5c21b4865775bea17ba", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115669", - "file_size_bytes": 996, - "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16282" - }, - "id": "nmdc:8602045050811243d163714135d5dce5", - "name": "gold:Gp0115669.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115669", - "file_size_bytes": 659826, - "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d7f" - }, - "description": "Protein FAA for gold:Gp0115669", - "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_proteins.faa", - "md5_checksum": "6de20d427454895dce6caeb7b9543c11", - "file_size_bytes": 3385, - "id": "nmdc:6de20d427454895dce6caeb7b9543c11", - "name": "gold:Gp0115669_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d80" - }, - "description": "EC TSV File for gold:Gp0115669", - "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_ec.tsv", - "md5_checksum": "e74cb5e168717574193a15d5ac04a01f", - "file_size_bytes": 3385, - "id": "nmdc:e74cb5e168717574193a15d5ac04a01f", - "name": "gold:Gp0115669_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d82" - }, - "description": "Functional annotation GFF file for gold:Gp0115669", - "url": 
"https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_functional_annotation.gff", - "md5_checksum": "4f7a6e682f6f13b7ea73511265fdd2a9", - "file_size_bytes": 3385, - "id": "nmdc:4f7a6e682f6f13b7ea73511265fdd2a9", - "name": "gold:Gp0115669_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d83" - }, - "description": "Structural annotation GFF file for gold:Gp0115669", - "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_structural_annotation.gff", - "md5_checksum": "9c68523f458ee1f8ec395e1442b1f508", - "file_size_bytes": 3385, - "id": "nmdc:9c68523f458ee1f8ec395e1442b1f508", - "name": "gold:Gp0115669_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d8c" - }, - "description": "KO TSV File for gold:Gp0115669", - "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_ko.tsv", - "md5_checksum": "0bc9b55e2d8f3c45b18725845815bfde", - "file_size_bytes": 3385, - "id": "nmdc:0bc9b55e2d8f3c45b18725845815bfde", - "name": "gold:Gp0115669_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34735" - }, - "has_input": [ - "nmdc:03eb095e55df50d639fab237d06c14ac", - "nmdc:568b82cb6038fec5df04c30cbd874098", - "nmdc:950b8c4ebd1da50e2ca079273540f3af" - ], - "too_short_contig_num": 107191, - "part_of": [ - "nmdc:mga0k85x37" - ], - "binned_contig_num": 651, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:420b015f88d0b88ab582805f39ed2b47", - "nmdc:ee8a556be3a57008c1c05ff9fe83437e", - 
"nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", - "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", - "nmdc:6a80769f6812a45615890cc2b03e9abf" - ], - "was_informed_by": "gold:Gp0115669", - "input_contig_num": 114113, - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0k85x37", - "mags_list": [ - { - "number_of_contig": 48, - "completeness": 13.04, - "bin_name": "bins.1", - "gene_count": 245, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - }, - { - "number_of_contig": 379, - "completeness": 72.42, - "bin_name": "bins.2", - "gene_count": 2513, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", - "num_16s": 0, - "gtdbtk_family": "Sphingomonadaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 1.85, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Novosphingobium", - "num_t_rna": 32 - }, - { - "number_of_contig": 224, - "completeness": 29.36, - "bin_name": "bins.3", - "gene_count": 1148, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.43, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 13 - } - ], - "unbinned_contig_num": 6271, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:20:07+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9d0" - }, - "has_input": [ - 
"nmdc:03eb095e55df50d639fab237d06c14ac" - ], - "part_of": [ - "nmdc:mga0k85x37" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8a5f288604c61556ff3e827725864fd1", - "nmdc:0180998d6f3a3021638f04d9c0b35019", - "nmdc:950b8c4ebd1da50e2ca079273540f3af", - "nmdc:96ec49c6124cf4f8f3e7da3525348477", - "nmdc:12ca374a58bf899e42ed2c191a239e71", - "nmdc:b8ae2993aa29c8e04c00580dfdb82650", - "nmdc:7901c83b5a41e54854c96ab0b081ebd6", - "nmdc:762fe35b733dd82f89f5dce44fa54ed1", - "nmdc:661b70d6f41a44fcc1913b101f79d86a", - "nmdc:e1843a865023d75edd3139c14b8c355e", - "nmdc:a21449989b0b0884901602528b3f423e", - "nmdc:7f52547663f4eeea33de1e437012981e" - ], - "was_informed_by": "gold:Gp0115669", - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0k85x37", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:20:07+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fba" - }, - "has_input": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9" - ], - "part_of": [ - "nmdc:mga0k85x37" - ], - "ctg_logsum": 151663, - "scaf_logsum": 152336, - "gap_pct": 0.00222, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:03eb095e55df50d639fab237d06c14ac", - "nmdc:569cb5da239e82dce1b40bfa7e2fd518", - "nmdc:b77ef3014c80797cc88509adf02be002", - "nmdc:62d08517e0ba0f991f2d8bbd66061d78", - "nmdc:568b82cb6038fec5df04c30cbd874098" - ], - "asm_score": 4.733, - "was_informed_by": "gold:Gp0115669", - "ctg_powsum": 17017, - "scaf_max": 20100, - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", - "scaf_powsum": 17101, - "execution_resource": "NERSC-Cori", - "contigs": 114114, - "name": "Assembly Activity for nmdc:mga0k85x37", - "ctg_max": 20100, - "gc_std": 0.11871, - "contig_bp": 54567489, - "gc_avg": 0.55923, - "started_at_time": 
"2021-10-11T02:28:43Z", - "scaf_bp": 54568699, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 114011, - "ended_at_time": "2021-10-11T04:20:07+00:00", - "ctg_l50": 451, - "ctg_l90": 285, - "ctg_n50": 29019, - "ctg_n90": 94816, - "scaf_l50": 451, - "scaf_l90": 285, - "scaf_n50": 28976, - "scaf_n90": 94720 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b52" - }, - "id": "nmdc:omprc-11-hqmmwn16", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-47nxfg85" - ], - "has_output": [ - "jgi:55d817fe0d8785342fcf8276" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115669" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c88d" - }, - "has_input": [ - "nmdc:f18b96b7d225d2f64f7b29015150113f" - ], - "part_of": [ - "nmdc:mga0k85x37" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9", - "nmdc:58fde3e96dbb28af9133bede850a2653" - ], - "was_informed_by": "gold:Gp0115669", - "input_read_count": 20957834, - "output_read_bases": 3065138996, - "id": 
"nmdc:ed7745adccd65c2dd20dfa24c2922db2", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3164632934, - "name": "Read QC Activity for nmdc:mga0k85x37", - "output_read_count": 20454422, - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:20:07+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf51" - }, - "has_input": [ - "nmdc:6eef104db92b99c9741b26c667d75cd9" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:05933784d02331b60b2531e2025cd3b7", - "nmdc:50fc279637cb7048aaaeec9b223d0286", - "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", - "nmdc:2777a04ec7e23aff356bb4f2733e55b7", - "nmdc:de45d70cc01749e9b5691dc24674545d", - "nmdc:534f97f3792b74385c4da305196a1b1d", - "nmdc:fc3e489df923ec344ac0cce7316f49d6", - "nmdc:07b6457a094fab96563168ed287dc59f", - "nmdc:164a1bc50e8d6509446ae2877be8231c" - ], - "was_informed_by": "gold:Gp0115669", - "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", - "started_at_time": "2021-10-11T02:28:43Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:20:07+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 3054717241, - "type": "nmdc:DataObject", - "id": "jgi:55d7402b0d8785342fcf7e3c", - "name": "9422.8.132674.GAGTGG.fastq.gz" - }, - { - "name": "Gp0115672_Filtered Reads", - "description": "Filtered Reads for Gp0115672", - "data_object_type": 
"Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filtered.fastq.gz", - "md5_checksum": "eb516fb673793f5161fb634fc19de310", - "id": "nmdc:eb516fb673793f5161fb634fc19de310", - "file_size_bytes": 2704299418 - }, - { - "name": "Gp0115672_Filtered Stats", - "description": "Filtered Stats for Gp0115672", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filterStats.txt", - "md5_checksum": "f4b68d1bd25f8d2fa8986aeef5fbec3f", - "id": "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f", - "file_size_bytes": 290 - }, - { - "name": "Gp0115672_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", - "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", - "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", - "file_size_bytes": 15806 - }, - { - "name": "Gp0115672_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", - "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", - "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "file_size_bytes": 1142479 - }, - { - "name": "Gp0115672_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0115672", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", - "md5_checksum": "39a46887587926c9b81e126bb1036005", - "id": "nmdc:39a46887587926c9b81e126bb1036005", - "file_size_bytes": 273611 - }, - { - "name": "Gp0115672_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0115672", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", - "md5_checksum": "b8dde2c047141d9097317c86f723eded", - "id": "nmdc:b8dde2c047141d9097317c86f723eded", - "file_size_bytes": 2436637487 - }, - { - "name": "Gp0115672_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0115672", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", - "md5_checksum": "d530342b37f0785f92650e9650f31d6a", - "id": "nmdc:d530342b37f0785f92650e9650f31d6a", - "file_size_bytes": 261520 - }, - { - "name": "Gp0115672_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0115672", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", - "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", - "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "file_size_bytes": 2342832 - }, - { - "name": "Gp0115672_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0115672", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", - "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", - "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "file_size_bytes": 1993150715 - }, - { - "name": "Gp0115672_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0115672", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", - "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", - "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "file_size_bytes": 693572 - }, - { - "name": "Gp0115672_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0115672", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", - "md5_checksum": "3266e79813577aae1d4377c62e73332c", - "id": "nmdc:3266e79813577aae1d4377c62e73332c", - "file_size_bytes": 4177114 - }, - { - "name": "Gp0115672_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0115672", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_contigs.fna", - "md5_checksum": "6f762f7b079f8c2633ef674a8264879f", - "id": "nmdc:6f762f7b079f8c2633ef674a8264879f", - "file_size_bytes": 129321165 - }, - { - "name": "Gp0115672_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0115672", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_scaffolds.fna", - "md5_checksum": "26cc1c91f5f5e79d50041ff4623398b5", - "id": "nmdc:26cc1c91f5f5e79d50041ff4623398b5", - "file_size_bytes": 128655263 - }, - { - "name": "Gp0115672_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_covstats.txt", - "md5_checksum": "bd9d5497c4e2e0ea61df1f3f239107f7", - "id": "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", - "file_size_bytes": 17496249 - }, - { - "name": "Gp0115672_Assembled AGP file", - "description": "Assembled AGP file for Gp0115672", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_assembly.agp", - "md5_checksum": "362a9857666fe2f4e90bf6a818f551cc", - "id": "nmdc:362a9857666fe2f4e90bf6a818f551cc", - "file_size_bytes": 16401188 - }, - { - "name": "Gp0115672_Metagenome Alignment BAM file", - "description": 
"Metagenome Alignment BAM file for Gp0115672", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_pairedMapped_sorted.bam", - "md5_checksum": "afd1d03b38bc5deb9c196264bcea8795", - "id": "nmdc:afd1d03b38bc5deb9c196264bcea8795", - "file_size_bytes": 2952467259 - }, - { - "name": "Gp0115672_Protein FAA", - "description": "Protein FAA for Gp0115672", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_proteins.faa", - "md5_checksum": "84e3590be0f59007275fdf459d464f74", - "id": "nmdc:84e3590be0f59007275fdf459d464f74", - "file_size_bytes": 71651089 - }, - { - "name": "Gp0115672_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0115672", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_structural_annotation.gff", - "md5_checksum": "7dd630b842f587768235714e8a95f377", - "id": "nmdc:7dd630b842f587768235714e8a95f377", - "file_size_bytes": 2534 - }, - { - "name": "Gp0115672_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0115672", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_functional_annotation.gff", - "md5_checksum": "38d776837c2208b557e2e4e5428c879d", - "id": "nmdc:38d776837c2208b557e2e4e5428c879d", - "file_size_bytes": 78213025 - }, - { - "name": "Gp0115672_KO TSV file", - "description": "KO TSV file for Gp0115672", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko.tsv", - "md5_checksum": "e38cb3355892042cb02580c26c083cd9", - "id": "nmdc:e38cb3355892042cb02580c26c083cd9", - "file_size_bytes": 10621211 - }, - { - "name": "Gp0115672_EC TSV file", - 
"description": "EC TSV file for Gp0115672", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ec.tsv", - "md5_checksum": "d55119e8f094efa075c44b22e8b2f689", - "id": "nmdc:d55119e8f094efa075c44b22e8b2f689", - "file_size_bytes": 6814564 - }, - { - "name": "Gp0115672_COG GFF file", - "description": "COG GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cog.gff", - "md5_checksum": "02a9ad5732172f04d1da83d145f63226", - "id": "nmdc:02a9ad5732172f04d1da83d145f63226", - "file_size_bytes": 45617917 - }, - { - "name": "Gp0115672_PFAM GFF file", - "description": "PFAM GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_pfam.gff", - "md5_checksum": "73811b72087e57f23db32f4a0ca4fb9c", - "id": "nmdc:73811b72087e57f23db32f4a0ca4fb9c", - "file_size_bytes": 37040943 - }, - { - "name": "Gp0115672_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_tigrfam.gff", - "md5_checksum": "dfc18c0f97e80c14ca6ca1bc2ba7a809", - "id": "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", - "file_size_bytes": 5380314 - }, - { - "name": "Gp0115672_SMART GFF file", - "description": "SMART GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_smart.gff", - "md5_checksum": "5a843529ffac8227515c5ea399ee4815", - "id": "nmdc:5a843529ffac8227515c5ea399ee4815", - "file_size_bytes": 10141642 - }, - { - "name": "Gp0115672_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_supfam.gff", - "md5_checksum": "82ac29a9999c6bc097cb0f35e4177e35", - "id": "nmdc:82ac29a9999c6bc097cb0f35e4177e35", - "file_size_bytes": 56808220 - }, - { - 
"name": "Gp0115672_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cath_funfam.gff", - "md5_checksum": "5b0e8395559ef0d8a341ae0e132e60f6", - "id": "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", - "file_size_bytes": 45632833 - }, - { - "name": "Gp0115672_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko_ec.gff", - "md5_checksum": "1e74c3df751a59a34e5c0d87f4a37563", - "id": "nmdc:1e74c3df751a59a34e5c0d87f4a37563", - "file_size_bytes": 33782864 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0115672_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.tooShort.fa", - "md5_checksum": "2b6e0195e34697039eff38b51026be24", - "id": "nmdc:2b6e0195e34697039eff38b51026be24", - "file_size_bytes": 91055942 - }, - { - "name": "Gp0115672_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.unbinned.fa", - "md5_checksum": "f02d361fbef7549e2289bf4da623787d", - "id": "nmdc:f02d361fbef7549e2289bf4da623787d", - "file_size_bytes": 23202832 - }, - { - "name": "Gp0115672_metabat2 bin checkm quality assessment result", - "description": "metabat2 
bin checkm quality assessment result for Gp0115672", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_checkm_qa.out", - "md5_checksum": "2de282e5507477269238ead458f11ac0", - "id": "nmdc:2de282e5507477269238ead458f11ac0", - "file_size_bytes": 2040 - }, - { - "name": "Gp0115672_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0115672", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_hqmq_bin.zip", - "md5_checksum": "3abae1a573f9f0ac6da47e1ab9b9a723", - "id": "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", - "file_size_bytes": 1815861 - }, - { - "name": "Gp0115672_metabat2 bins", - "description": "metabat2 bins for Gp0115672", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_metabat_bin.zip", - "md5_checksum": "4d315d8dac1d9605d110ff2298b10229", - "id": "nmdc:4d315d8dac1d9605d110ff2298b10229", - "file_size_bytes": 2757900 - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d8b" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0115672", - "url": "https://data.microbiomedata.org/data/1781_86103/assembly/mapping_stats.txt", - "file_size_bytes": 16391024, - "type": "nmdc:DataObject", - "id": "nmdc:b5be8aa1d11106aabbcf86f4a31e558b", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d8d" - }, - "description": "Assembled contigs fasta for gold:Gp0115672", - "url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly_contigs.fna", - "file_size_bytes": 128215940, - "type": "nmdc:DataObject", - "id": "nmdc:f74d007a0d55515291e2ab3ecd50461f", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d8e" - }, - "description": "Assembled AGP file for gold:Gp0115672", - 
"url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly.agp", - "file_size_bytes": 14188798, - "type": "nmdc:DataObject", - "id": "nmdc:39b43fc42da1d32ab929d57555ff63ee", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d8f" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0115672", - "url": "https://data.microbiomedata.org/data/1781_86103/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2905683228, - "type": "nmdc:DataObject", - "id": "nmdc:c01fcbe10ff6779259fbe584b123b82d", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003c1ae706d7b5b14d92" - }, - "description": "Assembled scaffold fasta for gold:Gp0115672", - "url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly_scaffolds.fna", - "file_size_bytes": 127550998, - "type": "nmdc:DataObject", - "id": "nmdc:49c6eb2c5d792edf921a7226b03351bf", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159ca" - }, - "id": "nmdc:bc647f348d91e409e4125941b495ff13", - "name": "1781_86103.krona.html", - "description": "Gold:Gp0115672 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_86103/ReadbasedAnalysis/centrifuge/1781_86103.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b159cf" - }, - "id": "nmdc:986355b49a83d2548afbc1792128513e", - "name": "1781_86103.json", - "description": "Gold:Gp0115672 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_86103/ReadbasedAnalysis/1781_86103.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16297" - }, - "id": "nmdc:0c7691992c142a735412ded115a1debd", - "name": "bins.tooShort.fa", - "description": 
"tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115672", - "file_size_bytes": 88459668, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1629c" - }, - "id": "nmdc:521024b7b73f146c2b00dba84fb2d303", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0115672", - "file_size_bytes": 24826673, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1629d" - }, - "id": "nmdc:d6bee3893d2f63b687bc6078ce48dc2e", - "name": "gtdbtk.bac120.summary.tsv", - "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115672", - "file_size_bytes": 815, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1629e" - }, - "id": "nmdc:82cfd339c167866e4b67cc4b12d7478d", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0115672", - "file_size_bytes": 2394, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1629f" - }, - "id": "nmdc:a939ca808857119835c8340b2a79d302", - "name": "gold:Gp0115672.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 2103943, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a2" - }, - "id": "nmdc:e3e116dfc8712b6e35a071845657d1d1", - "name": "gold:Gp0115672.bins.5.fa", - "description": "metabat2 binned contig file 
for gold:Gp0115672", - "file_size_bytes": 374860, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.5.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a3" - }, - "id": "nmdc:e8a46f36b4956575ad78e022e604a89a", - "name": "gold:Gp0115672.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 689749, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a4" - }, - "id": "nmdc:561b7c7429a778107b65ece41a39bbb8", - "name": "gold:Gp0115672.bins.9.fa", - "description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 313467, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.9.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a5" - }, - "id": "nmdc:1daea4e61ec3220e37e2c86742d9ba90", - "name": "gold:Gp0115672.bins.4.fa", - "description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 1599533, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.4.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a6" - }, - "id": "nmdc:45fa952f6821c80c16e77c526d6506c0", - "name": "gold:Gp0115672.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 471200, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a7" - }, - "id": "nmdc:48a3756c08657061dbbd1b3fbd92b52d", - "name": "gold:Gp0115672.bins.7.fa", - 
"description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 232940, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.7.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a8" - }, - "id": "nmdc:28fcb3eb2cf1d63d9623996a438b3cae", - "name": "gold:Gp0115672.bins.8.fa", - "description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 834579, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.8.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162a9" - }, - "id": "nmdc:97a872124142327afa9e896d56b3c263", - "name": "gold:Gp0115672.bins.6.fa", - "description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 943843, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.6.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b162aa" - }, - "id": "nmdc:20f94f48572c63758d65c10c19dc3a44", - "name": "gold:Gp0115672.bins.10.fa", - "description": "metabat2 binned contig file for gold:Gp0115672", - "file_size_bytes": 5938384, - "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.10.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d9e" - }, - "description": "EC TSV File for gold:Gp0115672", - "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_ec.tsv", - "md5_checksum": "e029f10a29dd5e9d81dce82c2211fdee", - "file_size_bytes": 3385, - "id": "nmdc:e029f10a29dd5e9d81dce82c2211fdee", - "name": "gold:Gp0115672_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d9f" - }, - 
"description": "KO TSV File for gold:Gp0115672", - "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_ko.tsv", - "md5_checksum": "f6230d3d3eadab80074ecfe59a623c10", - "file_size_bytes": 3385, - "id": "nmdc:f6230d3d3eadab80074ecfe59a623c10", - "name": "gold:Gp0115672_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da0" - }, - "description": "Functional annotation GFF file for gold:Gp0115672", - "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_functional_annotation.gff", - "md5_checksum": "5c1afd4ffb1b1594807fbd0901da7a88", - "file_size_bytes": 3385, - "id": "nmdc:5c1afd4ffb1b1594807fbd0901da7a88", - "name": "gold:Gp0115672_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da1" - }, - "description": "Protein FAA for gold:Gp0115672", - "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_proteins.faa", - "md5_checksum": "b0687d58e2803a41864c9d830977402b", - "file_size_bytes": 3385, - "id": "nmdc:b0687d58e2803a41864c9d830977402b", - "name": "gold:Gp0115672_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16da2" - }, - "description": "Structural annotation GFF file for gold:Gp0115672", - "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_structural_annotation.gff", - "md5_checksum": "644d67586f9337bf4d12ff5859d4cd54", - "file_size_bytes": 3385, - "id": "nmdc:644d67586f9337bf4d12ff5859d4cd54", - "name": "gold:Gp0115672_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { 
- "$oid": "649b0052ec087f6bbab34737" - }, - "has_input": [ - "nmdc:6f762f7b079f8c2633ef674a8264879f", - "nmdc:afd1d03b38bc5deb9c196264bcea8795", - "nmdc:38d776837c2208b557e2e4e5428c879d" - ], - "too_short_contig_num": 206294, - "part_of": [ - "nmdc:mga0cwhj53" - ], - "binned_contig_num": 1785, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:2b6e0195e34697039eff38b51026be24", - "nmdc:f02d361fbef7549e2289bf4da623787d", - "nmdc:2de282e5507477269238ead458f11ac0", - "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", - "nmdc:4d315d8dac1d9605d110ff2298b10229" - ], - "was_informed_by": "gold:Gp0115672", - "input_contig_num": 221045, - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0cwhj53", - "mags_list": [ - { - "number_of_contig": 316, - "completeness": 61.03, - "bin_name": "bins.1", - "gene_count": 2148, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Sphingomonadales", - "num_16s": 0, - "gtdbtk_family": "Sphingomonadaceae", - "gtdbtk_domain": "Bacteria", - "contamination": 0.85, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "Novosphingobium", - "num_t_rna": 19 - }, - { - "number_of_contig": 130, - "completeness": 34.64, - "bin_name": "bins.2", - "gene_count": 675, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - }, - { - "number_of_contig": 201, - "completeness": 19.13, - "bin_name": "bins.3", - "gene_count": 1000, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", 
- "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 2 - }, - { - "number_of_contig": 256, - "completeness": 75.9, - "bin_name": "bins.4", - "gene_count": 2131, - "bin_quality": "MQ", - "gtdbtk_species": "UBA5335 sp002862435", - "gtdbtk_order": "UBA5335", - "num_16s": 0, - "gtdbtk_family": "UBA5335", - "gtdbtk_domain": "Bacteria", - "contamination": 1.52, - "gtdbtk_class": "Gammaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA5335", - "num_t_rna": 22 - }, - { - "number_of_contig": 254, - "completeness": 100.0, - "bin_name": "bins.5", - "gene_count": 6188, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 2, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 95.83, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 2, - "num_23s": 2, - "gtdbtk_genus": "", - "num_t_rna": 86 - }, - { - "number_of_contig": 106, - "completeness": 7.24, - "bin_name": "bins.6", - "gene_count": 524, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 16 - }, - { - "number_of_contig": 306, - "completeness": 65.74, - "bin_name": "bins.7", - "gene_count": 2357, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "UBA11222", - "num_16s": 0, - "gtdbtk_family": "UBA11222", - "gtdbtk_domain": "Bacteria", - "contamination": 2.3, - "gtdbtk_class": "Alphaproteobacteria", - "gtdbtk_phylum": "Proteobacteria", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "UBA11222", - "num_t_rna": 29 - }, - { - "number_of_contig": 216, - "completeness": 47.34, - "bin_name": "bins.8", - "gene_count": 1203, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - 
"contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 12 - } - ], - "unbinned_contig_num": 12966, - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T05:56:20+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9d1" - }, - "has_input": [ - "nmdc:6f762f7b079f8c2633ef674a8264879f" - ], - "part_of": [ - "nmdc:mga0cwhj53" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:84e3590be0f59007275fdf459d464f74", - "nmdc:7dd630b842f587768235714e8a95f377", - "nmdc:38d776837c2208b557e2e4e5428c879d", - "nmdc:e38cb3355892042cb02580c26c083cd9", - "nmdc:d55119e8f094efa075c44b22e8b2f689", - "nmdc:02a9ad5732172f04d1da83d145f63226", - "nmdc:73811b72087e57f23db32f4a0ca4fb9c", - "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", - "nmdc:5a843529ffac8227515c5ea399ee4815", - "nmdc:82ac29a9999c6bc097cb0f35e4177e35", - "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", - "nmdc:1e74c3df751a59a34e5c0d87f4a37563" - ], - "was_informed_by": "gold:Gp0115672", - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0cwhj53", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T05:56:20+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fbf" - }, - "has_input": [ - "nmdc:eb516fb673793f5161fb634fc19de310" - ], - "part_of": [ - "nmdc:mga0cwhj53" - ], - "ctg_logsum": 447149, - "scaf_logsum": 448446, - "gap_pct": 0.0019, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:6f762f7b079f8c2633ef674a8264879f", - 
"nmdc:26cc1c91f5f5e79d50041ff4623398b5", - "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", - "nmdc:362a9857666fe2f4e90bf6a818f551cc", - "nmdc:afd1d03b38bc5deb9c196264bcea8795" - ], - "asm_score": 13.127, - "was_informed_by": "gold:Gp0115672", - "ctg_powsum": 55923, - "scaf_max": 157008, - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "scaf_powsum": 56113, - "execution_resource": "NERSC-Cori", - "contigs": 221046, - "name": "Assembly Activity for nmdc:mga0cwhj53", - "ctg_max": 157008, - "gc_std": 0.10619, - "contig_bp": 120471215, - "gc_avg": 0.56196, - "started_at_time": "2021-10-11T02:28:16Z", - "scaf_bp": 120473505, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 220853, - "ended_at_time": "2021-10-11T05:56:20+00:00", - "ctg_l50": 528, - "ctg_l90": 293, - "ctg_n50": 48327, - "ctg_n90": 178881, - "scaf_l50": 529, - "scaf_l90": 293, - "scaf_n50": 48077, - "scaf_n90": 178708, - "scaf_l_gt50k": 2147966, - "scaf_n_gt50k": 28, - "scaf_pct_gt50k": 1.7829365 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b53" - }, - "id": "nmdc:omprc-11-qsxwf517", - "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", - "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", - "has_input": [ - "nmdc:bsm-11-sdhyr752" - ], - "has_output": [ - "jgi:55d7402b0d8785342fcf7e3c" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2015-05-28", - "mod_date": "2021-06-15", - "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", 
- "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0115672" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c889" - }, - "has_input": [ - "nmdc:1f6998a48aec6f4008a92d2b8e17d314" - ], - "part_of": [ - "nmdc:mga0cwhj53" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:eb516fb673793f5161fb634fc19de310", - "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f" - ], - "was_informed_by": "gold:Gp0115672", - "input_read_count": 34522052, - "output_read_bases": 5012430912, - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "execution_resource": "NERSC-Cori", - "input_read_bases": 5212829852, - "name": "Read QC Activity for nmdc:mga0cwhj53", - "output_read_count": 33454554, - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T05:56:20+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf52" - }, - "has_input": [ - "nmdc:eb516fb673793f5161fb634fc19de310" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5a9326e2e450663a5ed8c97389136b25", - "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", - "nmdc:39a46887587926c9b81e126bb1036005", - "nmdc:b8dde2c047141d9097317c86f723eded", - "nmdc:d530342b37f0785f92650e9650f31d6a", - "nmdc:6672aa851b5d39d7381211232b4f6cb2", - "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", - "nmdc:3049835ed4e3533acce49e9cc60b03fc", - "nmdc:3266e79813577aae1d4377c62e73332c" - ], - "was_informed_by": "gold:Gp0115672", - "id": "nmdc:50eb8825777d1294abac150521e5c2db", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", - "started_at_time": "2021-10-11T02:28:16Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T05:56:20+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], 
- "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2619328583, - "type": "nmdc:DataObject", - "id": "jgi:574fe0a17ded5e3df1ee148a", - "name": "10533.3.165334.ACCATCC-TGGATGG.fastq.gz" - }, - { - "name": "Gp0127640_Filtered Reads", - "description": "Filtered Reads for Gp0127640", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", - "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", - "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", - "file_size_bytes": 2416846292 - }, - { - "name": "Gp0127640_Filtered Stats", - "description": "Filtered Stats for Gp0127640", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", - "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", - "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", - "file_size_bytes": 285 - }, - { - "name": "Gp0127640_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", - "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", - "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "file_size_bytes": 3824 - }, - { - "name": "Gp0127640_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", - "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", - "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "file_size_bytes": 850491 - }, 
- { - "name": "Gp0127640_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127640", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", - "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", - "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "file_size_bytes": 236151 - }, - { - "name": "Gp0127640_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127640", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", - "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", - "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "file_size_bytes": 2057333090 - }, - { - "name": "Gp0127640_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127640", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", - "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", - "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", - "file_size_bytes": 256577 - }, - { - "name": "Gp0127640_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127640", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", - "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", - "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "file_size_bytes": 2334984 - }, - { - "name": "Gp0127640_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127640", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", - "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", - "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "file_size_bytes": 1658481192 - }, - { - "name": "Gp0127640_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127640", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", - "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", - "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", - "file_size_bytes": 653129 - }, - { - "name": "Gp0127640_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127640", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", - "md5_checksum": "d47144fd7ec0608e7677550d9589c889", - "id": "nmdc:d47144fd7ec0608e7677550d9589c889", - "file_size_bytes": 3977820 - }, - { - "name": "Gp0127640_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127640", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_contigs.fna", - "md5_checksum": "b85a322271c7f93ef295141d12cb2dbc", - "id": "nmdc:b85a322271c7f93ef295141d12cb2dbc", - "file_size_bytes": 44243651 - }, - { - "name": "Gp0127640_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127640", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_scaffolds.fna", - "md5_checksum": "794445b3fedfaec8af9b70b167bc6852", - "id": "nmdc:794445b3fedfaec8af9b70b167bc6852", - "file_size_bytes": 43923338 - }, - { - "name": "Gp0127640_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127640", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_covstats.txt", - "md5_checksum": "d389ae4f8a92c21423fc77aa054ba985", - "id": "nmdc:d389ae4f8a92c21423fc77aa054ba985", - "file_size_bytes": 8365383 - }, - { - "name": "Gp0127640_Assembled AGP file", - "description": "Assembled AGP file for Gp0127640", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_assembly.agp", - "md5_checksum": "765541c2865f6047d5e2e8e7299908e4", - "id": "nmdc:765541c2865f6047d5e2e8e7299908e4", - "file_size_bytes": 7782777 - }, - { - "name": "Gp0127640_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127640", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_pairedMapped_sorted.bam", - "md5_checksum": "78b554dd52492c3d1e401d0c9198b89b", - "id": "nmdc:78b554dd52492c3d1e401d0c9198b89b", - "file_size_bytes": 2578128724 - }, - { - "name": "Gp0127640_Protein FAA", - "description": "Protein FAA for Gp0127640", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_proteins.faa", - "md5_checksum": "13e64b02d230f76008e42256a48d1cec", - "id": "nmdc:13e64b02d230f76008e42256a48d1cec", - "file_size_bytes": 26637626 - }, - { - "name": "Gp0127640_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127640", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_structural_annotation.gff", - "md5_checksum": "7babb0c9f662679659b7b1bee469f073", - "id": "nmdc:7babb0c9f662679659b7b1bee469f073", - "file_size_bytes": 2515 - }, - { - "name": "Gp0127640_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127640", - "data_object_type": 
"Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_functional_annotation.gff", - "md5_checksum": "e84b1e43d546c9793c3a4d9eaa8cee86", - "id": "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", - "file_size_bytes": 32184781 - }, - { - "name": "Gp0127640_KO TSV file", - "description": "KO TSV file for Gp0127640", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko.tsv", - "md5_checksum": "2e3e5b7ffa39e533db8ed1d925426f50", - "id": "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", - "file_size_bytes": 3620933 - }, - { - "name": "Gp0127640_EC TSV file", - "description": "EC TSV file for Gp0127640", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ec.tsv", - "md5_checksum": "62e46d35a6aff3a52b39c6bb04dc6161", - "id": "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", - "file_size_bytes": 2390086 - }, - { - "name": "Gp0127640_COG GFF file", - "description": "COG GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cog.gff", - "md5_checksum": "93fa7de9c74cfcff99bb74e27fa94674", - "id": "nmdc:93fa7de9c74cfcff99bb74e27fa94674", - "file_size_bytes": 17898567 - }, - { - "name": "Gp0127640_PFAM GFF file", - "description": "PFAM GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_pfam.gff", - "md5_checksum": "63bad86a6d7fb23b5a4683ae36820622", - "id": "nmdc:63bad86a6d7fb23b5a4683ae36820622", - "file_size_bytes": 12585366 - }, - { - "name": "Gp0127640_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_tigrfam.gff", - "md5_checksum": "d6b80bb748b4d6fbe52c15300ad2137b", - "id": "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", - 
"file_size_bytes": 1170952 - }, - { - "name": "Gp0127640_SMART GFF file", - "description": "SMART GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_smart.gff", - "md5_checksum": "46722961c280df725d15489e82502031", - "id": "nmdc:46722961c280df725d15489e82502031", - "file_size_bytes": 3891425 - }, - { - "name": "Gp0127640_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_supfam.gff", - "md5_checksum": "6f1a0029cb25f1433de1d7c241bc7553", - "id": "nmdc:6f1a0029cb25f1433de1d7c241bc7553", - "file_size_bytes": 22543435 - }, - { - "name": "Gp0127640_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cath_funfam.gff", - "md5_checksum": "6d2839963f616d810e66435b3bbe018a", - "id": "nmdc:6d2839963f616d810e66435b3bbe018a", - "file_size_bytes": 16572925 - }, - { - "name": "Gp0127640_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko_ec.gff", - "md5_checksum": "efbf36ca49c40ad0367ecd23c012b29b", - "id": "nmdc:efbf36ca49c40ad0367ecd23c012b29b", - "file_size_bytes": 11571776 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127640_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127640", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.tooShort.fa", - "md5_checksum": "ce395376d0bc7121e4dc5efc774d5e74", - "id": "nmdc:ce395376d0bc7121e4dc5efc774d5e74", - "file_size_bytes": 40358420 - }, - { - "name": "Gp0127640_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.unbinned.fa", - "md5_checksum": "a16cbb06b91ebfb45f5a010effc1cfde", - "id": "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", - "file_size_bytes": 2755747 - }, - { - "name": "Gp0127640_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127640", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_checkm_qa.out", - "md5_checksum": "97ae130ca2f75c66b8cbd60c4d35463a", - "id": "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", - "file_size_bytes": 760 - }, - { - "name": "Gp0127640_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127640", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_hqmq_bin.zip", - "md5_checksum": "5945311235c6195ad409ab30e2b72c0c", - "id": "nmdc:5945311235c6195ad409ab30e2b72c0c", - "file_size_bytes": 182 - }, - { - "name": "Gp0127640_metabat2 bins", - "description": "metabat2 bins for Gp0127640", - "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_metabat_bin.zip", - "md5_checksum": "d1cf2992bd60e25032eedeb09858d14b", - "id": "nmdc:d1cf2992bd60e25032eedeb09858d14b", - "file_size_bytes": 345388 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e92" - }, - "description": "Assembled scaffold fasta for gold:Gp0127640", - "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly_scaffolds.fna", - 
"file_size_bytes": 43496758, - "type": "nmdc:DataObject", - "id": "nmdc:aa1bb1c144d1bca4e8aeeb2c9d640d75", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e93" - }, - "description": "Assembled AGP file for gold:Gp0127640", - "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly.agp", - "file_size_bytes": 6929297, - "type": "nmdc:DataObject", - "id": "nmdc:c4688faca5539c65da5223b1468045be", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e94" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127640", - "url": "https://data.microbiomedata.org/data/1781_100342/assembly/mapping_stats.txt", - "file_size_bytes": 7938723, - "type": "nmdc:DataObject", - "id": "nmdc:82ea1e2021fb7f53d998452af137427c", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e96" - }, - "description": "Assembled contigs fasta for gold:Gp0127640", - "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly_contigs.fna", - "file_size_bytes": 43816991, - "type": "nmdc:DataObject", - "id": "nmdc:e2d5ce50f49731a49740d9f61f630550", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e98" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127640", - "url": "https://data.microbiomedata.org/data/1781_100342/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2545520278, - "type": "nmdc:DataObject", - "id": "nmdc:68a7046814acf2ffe580fa8ce70e8a06", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b7b" - }, - "id": "nmdc:252bb7818bcf5f8a50bf88d1fd0a297c", - "name": "1781_100342.krona.html", - "description": "Gold:Gp0127640 KRONA plot HTML file", - "url": 
"https://data.microbiomedata.org/1781_100342/ReadbasedAnalysis/centrifuge/1781_100342.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b82" - }, - "id": "nmdc:0bf64f8fcce67bacdf9e484f8ea2268e", - "name": "1781_100342.json", - "description": "Gold:Gp0127640 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100342/ReadbasedAnalysis/1781_100342.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1660c" - }, - "id": "nmdc:e9110de20a054251e14eddda17e204a6", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127640", - "file_size_bytes": 39101595, - "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1660d" - }, - "id": "nmdc:21e6cb23babaec38d6e8d431893c23a3", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127640", - "file_size_bytes": 3314124, - "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1660e" - }, - "id": "nmdc:363ee3fe300a57198050ef502d613d92", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127640", - "file_size_bytes": 918, - "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1660f" - }, - "id": "nmdc:59a345dc3bc08ee0f1837d41a276654f", - "name": "gold:Gp0127640.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127640", - "file_size_bytes": 275145, - "url": 
"https://data.microbiomedata.org/data/1781_100342/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16626" - }, - "id": "nmdc:5356fae3a74ea20c0344e57c8ef11166", - "name": "gold:Gp0127640.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127640", - "file_size_bytes": 277293, - "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d11" - }, - "description": "EC TSV File for gold:Gp0127640", - "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_ec.tsv", - "md5_checksum": "e90b16891cff9bd5b0034cc6c89f8080", - "file_size_bytes": 3385, - "id": "nmdc:e90b16891cff9bd5b0034cc6c89f8080", - "name": "gold:Gp0127640_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d14" - }, - "description": "Functional annotation GFF file for gold:Gp0127640", - "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_functional_annotation.gff", - "md5_checksum": "86b6734c5eb64c0cae6e95fa7f062123", - "file_size_bytes": 3385, - "id": "nmdc:86b6734c5eb64c0cae6e95fa7f062123", - "name": "gold:Gp0127640_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d15" - }, - "description": "KO TSV File for gold:Gp0127640", - "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_ko.tsv", - "md5_checksum": "4950dc66d2b5a3c325454fb106d6b726", - "file_size_bytes": 3385, - "id": "nmdc:4950dc66d2b5a3c325454fb106d6b726", - "name": "gold:Gp0127640_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": 
"649b00401ae706d7b5b16d1c" - }, - "description": "Protein FAA for gold:Gp0127640", - "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_proteins.faa", - "md5_checksum": "1fbb7302a6ad581085d561e9fd3ed802", - "file_size_bytes": 3385, - "id": "nmdc:1fbb7302a6ad581085d561e9fd3ed802", - "name": "gold:Gp0127640_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d22" - }, - "description": "Structural annotation GFF file for gold:Gp0127640", - "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_structural_annotation.gff", - "md5_checksum": "812cf8b77747ff65cfd237158535d310", - "file_size_bytes": 3385, - "id": "nmdc:812cf8b77747ff65cfd237158535d310", - "name": "gold:Gp0127640_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3471a" - }, - "has_input": [ - "nmdc:b85a322271c7f93ef295141d12cb2dbc", - "nmdc:78b554dd52492c3d1e401d0c9198b89b", - "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86" - ], - "too_short_contig_num": 104867, - "part_of": [ - "nmdc:mga06rnc11" - ], - "binned_contig_num": 213, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:ce395376d0bc7121e4dc5efc774d5e74", - "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", - "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", - "nmdc:5945311235c6195ad409ab30e2b72c0c", - "nmdc:d1cf2992bd60e25032eedeb09858d14b" - ], - "was_informed_by": "gold:Gp0127640", - "input_contig_num": 106665, - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga06rnc11", - "mags_list": [ - { - "number_of_contig": 213, - 
"completeness": 48.94, - "bin_name": "bins.1", - "gene_count": 1422, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 30 - } - ], - "unbinned_contig_num": 1585, - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:33:17+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9bb" - }, - "has_input": [ - "nmdc:b85a322271c7f93ef295141d12cb2dbc" - ], - "part_of": [ - "nmdc:mga06rnc11" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:13e64b02d230f76008e42256a48d1cec", - "nmdc:7babb0c9f662679659b7b1bee469f073", - "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", - "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", - "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", - "nmdc:93fa7de9c74cfcff99bb74e27fa94674", - "nmdc:63bad86a6d7fb23b5a4683ae36820622", - "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", - "nmdc:46722961c280df725d15489e82502031", - "nmdc:6f1a0029cb25f1433de1d7c241bc7553", - "nmdc:6d2839963f616d810e66435b3bbe018a", - "nmdc:efbf36ca49c40ad0367ecd23c012b29b" - ], - "was_informed_by": "gold:Gp0127640", - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga06rnc11", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:33:17+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa6" - }, - "has_input": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2" - ], - "part_of": [ - "nmdc:mga06rnc11" - ], - "ctg_logsum": 42879, - 
"scaf_logsum": 42987, - "gap_pct": 0.0005, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b85a322271c7f93ef295141d12cb2dbc", - "nmdc:794445b3fedfaec8af9b70b167bc6852", - "nmdc:d389ae4f8a92c21423fc77aa054ba985", - "nmdc:765541c2865f6047d5e2e8e7299908e4", - "nmdc:78b554dd52492c3d1e401d0c9198b89b" - ], - "asm_score": 5.471, - "was_informed_by": "gold:Gp0127640", - "ctg_powsum": 4901.253, - "scaf_max": 27880, - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "scaf_powsum": 4913.296, - "execution_resource": "NERSC-Cori", - "contigs": 106665, - "name": "Assembly Activity for nmdc:mga06rnc11", - "ctg_max": 27880, - "gc_std": 0.10189, - "contig_bp": 40331509, - "gc_avg": 0.58648, - "started_at_time": "2021-10-11T02:24:27Z", - "scaf_bp": 40331709, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 106645, - "ended_at_time": "2021-10-11T04:33:17+00:00", - "ctg_l50": 336, - "ctg_l90": 282, - "ctg_n50": 38543, - "ctg_n90": 94525, - "scaf_l50": 336, - "scaf_l90": 282, - "scaf_n50": 38534, - "scaf_n90": 94506 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b54" - }, - "id": "nmdc:omprc-11-932jcd76", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-pvcgp635" - ], - "has_output": [ - "jgi:574fe0a17ded5e3df1ee148a" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW 
N3_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127640" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c875" - }, - "has_input": [ - "nmdc:0094fcbe3a051a8000b8823c8db540f8" - ], - "part_of": [ - "nmdc:mga06rnc11" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2", - "nmdc:db5ccad12d6ddb46947fbd815aae7f9a" - ], - "was_informed_by": "gold:Gp0127640", - "input_read_count": 28754670, - "output_read_bases": 4186416440, - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4341955170, - "name": "Read QC Activity for nmdc:mga06rnc11", - "output_read_count": 27981268, - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:33:17+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf3a" - }, - "has_input": [ - "nmdc:534c94e20d292a6bf09c0a42b550b4c2" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7e79b2eba131ed6df71a56f47b1b901f", - "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", - "nmdc:d5e45563875efca0653ba2dd47ee3d68", - "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", - "nmdc:61f1f6d57fd4d445682e25ec34901721", - "nmdc:7c31728fc2a51c8d202f9f74b1919886", - "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", - "nmdc:e2939606fc9ff1c0046b333e1740f258", - "nmdc:d47144fd7ec0608e7677550d9589c889" - ], - "was_informed_by": "gold:Gp0127640", - "id": "nmdc:414c4647eddd8081308d92da2d59815e", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", - 
"started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:33:17+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2061929348, - "type": "nmdc:DataObject", - "id": "jgi:574fde697ded5e3df1ee140a", - "name": "10533.1.165310.GCTACGT-AACGTAG.fastq.gz" - }, - { - "name": "Gp0127641_Filtered Reads", - "description": "Filtered Reads for Gp0127641", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filtered.fastq.gz", - "md5_checksum": "a2700afe93abad6f004a3701348622a2", - "id": "nmdc:a2700afe93abad6f004a3701348622a2", - "file_size_bytes": 1787020792 - }, - { - "name": "Gp0127641_Filtered Stats", - "description": "Filtered Stats for Gp0127641", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filterStats.txt", - "md5_checksum": "aaa9a8a3d8e147116953394a8755742d", - "id": "nmdc:aaa9a8a3d8e147116953394a8755742d", - "file_size_bytes": 289 - }, - { - "name": "Gp0127641_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", - "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", - "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "file_size_bytes": 3331 - }, - { - "name": "Gp0127641_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127641", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", - "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", - "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", - "file_size_bytes": 761359 - }, - { - "name": "Gp0127641_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127641", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", - "md5_checksum": "f473f4a99336a49105d2722888ae0510", - "id": "nmdc:f473f4a99336a49105d2722888ae0510", - "file_size_bytes": 236161 - }, - { - "name": "Gp0127641_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127641", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", - "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", - "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", - "file_size_bytes": 1635953327 - }, - { - "name": "Gp0127641_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127641", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", - "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", - "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "file_size_bytes": 255166 - }, - { - "name": "Gp0127641_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127641", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", - "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", - "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "file_size_bytes": 2332521 - }, - { - "name": 
"Gp0127641_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127641", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", - "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", - "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "file_size_bytes": 1307934195 - }, - { - "name": "Gp0127641_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127641", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", - "md5_checksum": "dc193d1a1693589003f992c820606bab", - "id": "nmdc:dc193d1a1693589003f992c820606bab", - "file_size_bytes": 635050 - }, - { - "name": "Gp0127641_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127641", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", - "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", - "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", - "file_size_bytes": 3964515 - }, - { - "name": "Gp0127641_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127641", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_contigs.fna", - "md5_checksum": "18f0d53f503c855c0093677df58366e0", - "id": "nmdc:18f0d53f503c855c0093677df58366e0", - "file_size_bytes": 102384540 - }, - { - "name": "Gp0127641_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127641", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_scaffolds.fna", - "md5_checksum": "2fe3e02d47d8e1d66ccb15c0e42bf1e0", - "id": 
"nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", - "file_size_bytes": 101806869 - }, - { - "name": "Gp0127641_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_covstats.txt", - "md5_checksum": "04ad2128f72c26a4fa2d0ee7b1709ee9", - "id": "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", - "file_size_bytes": 15204446 - }, - { - "name": "Gp0127641_Assembled AGP file", - "description": "Assembled AGP file for Gp0127641", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_assembly.agp", - "md5_checksum": "b89858508c524a03011cd5191f7589fa", - "id": "nmdc:b89858508c524a03011cd5191f7589fa", - "file_size_bytes": 14206204 - }, - { - "name": "Gp0127641_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127641", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_pairedMapped_sorted.bam", - "md5_checksum": "6974d394df454501e0515b31a2415367", - "id": "nmdc:6974d394df454501e0515b31a2415367", - "file_size_bytes": 1967753614 - }, - { - "name": "Gp0127641_Protein FAA", - "description": "Protein FAA for Gp0127641", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_proteins.faa", - "md5_checksum": "f33a2a1789f5e913c3ef0dd0440a4877", - "id": "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", - "file_size_bytes": 57768168 - }, - { - "name": "Gp0127641_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127641", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_structural_annotation.gff", - "md5_checksum": "9aba4a0c78cb073609b129c4bb65fe2d", - "id": 
"nmdc:9aba4a0c78cb073609b129c4bb65fe2d", - "file_size_bytes": 2522 - }, - { - "name": "Gp0127641_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127641", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_functional_annotation.gff", - "md5_checksum": "2477ce1de68bdb1322eec1ffad5c74ac", - "id": "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", - "file_size_bytes": 65167139 - }, - { - "name": "Gp0127641_KO TSV file", - "description": "KO TSV file for Gp0127641", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko.tsv", - "md5_checksum": "65768fea44cbd0183b286ab8f9883394", - "id": "nmdc:65768fea44cbd0183b286ab8f9883394", - "file_size_bytes": 7266122 - }, - { - "name": "Gp0127641_EC TSV file", - "description": "EC TSV file for Gp0127641", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ec.tsv", - "md5_checksum": "b8ac75e77d2bc2607877e33ab692c43b", - "id": "nmdc:b8ac75e77d2bc2607877e33ab692c43b", - "file_size_bytes": 4793386 - }, - { - "name": "Gp0127641_COG GFF file", - "description": "COG GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cog.gff", - "md5_checksum": "31018e605b1569eb64006f2108b9d7d4", - "id": "nmdc:31018e605b1569eb64006f2108b9d7d4", - "file_size_bytes": 38184948 - }, - { - "name": "Gp0127641_PFAM GFF file", - "description": "PFAM GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_pfam.gff", - "md5_checksum": "c7ee9f693971a7686d8ff701fddbcb4a", - "id": "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", - "file_size_bytes": 28867184 - }, - { - "name": "Gp0127641_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127641", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_tigrfam.gff", - "md5_checksum": "5c0d5f63853ca572d8d73cac9a36c8d7", - "id": "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", - "file_size_bytes": 3122581 - }, - { - "name": "Gp0127641_SMART GFF file", - "description": "SMART GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_smart.gff", - "md5_checksum": "058c5e17eeeea69b2bf0b1b3c2838aea", - "id": "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", - "file_size_bytes": 8368877 - }, - { - "name": "Gp0127641_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_supfam.gff", - "md5_checksum": "b836f94d526c1936d080a4aa7c0646c9", - "id": "nmdc:b836f94d526c1936d080a4aa7c0646c9", - "file_size_bytes": 47986944 - }, - { - "name": "Gp0127641_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cath_funfam.gff", - "md5_checksum": "0100d09c52d0c243b5ae45d95e6a22dc", - "id": "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", - "file_size_bytes": 36349993 - }, - { - "name": "Gp0127641_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko_ec.gff", - "md5_checksum": "64b87140003d1a5a3d9ac939be55e57d", - "id": "nmdc:64b87140003d1a5a3d9ac939be55e57d", - "file_size_bytes": 23113010 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - 
"file_size_bytes": 0 - }, - { - "name": "Gp0127641_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.tooShort.fa", - "md5_checksum": "024b6771e169aeaf57a3b10acc6045a1", - "id": "nmdc:024b6771e169aeaf57a3b10acc6045a1", - "file_size_bytes": 80852741 - }, - { - "name": "Gp0127641_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.unbinned.fa", - "md5_checksum": "545cd253ad26116236dec9937b32d8ef", - "id": "nmdc:545cd253ad26116236dec9937b32d8ef", - "file_size_bytes": 19497941 - }, - { - "name": "Gp0127641_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127641", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_checkm_qa.out", - "md5_checksum": "1785cfe7cf0546dc8702193921a2f566", - "id": "nmdc:1785cfe7cf0546dc8702193921a2f566", - "file_size_bytes": 936 - }, - { - "name": "Gp0127641_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127641", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_hqmq_bin.zip", - "md5_checksum": "0a2a5650358b51ffcd3bbcfc874ac5c9", - "id": "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", - "file_size_bytes": 182 - }, - { - "name": "Gp0127641_metabat2 bins", - "description": "metabat2 bins for Gp0127641", - "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_metabat_bin.zip", - "md5_checksum": "8f6b89831cabcd1dc7aa5e26d87f5063", - "id": "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063", - "file_size_bytes": 625863 - }, - { - "_id": { - 
"$oid": "649b003d1ae706d7b5b14e95" - }, - "description": "Assembled contigs fasta for gold:Gp0127641", - "url": "https://data.microbiomedata.org/data/1781_100343/assembly/assembly_contigs.fna", - "file_size_bytes": 101616916, - "type": "nmdc:DataObject", - "id": "nmdc:a707d24e95ee536650d1cc70bbf997d8", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e97" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127641", - "url": "https://data.microbiomedata.org/data/1781_100343/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1939284551, - "type": "nmdc:DataObject", - "id": "nmdc:af117c2397f282c3f1d319c499d72b01", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e99" - }, - "description": "Assembled AGP file for gold:Gp0127641", - "url": "https://data.microbiomedata.org/data/1781_100343/assembly/assembly.agp", - "file_size_bytes": 12669908, - "type": "nmdc:DataObject", - "id": "nmdc:662fa061e9042db360dd7981f6068505", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e9a" - }, - "description": "Assembled scaffold fasta for gold:Gp0127641", - "url": "https://data.microbiomedata.org/data/1781_100343/assembly/assembly_scaffolds.fna", - "file_size_bytes": 101039761, - "type": "nmdc:DataObject", - "id": "nmdc:3c5870bf66d9acb165352c67638b29c8", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e9e" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127641", - "url": "https://data.microbiomedata.org/data/1781_100343/assembly/mapping_stats.txt", - "file_size_bytes": 14436822, - "type": "nmdc:DataObject", - "id": "nmdc:bb3f818e2f6299570c76a7ea96fcf7e4", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { 
- "_id": { - "$oid": "649b003d1ae706d7b5b15b84" - }, - "id": "nmdc:f6cd4b98b207dc9f70dcfa063d4afb92", - "name": "1781_100343.krona.html", - "description": "Gold:Gp0127641 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100343/ReadbasedAnalysis/centrifuge/1781_100343.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b8a" - }, - "id": "nmdc:48c8c09803af12f6092d895de5a1eff9", - "name": "1781_100343.json", - "description": "Gold:Gp0127641 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100343/ReadbasedAnalysis/1781_100343.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16610" - }, - "id": "nmdc:ede01d68f85f6183407fe751475b2350", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127641", - "file_size_bytes": 78611268, - "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16611" - }, - "id": "nmdc:af8a167fb92c9470eaa77ed8617d454d", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127641", - "file_size_bytes": 930, - "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16614" - }, - "id": "nmdc:55b42e3f671bfeab937473ef45b55b4b", - "name": "gold:Gp0127641.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127641", - "file_size_bytes": 434241, - "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": 
"649b003f1ae706d7b5b1661a" - }, - "id": "nmdc:9e790d1073c174456b4d98661bf92d81", - "name": "gold:Gp0127641.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127641", - "file_size_bytes": 740186, - "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16628" - }, - "id": "nmdc:5d6128d308651aad814210c9a3a28f3b", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127641", - "file_size_bytes": 20215397, - "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d17" - }, - "description": "Structural annotation GFF file for gold:Gp0127641", - "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_structural_annotation.gff", - "md5_checksum": "a33ac2dc640b7088767a99517f22421f", - "file_size_bytes": 3385, - "id": "nmdc:a33ac2dc640b7088767a99517f22421f", - "name": "gold:Gp0127641_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d18" - }, - "description": "KO TSV File for gold:Gp0127641", - "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_ko.tsv", - "md5_checksum": "4ec0cbf7d166057c3d2904b2dd2f6b15", - "file_size_bytes": 3385, - "id": "nmdc:4ec0cbf7d166057c3d2904b2dd2f6b15", - "name": "gold:Gp0127641_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d2d" - }, - "description": "Protein FAA for gold:Gp0127641", - "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_proteins.faa", - "md5_checksum": "d10b0c9b0d5e646d09c570eb2e08b793", - "file_size_bytes": 3385, - "id": 
"nmdc:d10b0c9b0d5e646d09c570eb2e08b793", - "name": "gold:Gp0127641_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d31" - }, - "description": "EC TSV File for gold:Gp0127641", - "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_ec.tsv", - "md5_checksum": "71306193abf043865cafa413b3ca9c1e", - "file_size_bytes": 3385, - "id": "nmdc:71306193abf043865cafa413b3ca9c1e", - "name": "gold:Gp0127641_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d33" - }, - "description": "Functional annotation GFF file for gold:Gp0127641", - "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_functional_annotation.gff", - "md5_checksum": "11d4524c896f4fd678ff05a0547b6b52", - "file_size_bytes": 3385, - "id": "nmdc:11d4524c896f4fd678ff05a0547b6b52", - "name": "gold:Gp0127641_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34719" - }, - "has_input": [ - "nmdc:18f0d53f503c855c0093677df58366e0", - "nmdc:6974d394df454501e0515b31a2415367", - "nmdc:2477ce1de68bdb1322eec1ffad5c74ac" - ], - "too_short_contig_num": 179152, - "part_of": [ - "nmdc:mga0822t33" - ], - "binned_contig_num": 464, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:024b6771e169aeaf57a3b10acc6045a1", - "nmdc:545cd253ad26116236dec9937b32d8ef", - "nmdc:1785cfe7cf0546dc8702193921a2f566", - "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", - "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063" - ], - "was_informed_by": "gold:Gp0127641", - "input_contig_num": 191906, - "id": 
"nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0822t33", - "mags_list": [ - { - "number_of_contig": 142, - "completeness": 24.43, - "bin_name": "bins.1", - "gene_count": 832, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 15 - }, - { - "number_of_contig": 322, - "completeness": 46.21, - "bin_name": "bins.2", - "gene_count": 1652, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 21 - } - ], - "unbinned_contig_num": 12290, - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:05:47+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b9" - }, - "has_input": [ - "nmdc:18f0d53f503c855c0093677df58366e0" - ], - "part_of": [ - "nmdc:mga0822t33" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", - "nmdc:9aba4a0c78cb073609b129c4bb65fe2d", - "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", - "nmdc:65768fea44cbd0183b286ab8f9883394", - "nmdc:b8ac75e77d2bc2607877e33ab692c43b", - "nmdc:31018e605b1569eb64006f2108b9d7d4", - "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", - "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", - "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", - "nmdc:b836f94d526c1936d080a4aa7c0646c9", - "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", - 
"nmdc:64b87140003d1a5a3d9ac939be55e57d" - ], - "was_informed_by": "gold:Gp0127641", - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0822t33", - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:05:47+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa2" - }, - "has_input": [ - "nmdc:a2700afe93abad6f004a3701348622a2" - ], - "part_of": [ - "nmdc:mga0822t33" - ], - "ctg_logsum": 224925, - "scaf_logsum": 225846, - "gap_pct": 0.00137, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:18f0d53f503c855c0093677df58366e0", - "nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", - "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", - "nmdc:b89858508c524a03011cd5191f7589fa", - "nmdc:6974d394df454501e0515b31a2415367" - ], - "asm_score": 3.367, - "was_informed_by": "gold:Gp0127641", - "ctg_powsum": 24264, - "scaf_max": 18020, - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "scaf_powsum": 24365, - "execution_resource": "NERSC-Cori", - "contigs": 191907, - "name": "Assembly Activity for nmdc:mga0822t33", - "ctg_max": 18020, - "gc_std": 0.10192, - "contig_bp": 94878155, - "gc_avg": 0.61857, - "started_at_time": "2021-10-11T02:27:18Z", - "scaf_bp": 94879455, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 191777, - "ended_at_time": "2021-10-11T04:05:47+00:00", - "ctg_l50": 489, - "ctg_l90": 290, - "ctg_n50": 53038, - "ctg_n90": 159679, - "scaf_l50": 489, - "scaf_l90": 290, - "scaf_n50": 53021, - "scaf_n90": 159560 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b55" - }, - "id": "nmdc:omprc-11-p0jdew93", - "name": "Riverbed sediment microbial communities 
from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-fgtanh42" - ], - "has_output": [ - "jgi:574fde697ded5e3df1ee140a" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127641" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c872" - }, - "has_input": [ - "nmdc:c59690f54a7afb65869c9c683e3eef7f" - ], - "part_of": [ - "nmdc:mga0822t33" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a2700afe93abad6f004a3701348622a2", - "nmdc:aaa9a8a3d8e147116953394a8755742d" - ], - "was_informed_by": "gold:Gp0127641", - "input_read_count": 24261468, - "output_read_bases": 3340338011, - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3663481668, - "name": "Read QC Activity for nmdc:mga0822t33", - "output_read_count": 22362924, - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:05:47+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf37" - }, - "has_input": [ - "nmdc:a2700afe93abad6f004a3701348622a2" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0d021c80bfd39c8293a8b355b8ff3605", - "nmdc:a42312841b816448d8bd5d3adfa65f58", - "nmdc:f473f4a99336a49105d2722888ae0510", - "nmdc:ae51ea50660f44fa3b317a45f3015556", - "nmdc:ef39b44a90c8525e93f45e500b3ae934", - "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", - "nmdc:869730c4d81163e0c238dd4ae27ebd9e", - "nmdc:dc193d1a1693589003f992c820606bab", - "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" - ], - "was_informed_by": "gold:Gp0127641", - "id": "nmdc:363fe7a0dd914e046b274fea70625c52", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0822t33", - "started_at_time": "2021-10-11T02:27:18Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:47+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2168673471, - "type": "nmdc:DataObject", - "id": "jgi:574fde6c7ded5e3df1ee140c", - "name": "10533.1.165310.TCCGAGT-AACTCGG.fastq.gz" - }, - { - "name": "Gp0127643_Filtered Reads", - "description": "Filtered Reads for Gp0127643", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", - "md5_checksum": "2ef23543e3064ca73c3034713d87c026", - "id": "nmdc:2ef23543e3064ca73c3034713d87c026", - "file_size_bytes": 1891088172 - }, - { - "name": "Gp0127643_Filtered Stats", - "description": "Filtered Stats for Gp0127643", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", - 
"md5_checksum": "87b172ead58a37be8d199c0acfc96759", - "id": "nmdc:87b172ead58a37be8d199c0acfc96759", - "file_size_bytes": 289 - }, - { - "name": "Gp0127643_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", - "md5_checksum": "e8f825653e5736e29b73de55bd11a270", - "id": "nmdc:e8f825653e5736e29b73de55bd11a270", - "file_size_bytes": 1326 - }, - { - "name": "Gp0127643_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", - "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", - "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "file_size_bytes": 664131 - }, - { - "name": "Gp0127643_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127643", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", - "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", - "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "file_size_bytes": 229630 - }, - { - "name": "Gp0127643_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127643", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", - "md5_checksum": "c9074b2e05765afd68463dc301b87995", - "id": "nmdc:c9074b2e05765afd68463dc301b87995", - "file_size_bytes": 1726867547 - }, - { - "name": "Gp0127643_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127643", - "data_object_type": "Centrifuge Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", - "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", - "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "file_size_bytes": 254021 - }, - { - "name": "Gp0127643_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127643", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", - "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", - "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", - "file_size_bytes": 2331702 - }, - { - "name": "Gp0127643_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127643", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", - "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", - "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", - "file_size_bytes": 1376409913 - }, - { - "name": "Gp0127643_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127643", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", - "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", - "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "file_size_bytes": 640506 - }, - { - "name": "Gp0127643_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127643", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", - "md5_checksum": "f98bae155bced880c058ecde7d539c18", - "id": "nmdc:f98bae155bced880c058ecde7d539c18", - "file_size_bytes": 3998448 - }, - { - "name": "Gp0127643_Assembled contigs fasta", - 
"description": "Assembled contigs fasta for Gp0127643", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_contigs.fna", - "md5_checksum": "a3a85f9f946ff34f28dfd4b5f8590f23", - "id": "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", - "file_size_bytes": 112772885 - }, - { - "name": "Gp0127643_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127643", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_scaffolds.fna", - "md5_checksum": "001fd34d98a73eee6be5a41004e67469", - "id": "nmdc:001fd34d98a73eee6be5a41004e67469", - "file_size_bytes": 112143079 - }, - { - "name": "Gp0127643_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_covstats.txt", - "md5_checksum": "9b45294f72cb55b2f039366d33183fa3", - "id": "nmdc:9b45294f72cb55b2f039366d33183fa3", - "file_size_bytes": 16563197 - }, - { - "name": "Gp0127643_Assembled AGP file", - "description": "Assembled AGP file for Gp0127643", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_assembly.agp", - "md5_checksum": "b2ec4f5a3f02869684bdfaf065d75c54", - "id": "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", - "file_size_bytes": 15493398 - }, - { - "name": "Gp0127643_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127643", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_pairedMapped_sorted.bam", - "md5_checksum": "fa61e18d49a2012f115d970f0a195986", - "id": "nmdc:fa61e18d49a2012f115d970f0a195986", - "file_size_bytes": 2085429752 - }, - { - "name": "Gp0127643_Protein FAA", - "description": "Protein FAA for Gp0127643", - 
"data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_proteins.faa", - "md5_checksum": "b2cd0d1a024094fd4e308c21d439ed5f", - "id": "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", - "file_size_bytes": 63917762 - }, - { - "name": "Gp0127643_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127643", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_structural_annotation.gff", - "md5_checksum": "6151bacd37618698c28b00151b4998f8", - "id": "nmdc:6151bacd37618698c28b00151b4998f8", - "file_size_bytes": 2521 - }, - { - "name": "Gp0127643_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127643", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_functional_annotation.gff", - "md5_checksum": "744277086ab01222a91233536d5e8976", - "id": "nmdc:744277086ab01222a91233536d5e8976", - "file_size_bytes": 71811800 - }, - { - "name": "Gp0127643_KO TSV file", - "description": "KO TSV file for Gp0127643", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko.tsv", - "md5_checksum": "9c8a359c69bcb1179241f9a3c727fa23", - "id": "nmdc:9c8a359c69bcb1179241f9a3c727fa23", - "file_size_bytes": 7959243 - }, - { - "name": "Gp0127643_EC TSV file", - "description": "EC TSV file for Gp0127643", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ec.tsv", - "md5_checksum": "027b72af172d078f88471d932cf6d473", - "id": "nmdc:027b72af172d078f88471d932cf6d473", - "file_size_bytes": 5202338 - }, - { - "name": "Gp0127643_COG GFF file", - "description": "COG GFF file for Gp0127643", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cog.gff", - "md5_checksum": "ff24990735aa002e828ff7204a456ad2", - "id": "nmdc:ff24990735aa002e828ff7204a456ad2", - "file_size_bytes": 41649279 - }, - { - "name": "Gp0127643_PFAM GFF file", - "description": "PFAM GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_pfam.gff", - "md5_checksum": "e884ad501d1bb3bcf006f0999020ce0f", - "id": "nmdc:e884ad501d1bb3bcf006f0999020ce0f", - "file_size_bytes": 31529168 - }, - { - "name": "Gp0127643_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_tigrfam.gff", - "md5_checksum": "8321f818f53371491a7a80ef7e063ca6", - "id": "nmdc:8321f818f53371491a7a80ef7e063ca6", - "file_size_bytes": 3378599 - }, - { - "name": "Gp0127643_SMART GFF file", - "description": "SMART GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_smart.gff", - "md5_checksum": "6f799842fe74ebff7942a026dbf9b1bf", - "id": "nmdc:6f799842fe74ebff7942a026dbf9b1bf", - "file_size_bytes": 9132037 - }, - { - "name": "Gp0127643_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_supfam.gff", - "md5_checksum": "8ee84a629a5899c25e0fbd0f07084530", - "id": "nmdc:8ee84a629a5899c25e0fbd0f07084530", - "file_size_bytes": 52720037 - }, - { - "name": "Gp0127643_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cath_funfam.gff", - "md5_checksum": "6697cdb0b1dcf83e7ecb8fcefa0703ef", - "id": "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", - "file_size_bytes": 39643020 - }, - { - "name": "Gp0127643_KO_EC GFF file", - "description": "KO_EC GFF file for 
Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko_ec.gff", - "md5_checksum": "d2990b0bd86e50209dcada6fa6b09510", - "id": "nmdc:d2990b0bd86e50209dcada6fa6b09510", - "file_size_bytes": 25272687 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127643_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.tooShort.fa", - "md5_checksum": "ed8acb6d21b14da131350d9c52aa7041", - "id": "nmdc:ed8acb6d21b14da131350d9c52aa7041", - "file_size_bytes": 87917684 - }, - { - "name": "Gp0127643_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.unbinned.fa", - "md5_checksum": "d81e3cc17fa762a717dcf324a0aa3d45", - "id": "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", - "file_size_bytes": 22746526 - }, - { - "name": "Gp0127643_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127643", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_checkm_qa.out", - "md5_checksum": "bd388cba93a77cde2f5791fa0f580865", - "id": "nmdc:bd388cba93a77cde2f5791fa0f580865", - "file_size_bytes": 785 - }, - { - "name": "Gp0127643_high-quality and medium-quality bins", - "description": "high-quality 
and medium-quality bins for Gp0127643", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_hqmq_bin.zip", - "md5_checksum": "30695aca02693c6aba316db3e9f565a8", - "id": "nmdc:30695aca02693c6aba316db3e9f565a8", - "file_size_bytes": 182 - }, - { - "name": "Gp0127643_metabat2 bins", - "description": "metabat2 bins for Gp0127643", - "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_metabat_bin.zip", - "md5_checksum": "79de6d81848956e1c06a811bc9bdab81", - "id": "nmdc:79de6d81848956e1c06a811bc9bdab81", - "file_size_bytes": 614113 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea0" - }, - "description": "Assembled AGP file for gold:Gp0127643", - "url": "https://data.microbiomedata.org/data/1781_100345/assembly/assembly.agp", - "file_size_bytes": 13820270, - "type": "nmdc:DataObject", - "id": "nmdc:b0ff6dcafcb9bed83c5290e6f974dbf0", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea1" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127643", - "url": "https://data.microbiomedata.org/data/1781_100345/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2057080151, - "type": "nmdc:DataObject", - "id": "nmdc:372822daf5aee3e4a9b1f8e621dbd3f5", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea3" - }, - "description": "Assembled scaffold fasta for gold:Gp0127643", - "url": "https://data.microbiomedata.org/data/1781_100345/assembly/assembly_scaffolds.fna", - "file_size_bytes": 111307907, - "type": "nmdc:DataObject", - "id": "nmdc:7b778a5f68bdd7a7deeb51a98df7ac3d", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea4" - }, - "description": "Assembled contigs fasta for gold:Gp0127643", - "url": 
"https://data.microbiomedata.org/data/1781_100345/assembly/assembly_contigs.fna", - "file_size_bytes": 111937017, - "type": "nmdc:DataObject", - "id": "nmdc:e087926bf099d6b56eaa8ed38dc9587c", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea5" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127643", - "url": "https://data.microbiomedata.org/data/1781_100345/assembly/mapping_stats.txt", - "file_size_bytes": 15727329, - "type": "nmdc:DataObject", - "id": "nmdc:3e82935d61f88ddbd5c4d0be5f3a4974", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b98" - }, - "id": "nmdc:f562a2cbd61dd314aa652b5a7962a453", - "name": "1781_100345.krona.html", - "description": "Gold:Gp0127643 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100345/ReadbasedAnalysis/centrifuge/1781_100345.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b9a" - }, - "id": "nmdc:60405949438243714571490c6faab9f5", - "name": "1781_100345.json", - "description": "Gold:Gp0127643 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100345/ReadbasedAnalysis/1781_100345.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16617" - }, - "id": "nmdc:843af81eb17f23a12d17e72a36922a7a", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127643", - "file_size_bytes": 22959128, - "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1661b" - }, - "id": "nmdc:00606078cd171aa99d50f89abea30559", - "name": "gold:Gp0127643.bins.2.fa", - "description": "metabat2 binned contig 
file for gold:Gp0127643", - "file_size_bytes": 232512, - "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1661c" - }, - "id": "nmdc:ea805619c536992228a7e6ad5e3ee57a", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127643", - "file_size_bytes": 930, - "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1661d" - }, - "id": "nmdc:4c1506844b7b4f668c62e266dc7180da", - "name": "gold:Gp0127643.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127643", - "file_size_bytes": 1495841, - "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1662a" - }, - "id": "nmdc:7c45113f19fcf47e76d2408c9e4aa2af", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127643", - "file_size_bytes": 85480222, - "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d16" - }, - "description": "KO TSV File for gold:Gp0127643", - "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_ko.tsv", - "md5_checksum": "b7b422e726f82668cd9c2ea9f0786f41", - "file_size_bytes": 3385, - "id": "nmdc:b7b422e726f82668cd9c2ea9f0786f41", - "name": "gold:Gp0127643_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d19" - }, - "description": "Functional annotation GFF file for gold:Gp0127643", - 
"url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_functional_annotation.gff", - "md5_checksum": "f8df0729f51da70739b75a2458e32020", - "file_size_bytes": 3385, - "id": "nmdc:f8df0729f51da70739b75a2458e32020", - "name": "gold:Gp0127643_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d1a" - }, - "description": "Protein FAA for gold:Gp0127643", - "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_proteins.faa", - "md5_checksum": "7434bd60874fc6d05530ee0652a9e18f", - "file_size_bytes": 3385, - "id": "nmdc:7434bd60874fc6d05530ee0652a9e18f", - "name": "gold:Gp0127643_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d1b" - }, - "description": "Structural annotation GFF file for gold:Gp0127643", - "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_structural_annotation.gff", - "md5_checksum": "d897fea88896a93843966962f6bbb7be", - "file_size_bytes": 3385, - "id": "nmdc:d897fea88896a93843966962f6bbb7be", - "name": "gold:Gp0127643_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d50" - }, - "description": "EC TSV File for gold:Gp0127643", - "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_ec.tsv", - "md5_checksum": "0b7fc1ad662f267eaa604075f9968b7c", - "file_size_bytes": 3385, - "id": "nmdc:0b7fc1ad662f267eaa604075f9968b7c", - "name": "gold:Gp0127643_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3471e" - }, - "has_input": [ - 
"nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", - "nmdc:fa61e18d49a2012f115d970f0a195986", - "nmdc:744277086ab01222a91233536d5e8976" - ], - "too_short_contig_num": 194066, - "part_of": [ - "nmdc:mga0evc178" - ], - "binned_contig_num": 470, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:ed8acb6d21b14da131350d9c52aa7041", - "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", - "nmdc:bd388cba93a77cde2f5791fa0f580865", - "nmdc:30695aca02693c6aba316db3e9f565a8", - "nmdc:79de6d81848956e1c06a811bc9bdab81" - ], - "was_informed_by": "gold:Gp0127643", - "input_contig_num": 208967, - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0evc178", - "mags_list": [ - { - "number_of_contig": 470, - "completeness": 30.73, - "bin_name": "bins.1", - "gene_count": 2501, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 1.71, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 22 - } - ], - "unbinned_contig_num": 14431, - "started_at_time": "2021-10-11T02:27:00Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:04:16+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9bd" - }, - "has_input": [ - "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23" - ], - "part_of": [ - "nmdc:mga0evc178" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", - "nmdc:6151bacd37618698c28b00151b4998f8", - "nmdc:744277086ab01222a91233536d5e8976", - "nmdc:9c8a359c69bcb1179241f9a3c727fa23", - "nmdc:027b72af172d078f88471d932cf6d473", - 
"nmdc:ff24990735aa002e828ff7204a456ad2", - "nmdc:e884ad501d1bb3bcf006f0999020ce0f", - "nmdc:8321f818f53371491a7a80ef7e063ca6", - "nmdc:6f799842fe74ebff7942a026dbf9b1bf", - "nmdc:8ee84a629a5899c25e0fbd0f07084530", - "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", - "nmdc:d2990b0bd86e50209dcada6fa6b09510" - ], - "was_informed_by": "gold:Gp0127643", - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0evc178", - "started_at_time": "2021-10-11T02:27:00Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:04:16+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa9" - }, - "has_input": [ - "nmdc:2ef23543e3064ca73c3034713d87c026" - ], - "part_of": [ - "nmdc:mga0evc178" - ], - "ctg_logsum": 258957, - "scaf_logsum": 260132, - "gap_pct": 0.00166, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", - "nmdc:001fd34d98a73eee6be5a41004e67469", - "nmdc:9b45294f72cb55b2f039366d33183fa3", - "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", - "nmdc:fa61e18d49a2012f115d970f0a195986" - ], - "asm_score": 3.329, - "was_informed_by": "gold:Gp0127643", - "ctg_powsum": 27868, - "scaf_max": 12873, - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "scaf_powsum": 27998, - "execution_resource": "NERSC-Cori", - "contigs": 208967, - "name": "Assembly Activity for nmdc:mga0evc178", - "ctg_max": 12873, - "gc_std": 0.09438, - "contig_bp": 104567589, - "gc_avg": 0.63102, - "started_at_time": "2021-10-11T02:27:00Z", - "scaf_bp": 104569329, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 208793, - "ended_at_time": "2021-10-11T04:04:16+00:00", - "ctg_l50": 497, - "ctg_l90": 292, - "ctg_n50": 57164, - "ctg_n90": 172414, - "scaf_l50": 498, - "scaf_l90": 292, - "scaf_n50": 56935, - "scaf_n90": 172256 - } - ], - "metagenome_sequencing_activity_set": [], - 
"metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b56" - }, - "id": "nmdc:omprc-11-dtsr6z90", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-g079t498" - ], - "has_output": [ - "jgi:574fde6c7ded5e3df1ee140c" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127643" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c879" - }, - "has_input": [ - "nmdc:8b553dbdd47b90ed7f55d5747822f5d5" - ], - "part_of": [ - "nmdc:mga0evc178" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:2ef23543e3064ca73c3034713d87c026", - "nmdc:87b172ead58a37be8d199c0acfc96759" - ], - "was_informed_by": "gold:Gp0127643", - "input_read_count": 25305566, - "output_read_bases": 3510483777, - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3821140466, - "name": "Read QC Activity for nmdc:mga0evc178", - "output_read_count": 23508042, - "started_at_time": "2021-10-11T02:27:00Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": 
"2021-10-11T04:04:16+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf36" - }, - "has_input": [ - "nmdc:2ef23543e3064ca73c3034713d87c026" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e8f825653e5736e29b73de55bd11a270", - "nmdc:99bb1311b220e9a03da619fe5fb58f0f", - "nmdc:5c97bc15d4d5999f140664b3b2777c6d", - "nmdc:c9074b2e05765afd68463dc301b87995", - "nmdc:ed2c05d1702a9a811b8a98de748bc82a", - "nmdc:6465fe59472b111ead1f0414ccf39f62", - "nmdc:9855ca52bce074c34dcebfd154fa94ff", - "nmdc:ed8059f366d60112deb41a0c307bc6fc", - "nmdc:f98bae155bced880c058ecde7d539c18" - ], - "was_informed_by": "gold:Gp0127643", - "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0evc178", - "started_at_time": "2021-10-11T02:27:00Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:04:16+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "data_object_type": "Metagenome Raw Reads", - "url": "https://data.microbiomedata.org/data/raw/10533.3.165334.AGTCTCA-GTGAGAC.fastq.gz", - "file_size_bytes": 939616475, - "type": "nmdc:DataObject", - "id": "jgi:574fe0a87ded5e3df1ee148e", - "name": "10533.3.165334.AGTCTCA-GTGAGAC.fastq.gz" - }, - { - "name": "Gp0127644_Filtered Reads", - "description": "Filtered Reads for Gp0127644", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filtered.fastq.gz", - "md5_checksum": 
"98da35678c59689ce738b2a6bc708692", - "id": "nmdc:98da35678c59689ce738b2a6bc708692", - "file_size_bytes": 694199131 - }, - { - "name": "Gp0127644_Filtered Stats", - "description": "Filtered Stats for Gp0127644", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filterStats.txt", - "md5_checksum": "ff08ea52254e0cc1011c56656505b27b", - "id": "nmdc:ff08ea52254e0cc1011c56656505b27b", - "file_size_bytes": 280 - }, - { - "name": "gold:Gp0452677_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for gold:Gp0452677", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", - "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", - "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "file_size_bytes": 109 - }, - { - "name": "Gp0127644_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", - "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", - "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "file_size_bytes": 426075 - }, - { - "name": "gold:Gp0452677_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for gold:Gp0452677", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", - "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", - "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "file_size_bytes": 226638 - }, - { - "name": "Gp0127644_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127644", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", - "md5_checksum": 
"b0f2449065b52935ddba8abd6ae6bc88", - "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "file_size_bytes": 610862986 - }, - { - "name": "Gp0127644_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127644", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", - "md5_checksum": "9baa708296f62334e099cf61711b5e16", - "id": "nmdc:9baa708296f62334e099cf61711b5e16", - "file_size_bytes": 243322 - }, - { - "name": "Gp0127644_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127644", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", - "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", - "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "file_size_bytes": 2294995 - }, - { - "name": "Gp0127644_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127644", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", - "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", - "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "file_size_bytes": 487178087 - }, - { - "name": "Gp0127644_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127644", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", - "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", - "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "file_size_bytes": 557688 - }, - { - "name": "Gp0127644_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127644", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", - "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", - "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", - "file_size_bytes": 3567307 - }, - { - "name": "Gp0127644_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127644", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_contigs.fna", - "md5_checksum": "16f77f4aaed29f3acc31646e1ce06b2d", - "id": "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", - "file_size_bytes": 21881611 - }, - { - "name": "Gp0127644_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127644", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_scaffolds.fna", - "md5_checksum": "b6afa25cadc614083204383bbad06f48", - "id": "nmdc:b6afa25cadc614083204383bbad06f48", - "file_size_bytes": 21742982 - }, - { - "name": "Gp0127644_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_covstats.txt", - "md5_checksum": "87b1ea13d41499eeb5eb67932db01423", - "id": "nmdc:87b1ea13d41499eeb5eb67932db01423", - "file_size_bytes": 3612085 - }, - { - "name": "Gp0127644_Assembled AGP file", - "description": "Assembled AGP file for Gp0127644", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_assembly.agp", - "md5_checksum": "72a38c353753abcb6d046385bf2950f6", - "id": "nmdc:72a38c353753abcb6d046385bf2950f6", - "file_size_bytes": 3350598 - }, - { - "name": "Gp0127644_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127644", - "data_object_type": "Assembly Coverage BAM", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_pairedMapped_sorted.bam", - "md5_checksum": "ecf498b9aa15f9d000845ffdfa7eb521", - "id": "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", - "file_size_bytes": 746781339 - }, - { - "name": "Gp0127644_Protein FAA", - "description": "Protein FAA for Gp0127644", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_proteins.faa", - "md5_checksum": "9d960cad4d88795aba8bb1acbe415fc9", - "id": "nmdc:9d960cad4d88795aba8bb1acbe415fc9", - "file_size_bytes": 12848136 - }, - { - "name": "Gp0127644_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127644", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_structural_annotation.gff", - "md5_checksum": "cb5d98ee6e459ce1cc2d14295424eef1", - "id": "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", - "file_size_bytes": 2488 - }, - { - "name": "Gp0127644_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127644", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_functional_annotation.gff", - "md5_checksum": "349838000a53b6655a5b12edf6351c50", - "id": "nmdc:349838000a53b6655a5b12edf6351c50", - "file_size_bytes": 15112193 - }, - { - "name": "Gp0127644_KO TSV file", - "description": "KO TSV file for Gp0127644", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko.tsv", - "md5_checksum": "7bb072409221978dbea8ff5cb0bdba1e", - "id": "nmdc:7bb072409221978dbea8ff5cb0bdba1e", - "file_size_bytes": 1814299 - }, - { - "name": "Gp0127644_EC TSV file", - "description": "EC TSV file for Gp0127644", - "data_object_type": "Annotation Enzyme Commission", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ec.tsv", - "md5_checksum": "3d69ade973d1652bd6f061b2122ffe36", - "id": "nmdc:3d69ade973d1652bd6f061b2122ffe36", - "file_size_bytes": 1233948 - }, - { - "name": "Gp0127644_COG GFF file", - "description": "COG GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cog.gff", - "md5_checksum": "2a9b9a21fe5fb84219e0be5f153665be", - "id": "nmdc:2a9b9a21fe5fb84219e0be5f153665be", - "file_size_bytes": 9028987 - }, - { - "name": "Gp0127644_PFAM GFF file", - "description": "PFAM GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_pfam.gff", - "md5_checksum": "83e64b9fc9406a72d18e8dd4742bac1a", - "id": "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", - "file_size_bytes": 6574998 - }, - { - "name": "Gp0127644_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_tigrfam.gff", - "md5_checksum": "cdc4cc8629b7c61f1708f654aaaa9932", - "id": "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", - "file_size_bytes": 783908 - }, - { - "name": "Gp0127644_SMART GFF file", - "description": "SMART GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_smart.gff", - "md5_checksum": "f8d79375a2bf82f257e0015efeee6f26", - "id": "nmdc:f8d79375a2bf82f257e0015efeee6f26", - "file_size_bytes": 2030043 - }, - { - "name": "Gp0127644_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_supfam.gff", - "md5_checksum": "c9b4806132d19e740822b1a84bc4f07d", - "id": "nmdc:c9b4806132d19e740822b1a84bc4f07d", - "file_size_bytes": 11227652 - }, - { - "name": "Gp0127644_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127644", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cath_funfam.gff", - "md5_checksum": "e304e10eb60423c23486e140594d1a7b", - "id": "nmdc:e304e10eb60423c23486e140594d1a7b", - "file_size_bytes": 8555821 - }, - { - "name": "Gp0127644_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko_ec.gff", - "md5_checksum": "9b78f0ac527ee7287ae532a896582948", - "id": "nmdc:9b78f0ac527ee7287ae532a896582948", - "file_size_bytes": 5791094 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127644_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.tooShort.fa", - "md5_checksum": "4857d71459f50147c8ae97ffce40caa5", - "id": "nmdc:4857d71459f50147c8ae97ffce40caa5", - "file_size_bytes": 18310651 - }, - { - "name": "Gp0127644_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.unbinned.fa", - "md5_checksum": "65522bf77241109a74354d0e294597f9", - "id": "nmdc:65522bf77241109a74354d0e294597f9", - "file_size_bytes": 2858628 - }, - { - "name": "Gp0127644_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127644", - "data_object_type": "CheckM Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_checkm_qa.out", - "md5_checksum": "30d6c9fb23abb0849991fad01e0393f1", - "id": "nmdc:30d6c9fb23abb0849991fad01e0393f1", - "file_size_bytes": 760 - }, - { - "name": "Gp0127644_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127644", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_hqmq_bin.zip", - "md5_checksum": "a76c8c9034b877334a75e7c0b7c2c830", - "id": "nmdc:a76c8c9034b877334a75e7c0b7c2c830", - "file_size_bytes": 182 - }, - { - "name": "Gp0127644_metabat2 bins", - "description": "metabat2 bins for Gp0127644", - "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_metabat_bin.zip", - "md5_checksum": "9d712c5924d6d0ee6d7305918e69302d", - "id": "nmdc:9d712c5924d6d0ee6d7305918e69302d", - "file_size_bytes": 218004 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea2" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127644", - "url": "https://data.microbiomedata.org/data/1781_100346/assembly/mapping_stats.txt", - "file_size_bytes": 3427545, - "type": "nmdc:DataObject", - "id": "nmdc:8a13cc4cdcd17eff35bdd65c4ffba887", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea7" - }, - "description": "Assembled contigs fasta for gold:Gp0127644", - "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly_contigs.fna", - "file_size_bytes": 21697071, - "type": "nmdc:DataObject", - "id": "nmdc:f40e4315c5285ac27f850a924b9f0d19", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ebe" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127644", - "url": "https://data.microbiomedata.org/data/1781_100346/assembly/pairedMapped_sorted.bam", - 
"file_size_bytes": 737012011, - "type": "nmdc:DataObject", - "id": "nmdc:0d039fa249c3d84d8f41ba5302cdbf44", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec1" - }, - "description": "Assembled AGP file for gold:Gp0127644", - "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly.agp", - "file_size_bytes": 2981406, - "type": "nmdc:DataObject", - "id": "nmdc:0cd8988c1aa59aed46dc245a4fc85fae", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec2" - }, - "description": "Assembled scaffold fasta for gold:Gp0127644", - "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly_scaffolds.fna", - "file_size_bytes": 21558498, - "type": "nmdc:DataObject", - "id": "nmdc:72856cd0c04a3d82033e4eeb78036c79", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15ba0" - }, - "id": "nmdc:ad532c16f1d8772ef78f2b4977e13fbd", - "name": "1781_100346.krona.html", - "description": "Gold:Gp0127644 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100346/ReadbasedAnalysis/centrifuge/1781_100346.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15ba4" - }, - "id": "nmdc:92cdb6d9a145d9ae65275474604499cc", - "name": "1781_100346.json", - "description": "Gold:Gp0127644 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100346/ReadbasedAnalysis/1781_100346.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1661e" - }, - "id": "nmdc:db639c1a9c06584736a3a8551fd080c4", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127644", - "file_size_bytes": 2937035, - "url": 
"https://data.microbiomedata.org/data/1781_100346/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1661f" - }, - "id": "nmdc:fa322b3ff5e9a665ddc2a40878a19292", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127644", - "file_size_bytes": 17744229, - "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16620" - }, - "id": "nmdc:5506404345e9af51ae1ef526737952eb", - "name": "gold:Gp0127644.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127644", - "file_size_bytes": 291225, - "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16621" - }, - "id": "nmdc:0d447336e66ce46fe603146e03f77994", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127644", - "file_size_bytes": 918, - "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16623" - }, - "id": "nmdc:e199232899faf46559a302f30bd9e0c8", - "name": "gold:Gp0127644.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127644", - "file_size_bytes": 320491, - "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d1d" - }, - "description": "Functional annotation GFF file for gold:Gp0127644", - "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_functional_annotation.gff", - "md5_checksum": "0626957517790befa95e8fefad58be0c", 
- "file_size_bytes": 3385, - "id": "nmdc:0626957517790befa95e8fefad58be0c", - "name": "gold:Gp0127644_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d1e" - }, - "description": "Protein FAA for gold:Gp0127644", - "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_proteins.faa", - "md5_checksum": "2c7d55cbee1f35793da90275740d3651", - "file_size_bytes": 3385, - "id": "nmdc:2c7d55cbee1f35793da90275740d3651", - "name": "gold:Gp0127644_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d1f" - }, - "description": "Structural annotation GFF file for gold:Gp0127644", - "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_structural_annotation.gff", - "md5_checksum": "0973e6d47848f6677ced2a8d463670fa", - "file_size_bytes": 3385, - "id": "nmdc:0973e6d47848f6677ced2a8d463670fa", - "name": "gold:Gp0127644_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d20" - }, - "description": "EC TSV File for gold:Gp0127644", - "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_ec.tsv", - "md5_checksum": "03c32c8ae757623520f6211ff641c40a", - "file_size_bytes": 3385, - "id": "nmdc:03c32c8ae757623520f6211ff641c40a", - "name": "gold:Gp0127644_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d21" - }, - "description": "KO TSV File for gold:Gp0127644", - "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_ko.tsv", - "md5_checksum": "3eced892c4712a2b13e805a978ec0819", - "file_size_bytes": 3385, - "id": "nmdc:3eced892c4712a2b13e805a978ec0819", - "name": "gold:Gp0127644_KO TSV 
File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34714" - }, - "has_input": [ - "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", - "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", - "nmdc:349838000a53b6655a5b12edf6351c50" - ], - "too_short_contig_num": 44192, - "part_of": [ - "nmdc:mga0bpf635" - ], - "binned_contig_num": 157, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:4857d71459f50147c8ae97ffce40caa5", - "nmdc:65522bf77241109a74354d0e294597f9", - "nmdc:30d6c9fb23abb0849991fad01e0393f1", - "nmdc:a76c8c9034b877334a75e7c0b7c2c830", - "nmdc:9d712c5924d6d0ee6d7305918e69302d" - ], - "was_informed_by": "gold:Gp0127644", - "input_contig_num": 46135, - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0bpf635", - "mags_list": [ - { - "number_of_contig": 157, - "completeness": 39.0, - "bin_name": "bins.1", - "gene_count": 891, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 19 - } - ], - "unbinned_contig_num": 1786, - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T02:55:00+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b4" - }, - "has_input": [ - "nmdc:16f77f4aaed29f3acc31646e1ce06b2d" - ], - "part_of": [ - "nmdc:mga0bpf635" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9d960cad4d88795aba8bb1acbe415fc9", - "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", - "nmdc:349838000a53b6655a5b12edf6351c50", - "nmdc:7bb072409221978dbea8ff5cb0bdba1e", - "nmdc:3d69ade973d1652bd6f061b2122ffe36", - "nmdc:2a9b9a21fe5fb84219e0be5f153665be", - "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", - "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", - "nmdc:f8d79375a2bf82f257e0015efeee6f26", - "nmdc:c9b4806132d19e740822b1a84bc4f07d", - "nmdc:e304e10eb60423c23486e140594d1a7b", - "nmdc:9b78f0ac527ee7287ae532a896582948" - ], - "was_informed_by": "gold:Gp0127644", - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0bpf635", - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T02:55:00+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa1" - }, - "has_input": [ - "nmdc:98da35678c59689ce738b2a6bc708692" - ], - "part_of": [ - "nmdc:mga0bpf635" - ], - "ctg_logsum": 37962, - "scaf_logsum": 38062, - "gap_pct": 0.00069, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", - "nmdc:b6afa25cadc614083204383bbad06f48", - "nmdc:87b1ea13d41499eeb5eb67932db01423", - "nmdc:72a38c353753abcb6d046385bf2950f6", - "nmdc:ecf498b9aa15f9d000845ffdfa7eb521" - ], - "asm_score": 3.712, - "was_informed_by": "gold:Gp0127644", - "ctg_powsum": 4162.045, - "scaf_max": 11252, - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", - "scaf_powsum": 4172.955, - "execution_resource": "NERSC-Cori", - "contigs": 46135, - "name": "Assembly Activity for nmdc:mga0bpf635", - "ctg_max": 11252, - "gc_std": 0.09328, - "contig_bp": 20152503, - "gc_avg": 0.6086, - "started_at_time": "2021-10-11T02:26:47Z", - "scaf_bp": 20152643, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 
46121, - "ended_at_time": "2021-10-11T02:55:00+00:00", - "ctg_l50": 394, - "ctg_l90": 285, - "ctg_n50": 14034, - "ctg_n90": 39639, - "scaf_l50": 395, - "scaf_l90": 285, - "scaf_n50": 13959, - "scaf_n90": 39626 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b57" - }, - "id": "nmdc:omprc-11-hwadfm25", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-n80sx618" - ], - "has_output": [ - "jgi:574fe0a87ded5e3df1ee148e" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127644" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c86b" - }, - "has_input": [ - "nmdc:a1d8fff4b02719c4d0f9c442cf052f69" - ], - "part_of": [ - "nmdc:mga0bpf635" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:98da35678c59689ce738b2a6bc708692", - "nmdc:ff08ea52254e0cc1011c56656505b27b" - ], - "was_informed_by": "gold:Gp0127644", - "input_read_count": 11431762, - "output_read_bases": 1245433047, - "id": 
"nmdc:9990ae6d30bab3988f258ae8224acd89", - "execution_resource": "NERSC-Cori", - "input_read_bases": 1726196062, - "name": "Read QC Activity for nmdc:mga0bpf635", - "output_read_count": 8322164, - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T02:55:00+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf38" - }, - "has_input": [ - "nmdc:98da35678c59689ce738b2a6bc708692" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:dc2e21becda8d6b010a95897cf97ae90", - "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", - "nmdc:425873a08e598b0ca2987ff7b9b5da1f", - "nmdc:b0f2449065b52935ddba8abd6ae6bc88", - "nmdc:9baa708296f62334e099cf61711b5e16", - "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", - "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", - "nmdc:8983fa1acb03f2905bbec3a6ccee2854", - "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" - ], - "was_informed_by": "gold:Gp0127644", - "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", - "started_at_time": "2021-10-11T02:26:47Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T02:55:00+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1941323184, - "type": "nmdc:DataObject", - "id": "jgi:574fde667ded5e3df1ee1407", - "name": "10533.1.165310.TGTGCGT-AACGCAC.fastq.gz" - }, - { - "name": "Gp0127639_Filtered Reads", - "description": "Filtered Reads for Gp0127639", - 
"data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", - "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", - "id": "nmdc:833077b40372c6daa20beaed04ed0ae1", - "file_size_bytes": 1585232805 - }, - { - "name": "Gp0127639_Filtered Stats", - "description": "Filtered Stats for Gp0127639", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", - "md5_checksum": "b68178eebde030fad0850797adbb2624", - "id": "nmdc:b68178eebde030fad0850797adbb2624", - "file_size_bytes": 289 - }, - { - "name": "Gp0127639_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", - "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", - "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "file_size_bytes": 648 - }, - { - "name": "Gp0127639_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", - "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", - "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", - "file_size_bytes": 588644 - }, - { - "name": "Gp0127639_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127639", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", - "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", - "id": "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "file_size_bytes": 228175 - }, - { - "name": "Gp0127639_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127639", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", - "md5_checksum": "81281fef2c0778516a84b3a672cc0230", - "id": "nmdc:81281fef2c0778516a84b3a672cc0230", - "file_size_bytes": 1468498728 - }, - { - "name": "Gp0127639_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127639", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", - "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", - "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", - "file_size_bytes": 251338 - }, - { - "name": "Gp0127639_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127639", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", - "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", - "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "file_size_bytes": 2322720 - }, - { - "name": "Gp0127639_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127639", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", - "md5_checksum": "848fc10ed4365047cb139a4b40303808", - "id": "nmdc:848fc10ed4365047cb139a4b40303808", - "file_size_bytes": 1168015909 - }, - { - "name": "Gp0127639_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127639", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", - "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", - "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", - "file_size_bytes": 616202 - }, - { - "name": "Gp0127639_Kraken2 
Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127639", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", - "md5_checksum": "c6eb85143a2489921c53f8184d536129", - "id": "nmdc:c6eb85143a2489921c53f8184d536129", - "file_size_bytes": 3863456 - }, - { - "name": "Gp0127639_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127639", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_contigs.fna", - "md5_checksum": "2b73310c6eef1ece5bb01f235b22fdbd", - "id": "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", - "file_size_bytes": 120497476 - }, - { - "name": "Gp0127639_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127639", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_scaffolds.fna", - "md5_checksum": "8f14c016997dd96f70f547df930717be", - "id": "nmdc:8f14c016997dd96f70f547df930717be", - "file_size_bytes": 119857107 - }, - { - "name": "Gp0127639_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_covstats.txt", - "md5_checksum": "5966e5e32744a14549b19b4c92a606a5", - "id": "nmdc:5966e5e32744a14549b19b4c92a606a5", - "file_size_bytes": 16872665 - }, - { - "name": "Gp0127639_Assembled AGP file", - "description": "Assembled AGP file for Gp0127639", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_assembly.agp", - "md5_checksum": "1fcd489b3ae86a76bf297cc19b50392d", - "id": "nmdc:1fcd489b3ae86a76bf297cc19b50392d", - "file_size_bytes": 15768901 - }, - { - "name": "Gp0127639_Metagenome Alignment BAM file", - "description": 
"Metagenome Alignment BAM file for Gp0127639", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_pairedMapped_sorted.bam", - "md5_checksum": "5b90d13539ce840980db101fa7c1df96", - "id": "nmdc:5b90d13539ce840980db101fa7c1df96", - "file_size_bytes": 1779135536 - }, - { - "name": "Gp0127639_Protein FAA", - "description": "Protein FAA for Gp0127639", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_proteins.faa", - "md5_checksum": "6c09d55cfb8872b30eb1832394f80beb", - "id": "nmdc:6c09d55cfb8872b30eb1832394f80beb", - "file_size_bytes": 67573912 - }, - { - "name": "Gp0127639_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127639", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_structural_annotation.gff", - "md5_checksum": "2e3cc72d21590667259f6356882ce63b", - "id": "nmdc:2e3cc72d21590667259f6356882ce63b", - "file_size_bytes": 2526 - }, - { - "name": "Gp0127639_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127639", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_functional_annotation.gff", - "md5_checksum": "2dee5eaa50c8eeb6e3bc8471501d9964", - "id": "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", - "file_size_bytes": 75196016 - }, - { - "name": "Gp0127639_KO TSV file", - "description": "KO TSV file for Gp0127639", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko.tsv", - "md5_checksum": "7ec4cfdd88352d703a2bb64b99bd56c5", - "id": "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", - "file_size_bytes": 8707597 - }, - { - "name": "Gp0127639_EC TSV file", - 
"description": "EC TSV file for Gp0127639", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ec.tsv", - "md5_checksum": "16bedd944e5e836924c28b006026c348", - "id": "nmdc:16bedd944e5e836924c28b006026c348", - "file_size_bytes": 5769544 - }, - { - "name": "Gp0127639_COG GFF file", - "description": "COG GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cog.gff", - "md5_checksum": "8764070f565c50998968e0739420f5cc", - "id": "nmdc:8764070f565c50998968e0739420f5cc", - "file_size_bytes": 45648468 - }, - { - "name": "Gp0127639_PFAM GFF file", - "description": "PFAM GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_pfam.gff", - "md5_checksum": "9e6accc90d61ea572819dcdb591e41a7", - "id": "nmdc:9e6accc90d61ea572819dcdb591e41a7", - "file_size_bytes": 34995151 - }, - { - "name": "Gp0127639_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_tigrfam.gff", - "md5_checksum": "32b9518ee41cadb157f3c0f9ec91476c", - "id": "nmdc:32b9518ee41cadb157f3c0f9ec91476c", - "file_size_bytes": 4060116 - }, - { - "name": "Gp0127639_SMART GFF file", - "description": "SMART GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_smart.gff", - "md5_checksum": "432d591bd525ae429e837431d44954f7", - "id": "nmdc:432d591bd525ae429e837431d44954f7", - "file_size_bytes": 10056742 - }, - { - "name": "Gp0127639_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_supfam.gff", - "md5_checksum": "3120d5d5d27d142f898f70a8cc1b076e", - "id": "nmdc:3120d5d5d27d142f898f70a8cc1b076e", - "file_size_bytes": 56435804 - }, - { - 
"name": "Gp0127639_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cath_funfam.gff", - "md5_checksum": "d37ff61fdae942030a1b07e855cf1abd", - "id": "nmdc:d37ff61fdae942030a1b07e855cf1abd", - "file_size_bytes": 43456195 - }, - { - "name": "Gp0127639_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko_ec.gff", - "md5_checksum": "56995366ba4186639a8ff4fd4defbd5e", - "id": "nmdc:56995366ba4186639a8ff4fd4defbd5e", - "file_size_bytes": 27657123 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127639_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.tooShort.fa", - "md5_checksum": "820dbad1b0ddd3c728e77aceee09ea28", - "id": "nmdc:820dbad1b0ddd3c728e77aceee09ea28", - "file_size_bytes": 90173016 - }, - { - "name": "Gp0127639_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.unbinned.fa", - "md5_checksum": "24fbfc69ded61dffff95ba2f8475239c", - "id": "nmdc:24fbfc69ded61dffff95ba2f8475239c", - "file_size_bytes": 27021291 - }, - { - "name": "Gp0127639_metabat2 bin checkm quality assessment result", - "description": "metabat2 
bin checkm quality assessment result for Gp0127639", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_checkm_qa.out", - "md5_checksum": "1837710887027f94b0f25208edb35cbe", - "id": "nmdc:1837710887027f94b0f25208edb35cbe", - "file_size_bytes": 1570 - }, - { - "name": "Gp0127639_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127639", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_hqmq_bin.zip", - "md5_checksum": "7072cfd6665082a95b2c09a4bc88760c", - "id": "nmdc:7072cfd6665082a95b2c09a4bc88760c", - "file_size_bytes": 182 - }, - { - "name": "Gp0127639_metabat2 bins", - "description": "metabat2 bins for Gp0127639", - "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_metabat_bin.zip", - "md5_checksum": "b0db190d9d1093ef87a5efb8a600e9ef", - "id": "nmdc:b0db190d9d1093ef87a5efb8a600e9ef", - "file_size_bytes": 1000457 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e8b" - }, - "description": "Assembled scaffold fasta for gold:Gp0127639", - "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly_scaffolds.fna", - "file_size_bytes": 119007591, - "type": "nmdc:DataObject", - "id": "nmdc:3200c62a99e8ddd0fd6403d6dfe5fc5d", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e8c" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127639", - "url": "https://data.microbiomedata.org/data/1781_100341/assembly/mapping_stats.txt", - "file_size_bytes": 16022429, - "type": "nmdc:DataObject", - "id": "nmdc:b2bff56e405eaffed2b0a3d7d6000b37", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e8f" - }, - "description": "Assembled contigs fasta for 
gold:Gp0127639", - "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly_contigs.fna", - "file_size_bytes": 119647240, - "type": "nmdc:DataObject", - "id": "nmdc:1d1610f39b4543fe7a0ecde2b1d8d710", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e90" - }, - "description": "Assembled AGP file for gold:Gp0127639", - "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly.agp", - "file_size_bytes": 14066973, - "type": "nmdc:DataObject", - "id": "nmdc:71da65a514fef7d1e2b3cf2a8dbcba74", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e91" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127639", - "url": "https://data.microbiomedata.org/data/1781_100341/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1755614129, - "type": "nmdc:DataObject", - "id": "nmdc:164f413fa91ee1433e3f441649315c61", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b74" - }, - "id": "nmdc:1bf82e8b1c00260947b645449b0bedcb", - "name": "1781_100341.krona.html", - "description": "Gold:Gp0127639 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100341/ReadbasedAnalysis/centrifuge/1781_100341.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b7a" - }, - "id": "nmdc:813f4c0b656c2812a7db73fc0df92f23", - "name": "1781_100341.json", - "description": "Gold:Gp0127639 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100341/ReadbasedAnalysis/1781_100341.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16608" - }, - "id": "nmdc:bd6adf1661bff8cdbad6416f39136291", - "name": "checkm_qa.out", - 
"description": "metabat2 bin checkm quality assessment result for gold:Gp0127639", - "file_size_bytes": 1092, - "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1660a" - }, - "id": "nmdc:03f2b08b396c5b273845ff6fb1a7a2d2", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127639", - "file_size_bytes": 87721724, - "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1660b" - }, - "id": "nmdc:a57183db98a4cd0611a8587010c37d52", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127639", - "file_size_bytes": 29133879, - "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16612" - }, - "id": "nmdc:168e30f70b1513401a606fa75bdabf50", - "name": "gold:Gp0127639.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127639", - "file_size_bytes": 235784, - "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16627" - }, - "id": "nmdc:390577e22b09a5c74de14b9e3a9a6b19", - "name": "gold:Gp0127639.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127639", - "file_size_bytes": 472014, - "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1662c" - }, - "id": "nmdc:2cc7b6f61240516e9bf15f84809635c8", - "name": "gold:Gp0127639.bins.3.fa", - "description": "metabat2 
binned contig file for gold:Gp0127639", - "file_size_bytes": 282214, - "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d0d" - }, - "description": "EC TSV File for gold:Gp0127639", - "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_ec.tsv", - "md5_checksum": "7e710e983d3a5ffbddc618c5e252e06b", - "file_size_bytes": 3385, - "id": "nmdc:7e710e983d3a5ffbddc618c5e252e06b", - "name": "gold:Gp0127639_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d0e" - }, - "description": "KO TSV File for gold:Gp0127639", - "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_ko.tsv", - "md5_checksum": "ccbc768cb20e4c1b25d7627b611eb8dc", - "file_size_bytes": 3385, - "id": "nmdc:ccbc768cb20e4c1b25d7627b611eb8dc", - "name": "gold:Gp0127639_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d10" - }, - "description": "Protein FAA for gold:Gp0127639", - "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_proteins.faa", - "md5_checksum": "fccc8283a46f12babeed0b2c7cc4eebd", - "file_size_bytes": 3385, - "id": "nmdc:fccc8283a46f12babeed0b2c7cc4eebd", - "name": "gold:Gp0127639_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d12" - }, - "description": "Functional annotation GFF file for gold:Gp0127639", - "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_functional_annotation.gff", - "md5_checksum": "ee416a49155f7c07bcb776962708fb04", - "file_size_bytes": 3385, - "id": "nmdc:ee416a49155f7c07bcb776962708fb04", - "name": "gold:Gp0127639_Functional 
annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d13" - }, - "description": "Structural annotation GFF file for gold:Gp0127639", - "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_structural_annotation.gff", - "md5_checksum": "d0452fefd4ad4f4cd10c974294bf9058", - "file_size_bytes": 3385, - "id": "nmdc:d0452fefd4ad4f4cd10c974294bf9058", - "name": "gold:Gp0127639_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3471b" - }, - "has_input": [ - "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", - "nmdc:5b90d13539ce840980db101fa7c1df96", - "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964" - ], - "too_short_contig_num": 194918, - "part_of": [ - "nmdc:mga09wpw60" - ], - "binned_contig_num": 732, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:820dbad1b0ddd3c728e77aceee09ea28", - "nmdc:24fbfc69ded61dffff95ba2f8475239c", - "nmdc:1837710887027f94b0f25208edb35cbe", - "nmdc:7072cfd6665082a95b2c09a4bc88760c", - "nmdc:b0db190d9d1093ef87a5efb8a600e9ef" - ], - "was_informed_by": "gold:Gp0127639", - "input_contig_num": 212559, - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga09wpw60", - "mags_list": [ - { - "number_of_contig": 85, - "completeness": 18.1, - "bin_name": "bins.1", - "gene_count": 437, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 8 - 
}, - { - "number_of_contig": 59, - "completeness": 15.92, - "bin_name": "bins.2", - "gene_count": 343, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.84, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 8 - }, - { - "number_of_contig": 258, - "completeness": 21.26, - "bin_name": "bins.3", - "gene_count": 1440, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 19 - }, - { - "number_of_contig": 101, - "completeness": 29.13, - "bin_name": "bins.4", - "gene_count": 560, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 6 - }, - { - "number_of_contig": 116, - "completeness": 1.53, - "bin_name": "bins.5", - "gene_count": 763, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 10 - }, - { - "number_of_contig": 113, - "completeness": 9.72, - "bin_name": "bins.6", - "gene_count": 531, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - } - ], - "unbinned_contig_num": 16909, - "started_at_time": "2021-10-11T02:27:08Z", - "type": 
"nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:27:12+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b7" - }, - "has_input": [ - "nmdc:2b73310c6eef1ece5bb01f235b22fdbd" - ], - "part_of": [ - "nmdc:mga09wpw60" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:6c09d55cfb8872b30eb1832394f80beb", - "nmdc:2e3cc72d21590667259f6356882ce63b", - "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", - "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", - "nmdc:16bedd944e5e836924c28b006026c348", - "nmdc:8764070f565c50998968e0739420f5cc", - "nmdc:9e6accc90d61ea572819dcdb591e41a7", - "nmdc:32b9518ee41cadb157f3c0f9ec91476c", - "nmdc:432d591bd525ae429e837431d44954f7", - "nmdc:3120d5d5d27d142f898f70a8cc1b076e", - "nmdc:d37ff61fdae942030a1b07e855cf1abd", - "nmdc:56995366ba4186639a8ff4fd4defbd5e" - ], - "was_informed_by": "gold:Gp0127639", - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga09wpw60", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:27:12+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa3" - }, - "has_input": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1" - ], - "part_of": [ - "nmdc:mga09wpw60" - ], - "ctg_logsum": 317684, - "scaf_logsum": 318786, - "gap_pct": 0.0017, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", - "nmdc:8f14c016997dd96f70f547df930717be", - "nmdc:5966e5e32744a14549b19b4c92a606a5", - "nmdc:1fcd489b3ae86a76bf297cc19b50392d", - "nmdc:5b90d13539ce840980db101fa7c1df96" - ], - "asm_score": 3.397, - "was_informed_by": "gold:Gp0127639", - "ctg_powsum": 34356, - 
"scaf_max": 19860, - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "scaf_powsum": 34485, - "execution_resource": "NERSC-Cori", - "contigs": 212560, - "name": "Assembly Activity for nmdc:mga09wpw60", - "ctg_max": 19860, - "gc_std": 0.09375, - "contig_bp": 112053293, - "gc_avg": 0.63186, - "started_at_time": "2021-10-11T02:27:08Z", - "scaf_bp": 112055193, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 212379, - "ended_at_time": "2021-10-11T03:27:12+00:00", - "ctg_l50": 538, - "ctg_l90": 298, - "ctg_n50": 55584, - "ctg_n90": 173977, - "scaf_l50": 539, - "scaf_l90": 298, - "scaf_n50": 55395, - "scaf_n90": 173826 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b58" - }, - "id": "nmdc:omprc-11-vnnn4722", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-tzp60785" - ], - "has_output": [ - "jgi:574fde667ded5e3df1ee1407" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127639" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c871" - }, - "has_input": [ - 
"nmdc:ae9087ed8e1ead2407bca45a47725633" - ], - "part_of": [ - "nmdc:mga09wpw60" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1", - "nmdc:b68178eebde030fad0850797adbb2624" - ], - "was_informed_by": "gold:Gp0127639", - "input_read_count": 23535784, - "output_read_bases": 2989527376, - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3553903384, - "name": "Read QC Activity for nmdc:mga09wpw60", - "output_read_count": 20011156, - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:27:12+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf46" - }, - "has_input": [ - "nmdc:833077b40372c6daa20beaed04ed0ae1" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:514172bb91ef3b125ae2d001b47bff0b", - "nmdc:82f072d1931154fbc722531d3d0dc41c", - "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", - "nmdc:81281fef2c0778516a84b3a672cc0230", - "nmdc:86ae054ba9def1126579c8f76db8a07a", - "nmdc:9db20a88fa3d02eb00f64d1671ef8521", - "nmdc:848fc10ed4365047cb139a4b40303808", - "nmdc:94e422e0bae86c608fba1c3815e08e92", - "nmdc:c6eb85143a2489921c53f8184d536129" - ], - "was_informed_by": "gold:Gp0127639", - "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:27:12+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - 
"activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2500707412, - "type": "nmdc:DataObject", - "id": "jgi:574fe0a67ded5e3df1ee148d", - "name": "10533.3.165334.CGCTTAA-GTTAAGC.fastq.gz" - }, - { - "name": "Gp0127642_Filtered Reads", - "description": "Filtered Reads for Gp0127642", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filtered.fastq.gz", - "md5_checksum": "603166d1e0da357d356a2029215d76ea", - "id": "nmdc:603166d1e0da357d356a2029215d76ea", - "file_size_bytes": 2304174057 - }, - { - "name": "Gp0127642_Filtered Stats", - "description": "Filtered Stats for Gp0127642", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filterStats.txt", - "md5_checksum": "639d9630c859c9b2f6f7a2eff1e1a863", - "id": "nmdc:639d9630c859c9b2f6f7a2eff1e1a863", - "file_size_bytes": 284 - }, - { - "name": "Gp0127642_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", - "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", - "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", - "file_size_bytes": 5303 - }, - { - "name": "Gp0127642_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", - "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", - "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", - "file_size_bytes": 948120 - }, - { - "name": "Gp0127642_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127642", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", - "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", - "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", - "file_size_bytes": 241990 - }, - { - "name": "Gp0127642_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127642", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", - "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", - "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", - "file_size_bytes": 2023464022 - }, - { - "name": "Gp0127642_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127642", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", - "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", - "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", - "file_size_bytes": 257700 - }, - { - "name": "Gp0127642_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127642", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", - "md5_checksum": "bb92f0d18280f32aacf482a43a841372", - "id": "nmdc:bb92f0d18280f32aacf482a43a841372", - "file_size_bytes": 2339227 - }, - { - "name": "Gp0127642_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127642", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", - "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", - "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", - "file_size_bytes": 
1630988221 - }, - { - "name": "Gp0127642_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127642", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", - "md5_checksum": "272e3daee292c6e284026ee95b72d290", - "id": "nmdc:272e3daee292c6e284026ee95b72d290", - "file_size_bytes": 659136 - }, - { - "name": "Gp0127642_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127642", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", - "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", - "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", - "file_size_bytes": 4013188 - }, - { - "name": "Gp0127642_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127642", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_contigs.fna", - "md5_checksum": "9c2c077dd8f43350b83c1c1ba853bbbc", - "id": "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", - "file_size_bytes": 44374790 - }, - { - "name": "Gp0127642_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127642", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_scaffolds.fna", - "md5_checksum": "9a3dfedede65ba1253a84264492e909c", - "id": "nmdc:9a3dfedede65ba1253a84264492e909c", - "file_size_bytes": 44064962 - }, - { - "name": "Gp0127642_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_covstats.txt", - "md5_checksum": "0772cb4473177c4e0046c7fd9cb65b27", - "id": "nmdc:0772cb4473177c4e0046c7fd9cb65b27", - "file_size_bytes": 8090415 - }, - { - 
"name": "Gp0127642_Assembled AGP file", - "description": "Assembled AGP file for Gp0127642", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_assembly.agp", - "md5_checksum": "7d0ccfaeac8981d1300b8c17abed052b", - "id": "nmdc:7d0ccfaeac8981d1300b8c17abed052b", - "file_size_bytes": 7524067 - }, - { - "name": "Gp0127642_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127642", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_pairedMapped_sorted.bam", - "md5_checksum": "a5b5801b13f062bc09a1405d0a01e6ac", - "id": "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", - "file_size_bytes": 2461892983 - }, - { - "name": "Gp0127642_Protein FAA", - "description": "Protein FAA for Gp0127642", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_proteins.faa", - "md5_checksum": "e6270776fe3cb9f4e8e2958f9d8d6151", - "id": "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", - "file_size_bytes": 26699570 - }, - { - "name": "Gp0127642_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127642", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_structural_annotation.gff", - "md5_checksum": "f442172aba544a550f1e294bc615fd1d", - "id": "nmdc:f442172aba544a550f1e294bc615fd1d", - "file_size_bytes": 2505 - }, - { - "name": "Gp0127642_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127642", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_functional_annotation.gff", - "md5_checksum": "c0f7ac45facbbb7b74bb7ce11af11910", - "id": "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", - 
"file_size_bytes": 32011364 - }, - { - "name": "Gp0127642_KO TSV file", - "description": "KO TSV file for Gp0127642", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko.tsv", - "md5_checksum": "63db41425c31ceda578a9e2a801dcb98", - "id": "nmdc:63db41425c31ceda578a9e2a801dcb98", - "file_size_bytes": 3660508 - }, - { - "name": "Gp0127642_EC TSV file", - "description": "EC TSV file for Gp0127642", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ec.tsv", - "md5_checksum": "1cf9336281454b1747a86f9877f47ce8", - "id": "nmdc:1cf9336281454b1747a86f9877f47ce8", - "file_size_bytes": 2451794 - }, - { - "name": "Gp0127642_COG GFF file", - "description": "COG GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cog.gff", - "md5_checksum": "1cb7ab56a921ed80d21dad5b2d41c139", - "id": "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", - "file_size_bytes": 18356139 - }, - { - "name": "Gp0127642_PFAM GFF file", - "description": "PFAM GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_pfam.gff", - "md5_checksum": "157326e95b92fa83ab5755c22acf5837", - "id": "nmdc:157326e95b92fa83ab5755c22acf5837", - "file_size_bytes": 13044512 - }, - { - "name": "Gp0127642_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_tigrfam.gff", - "md5_checksum": "f001a06864e30347885e5a76ae89ae92", - "id": "nmdc:f001a06864e30347885e5a76ae89ae92", - "file_size_bytes": 1280537 - }, - { - "name": "Gp0127642_SMART GFF file", - "description": "SMART GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_smart.gff", - "md5_checksum": 
"aa1e3207b62ca31a87da28ad4c3e6e92", - "id": "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", - "file_size_bytes": 4029242 - }, - { - "name": "Gp0127642_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_supfam.gff", - "md5_checksum": "5119eebdfebd43b4af243a61cc8e45eb", - "id": "nmdc:5119eebdfebd43b4af243a61cc8e45eb", - "file_size_bytes": 23011352 - }, - { - "name": "Gp0127642_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cath_funfam.gff", - "md5_checksum": "4e6178de376e5e228c8b5c17ce3d0621", - "id": "nmdc:4e6178de376e5e228c8b5c17ce3d0621", - "file_size_bytes": 17039992 - }, - { - "name": "Gp0127642_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127642", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko_ec.gff", - "md5_checksum": "d89f026da3dfb4ee7d4884a47ce5739d", - "id": "nmdc:d89f026da3dfb4ee7d4884a47ce5739d", - "file_size_bytes": 11677748 - }, - { - "name": "Gp0127642_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127642", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_checkm_qa.out", - "md5_checksum": "ac59797a394f8e4aa971e5c1d016e23e", - "id": "nmdc:ac59797a394f8e4aa971e5c1d016e23e", - "file_size_bytes": 765 - }, - { - "name": "Gp0127642_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127642", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_hqmq_bin.zip", - "md5_checksum": "46858bd4b45bdaa4e4344820f3c54b3b", - "id": "nmdc:46858bd4b45bdaa4e4344820f3c54b3b", - "file_size_bytes": 472684 - }, - { - "_id": { - 
"$oid": "649b003d1ae706d7b5b14e9b" - }, - "description": "Assembled scaffold fasta for gold:Gp0127642", - "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly_scaffolds.fna", - "file_size_bytes": 43652238, - "type": "nmdc:DataObject", - "id": "nmdc:6eca0963e47257569a60827999eeaaa8", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e9c" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127642", - "url": "https://data.microbiomedata.org/data/1781_100344/assembly/mapping_stats.txt", - "file_size_bytes": 7677591, - "type": "nmdc:DataObject", - "id": "nmdc:8a499e5986fac773f987576c5c2ec223", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e9d" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127642", - "url": "https://data.microbiomedata.org/data/1781_100344/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2429450297, - "type": "nmdc:DataObject", - "id": "nmdc:4938ea35089362aa1ee2e129706e1e8a", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14e9f" - }, - "description": "Assembled contigs fasta for gold:Gp0127642", - "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly_contigs.fna", - "file_size_bytes": 43961966, - "type": "nmdc:DataObject", - "id": "nmdc:1e4e73c9d1faa4585cb3a266b5a6cd39", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ea6" - }, - "description": "Assembled AGP file for gold:Gp0127642", - "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly.agp", - "file_size_bytes": 6698219, - "type": "nmdc:DataObject", - "id": "nmdc:ff66a5de4da06400243924f54998c37d", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - 
"_id": { - "$oid": "649b003d1ae706d7b5b15b8e" - }, - "id": "nmdc:57c57663cd0c81252303be99f87ec09e", - "name": "1781_100344.krona.html", - "description": "Gold:Gp0127642 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100344/ReadbasedAnalysis/centrifuge/1781_100344.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15b92" - }, - "id": "nmdc:07632d9f02d85eee5b556a94acf251ef", - "name": "1781_100344.json", - "description": "Gold:Gp0127642 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100344/ReadbasedAnalysis/1781_100344.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16613" - }, - "id": "nmdc:231cfca4487ba7ec3ab476022e003ac7", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127642", - "file_size_bytes": 3446845, - "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16615" - }, - "id": "nmdc:bceccf441b752fc5608db53515a9552e", - "name": "gold:Gp0127642.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127642", - "file_size_bytes": 276719, - "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16616" - }, - "id": "nmdc:c26ef51c7ce6c79f8dad28e39f4238d7", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127642", - "file_size_bytes": 918, - "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16618" - }, - "id": 
"nmdc:914e14cda452df07bf33be9bda12738c", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127642", - "file_size_bytes": 38677251, - "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16619" - }, - "id": "nmdc:0d6107d2c1b4c0e3423f54ae1895aad9", - "name": "gold:Gp0127642.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127642", - "file_size_bytes": 743464, - "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d29" - }, - "description": "Functional annotation GFF file for gold:Gp0127642", - "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_functional_annotation.gff", - "md5_checksum": "657b2348517d3e169df0914f5d8a2d21", - "file_size_bytes": 3385, - "id": "nmdc:657b2348517d3e169df0914f5d8a2d21", - "name": "gold:Gp0127642_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d2c" - }, - "description": "Protein FAA for gold:Gp0127642", - "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_proteins.faa", - "md5_checksum": "263acdd17bdb9ed72102610070da3d65", - "file_size_bytes": 3385, - "id": "nmdc:263acdd17bdb9ed72102610070da3d65", - "name": "gold:Gp0127642_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d2e" - }, - "description": "EC TSV File for gold:Gp0127642", - "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_ec.tsv", - "md5_checksum": "4f8de602126deeb9ef60cf5f739d601a", - "file_size_bytes": 3385, - "id": 
"nmdc:4f8de602126deeb9ef60cf5f739d601a", - "name": "gold:Gp0127642_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d30" - }, - "description": "KO TSV File for gold:Gp0127642", - "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_ko.tsv", - "md5_checksum": "65319d4c3ffdbf5dcdb2e2837aea8cf4", - "file_size_bytes": 3385, - "id": "nmdc:65319d4c3ffdbf5dcdb2e2837aea8cf4", - "name": "gold:Gp0127642_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d32" - }, - "description": "Structural annotation GFF file for gold:Gp0127642", - "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_structural_annotation.gff", - "md5_checksum": "9e55f66e86f57487e23029b90a84c4a4", - "file_size_bytes": 3385, - "id": "nmdc:9e55f66e86f57487e23029b90a84c4a4", - "name": "gold:Gp0127642_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab3471c" - }, - "has_input": [ - "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", - "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", - "nmdc:c0f7ac45facbbb7b74bb7ce11af11910" - ], - "too_short_contig_num": 101249, - "part_of": [ - "nmdc:mga0cvxk30" - ], - "binned_contig_num": 213, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ac59797a394f8e4aa971e5c1d016e23e", - "nmdc:46858bd4b45bdaa4e4344820f3c54b3b" - ], - "was_informed_by": "gold:Gp0127642", - "input_contig_num": 103206, - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0cvxk30", - "mags_list": [ - { - "number_of_contig": 213, 
- "completeness": 71.17, - "bin_name": "bins.1", - "gene_count": 1914, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 0, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 0.97, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 31 - } - ], - "unbinned_contig_num": 1744, - "started_at_time": "2021-12-01T21:30:33Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-12-02T20:50:24+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9ba" - }, - "has_input": [ - "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc" - ], - "part_of": [ - "nmdc:mga0cvxk30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", - "nmdc:f442172aba544a550f1e294bc615fd1d", - "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", - "nmdc:63db41425c31ceda578a9e2a801dcb98", - "nmdc:1cf9336281454b1747a86f9877f47ce8", - "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", - "nmdc:157326e95b92fa83ab5755c22acf5837", - "nmdc:f001a06864e30347885e5a76ae89ae92", - "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", - "nmdc:5119eebdfebd43b4af243a61cc8e45eb", - "nmdc:4e6178de376e5e228c8b5c17ce3d0621", - "nmdc:d89f026da3dfb4ee7d4884a47ce5739d" - ], - "was_informed_by": "gold:Gp0127642", - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0cvxk30", - "started_at_time": "2021-12-01T21:30:33Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-12-02T20:50:24+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa7" - }, - "has_input": [ - "nmdc:603166d1e0da357d356a2029215d76ea" - ], 
- "part_of": [ - "nmdc:mga0cvxk30" - ], - "ctg_logsum": 50653, - "scaf_logsum": 50816, - "gap_pct": 0.00106, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", - "nmdc:9a3dfedede65ba1253a84264492e909c", - "nmdc:0772cb4473177c4e0046c7fd9cb65b27", - "nmdc:7d0ccfaeac8981d1300b8c17abed052b", - "nmdc:a5b5801b13f062bc09a1405d0a01e6ac" - ], - "asm_score": 7.947, - "was_informed_by": "gold:Gp0127642", - "ctg_powsum": 5974.26, - "scaf_max": 27286, - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", - "scaf_powsum": 5993.216, - "execution_resource": "NERSC-Cori", - "contigs": 103206, - "name": "Assembly Activity for nmdc:mga0cvxk30", - "ctg_max": 27286, - "gc_std": 0.1028, - "gc_avg": 0.60377, - "contig_bp": 40567169, - "started_at_time": "2021-12-01T21:30:33Z", - "scaf_bp": 40567599, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 103181, - "ended_at_time": "2021-12-02T20:50:24+00:00", - "ctg_l50": 348, - "ctg_l90": 283, - "ctg_n50": 35487, - "ctg_n90": 88775, - "scaf_l50": 348, - "scaf_l90": 283, - "scaf_n50": 35472, - "scaf_n90": 88751 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b59" - }, - "id": "nmdc:omprc-11-p21wp875", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-qpve9v25" - ], - "has_output": [ - "jgi:574fe0a67ded5e3df1ee148d" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from 
areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127642" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c874" - }, - "has_input": [ - "nmdc:ac3a54ab71fd4e15763cd3e01c7a91bf" - ], - "part_of": [ - "nmdc:mga0cvxk30" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:603166d1e0da357d356a2029215d76ea", - "nmdc:639d9630c859c9b2f6f7a2eff1e1a863" - ], - "was_informed_by": "gold:Gp0127642", - "input_read_count": 28024960, - "output_read_bases": 4095196321, - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4231768960, - "name": "Read QC Activity for nmdc:mga0cvxk30", - "output_read_count": 27378404, - "started_at_time": "2021-12-01T21:30:33Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-12-02T20:50:24+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf3b" - }, - "has_input": [ - "nmdc:603166d1e0da357d356a2029215d76ea" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", - "nmdc:0a079e34648ce23b0837dff31e2be5df", - "nmdc:f19bf1723f0f0e9f2158b137d2618b08", - "nmdc:81fc62d01a53a7ab5037829a158f0b64", - "nmdc:05cc05eefdcb0d7bac19031619244a4b", - "nmdc:bb92f0d18280f32aacf482a43a841372", - "nmdc:2fddd33160498548fa73e95dfc304d1a", - "nmdc:272e3daee292c6e284026ee95b72d290", - "nmdc:bca8c2988929e7c176ec7b6609445db2" - ], - "was_informed_by": "gold:Gp0127642", - "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", - "execution_resource": "NERSC-Cori", - "name": 
"ReadBased Analysis Activity for nmdc:mga0cvxk30", - "started_at_time": "2021-12-01T21:30:33Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-12-02T20:50:24+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2463257736, - "type": "nmdc:DataObject", - "id": "jgi:574fde867ded5e3df1ee1420", - "name": "10533.2.165322.TTCGTAC-GGTACGA.fastq.gz" - }, - { - "name": "Gp0127646_Filtered Reads", - "description": "Filtered Reads for Gp0127646", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", - "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", - "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", - "file_size_bytes": 2209739723 - }, - { - "name": "Gp0127646_Filtered Stats", - "description": "Filtered Stats for Gp0127646", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", - "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", - "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", - "file_size_bytes": 291 - }, - { - "name": "Gp0127646_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", - "md5_checksum": "3e0598df41941463bac0fdec5df29f55", - "id": "nmdc:3e0598df41941463bac0fdec5df29f55", - "file_size_bytes": 4650 - }, - { - "name": "Gp0127646_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127646", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", - "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", - "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", - "file_size_bytes": 877659 - }, - { - "name": "Gp0127646_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127646", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", - "md5_checksum": "bc8e157195d042d7207d67b4982fea96", - "id": "nmdc:bc8e157195d042d7207d67b4982fea96", - "file_size_bytes": 236676 - }, - { - "name": "Gp0127646_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127646", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", - "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", - "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", - "file_size_bytes": 1901493736 - }, - { - "name": "Gp0127646_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127646", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", - "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", - "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", - "file_size_bytes": 256274 - }, - { - "name": "Gp0127646_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127646", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", - "md5_checksum": "cd10cca62774e66f60d60380ee18132e", - "id": "nmdc:cd10cca62774e66f60d60380ee18132e", - "file_size_bytes": 2333722 - }, - { - "name": 
"Gp0127646_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127646", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", - "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", - "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", - "file_size_bytes": 1534616616 - }, - { - "name": "Gp0127646_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127646", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", - "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", - "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", - "file_size_bytes": 663507 - }, - { - "name": "Gp0127646_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127646", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", - "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", - "id": "nmdc:c3a8d9f48266a43ad74fc581132e2bba", - "file_size_bytes": 4031909 - }, - { - "name": "Gp0127646_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127646", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_contigs.fna", - "md5_checksum": "c2301a45b987661e5e6f32eaf6928003", - "id": "nmdc:c2301a45b987661e5e6f32eaf6928003", - "file_size_bytes": 33070670 - }, - { - "name": "Gp0127646_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127646", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_scaffolds.fna", - "md5_checksum": "6233a266773aa722d6a3c2556b0c1cb5", - "id": 
"nmdc:6233a266773aa722d6a3c2556b0c1cb5", - "file_size_bytes": 32825592 - }, - { - "name": "Gp0127646_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_covstats.txt", - "md5_checksum": "c5460716df8c1d47e081837c8cc5d281", - "id": "nmdc:c5460716df8c1d47e081837c8cc5d281", - "file_size_bytes": 6393678 - }, - { - "name": "Gp0127646_Assembled AGP file", - "description": "Assembled AGP file for Gp0127646", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_assembly.agp", - "md5_checksum": "9437132a95f356e7cc6513f862f38f81", - "id": "nmdc:9437132a95f356e7cc6513f862f38f81", - "file_size_bytes": 5942403 - }, - { - "name": "Gp0127646_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127646", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_pairedMapped_sorted.bam", - "md5_checksum": "0d0ee85be3a079b0eba5bb872c842f7d", - "id": "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", - "file_size_bytes": 2346665933 - }, - { - "name": "Gp0127646_Protein FAA", - "description": "Protein FAA for Gp0127646", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_proteins.faa", - "md5_checksum": "b907352a805a209c5b7e10f6ce9e3ceb", - "id": "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", - "file_size_bytes": 18886480 - }, - { - "name": "Gp0127646_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127646", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_structural_annotation.gff", - "md5_checksum": "769c049c4b3301900de0c62666e8c297", - "id": 
"nmdc:769c049c4b3301900de0c62666e8c297", - "file_size_bytes": 2883 - }, - { - "name": "Gp0127646_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127646", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_functional_annotation.gff", - "md5_checksum": "3dcb9f83f3921fc7f3e7a2050584cc77", - "id": "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", - "file_size_bytes": 23048582 - }, - { - "name": "Gp0127646_KO TSV file", - "description": "KO TSV file for Gp0127646", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko.tsv", - "md5_checksum": "1b81cc955690e81f18c2bc1533e7ee89", - "id": "nmdc:1b81cc955690e81f18c2bc1533e7ee89", - "file_size_bytes": 2643070 - }, - { - "name": "Gp0127646_EC TSV file", - "description": "EC TSV file for Gp0127646", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ec.tsv", - "md5_checksum": "dd94ee1dbd107bf14e8be72b8f546290", - "id": "nmdc:dd94ee1dbd107bf14e8be72b8f546290", - "file_size_bytes": 1742846 - }, - { - "name": "Gp0127646_COG GFF file", - "description": "COG GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cog.gff", - "md5_checksum": "e271f0ef1c44b514304c35a7913751e3", - "id": "nmdc:e271f0ef1c44b514304c35a7913751e3", - "file_size_bytes": 12090733 - }, - { - "name": "Gp0127646_PFAM GFF file", - "description": "PFAM GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_pfam.gff", - "md5_checksum": "b3d3f1ef308b7555cbea077cc00dbc95", - "id": "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", - "file_size_bytes": 8631888 - }, - { - "name": "Gp0127646_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127646", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_tigrfam.gff", - "md5_checksum": "d18d6a67ad7e17514b0c4b502ea69ac0", - "id": "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", - "file_size_bytes": 840759 - }, - { - "name": "Gp0127646_SMART GFF file", - "description": "SMART GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_smart.gff", - "md5_checksum": "62920faf364dea6a1d028878d49a2989", - "id": "nmdc:62920faf364dea6a1d028878d49a2989", - "file_size_bytes": 2684392 - }, - { - "name": "Gp0127646_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_supfam.gff", - "md5_checksum": "757bd3295026410cb03690d4dae95935", - "id": "nmdc:757bd3295026410cb03690d4dae95935", - "file_size_bytes": 15569120 - }, - { - "name": "Gp0127646_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cath_funfam.gff", - "md5_checksum": "19eef79eefc81cbe6d7d4586d8be5d23", - "id": "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", - "file_size_bytes": 11479737 - }, - { - "name": "Gp0127646_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko_ec.gff", - "md5_checksum": "fc8598d9d6926e6ac8bb9c488016734a", - "id": "nmdc:fc8598d9d6926e6ac8bb9c488016734a", - "file_size_bytes": 8425263 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - 
"file_size_bytes": 0 - }, - { - "name": "Gp0127646_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.tooShort.fa", - "md5_checksum": "de605dd3ecac26d6a35740c09448b171", - "id": "nmdc:de605dd3ecac26d6a35740c09448b171", - "file_size_bytes": 31210054 - }, - { - "name": "Gp0127646_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.unbinned.fa", - "md5_checksum": "9392ab9668a1c347f010004c2f0cc8db", - "id": "nmdc:9392ab9668a1c347f010004c2f0cc8db", - "file_size_bytes": 1595698 - }, - { - "name": "Gp0127647_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127647", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", - "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", - "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", - "file_size_bytes": 775 - }, - { - "name": "Gp0127646_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127646", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_hqmq_bin.zip", - "md5_checksum": "d75d0006d0009e7e14f2ad8044a3cbfb", - "id": "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", - "file_size_bytes": 182 - }, - { - "name": "Gp0127646_metabat2 bins", - "description": "metabat2 bins for Gp0127646", - "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_metabat_bin.zip", - "md5_checksum": "17e9a7763327f2b5d3f841079c2f68d8", - "id": "nmdc:17e9a7763327f2b5d3f841079c2f68d8", - "file_size_bytes": 82006 - }, - { - "_id": { - "$oid": 
"649b003d1ae706d7b5b14ea9" - }, - "description": "Assembled contigs fasta for gold:Gp0127646", - "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly_contigs.fna", - "file_size_bytes": 32744062, - "type": "nmdc:DataObject", - "id": "nmdc:cfb56be5f505927c085fb3105561b578", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eab" - }, - "description": "Assembled scaffold fasta for gold:Gp0127646", - "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly_scaffolds.fna", - "file_size_bytes": 32499084, - "type": "nmdc:DataObject", - "id": "nmdc:fda96a730e2bfe0ced5e4ff057aae5d3", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eac" - }, - "description": "Assembled AGP file for gold:Gp0127646", - "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly.agp", - "file_size_bytes": 5288971, - "type": "nmdc:DataObject", - "id": "nmdc:5cd6af80a19658f0fa7b8229f6ba8242", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ead" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127646", - "url": "https://data.microbiomedata.org/data/1781_100348/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2314898025, - "type": "nmdc:DataObject", - "id": "nmdc:3c03b1fab29a1825d07195ca4992fb31", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb9" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127646", - "url": "https://data.microbiomedata.org/data/1781_100348/assembly/mapping_stats.txt", - "file_size_bytes": 6067070, - "type": "nmdc:DataObject", - "id": "nmdc:e17989e7cc9952a4f9d8321328229316", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { 
- "$oid": "649b003d1ae706d7b5b15bb5" - }, - "id": "nmdc:bfb473bea17c38bdb8fc5e394c1021b7", - "name": "1781_100348.json", - "description": "Gold:Gp0127646 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100348/ReadbasedAnalysis/1781_100348.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bb6" - }, - "id": "nmdc:26bb18215b48754fcd58dbb38e8d01e1", - "name": "1781_100348.krona.html", - "description": "Gold:Gp0127646 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100348/ReadbasedAnalysis/centrifuge/1781_100348.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1662b" - }, - "id": "nmdc:10a6a1f1b3bf7cc4960ad08e0914edc7", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127646", - "file_size_bytes": 30170371, - "url": "https://data.microbiomedata.org/data/1781_100348/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16631" - }, - "id": "nmdc:dc8ebe9a5e1a8e38d88a63afbc535046", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127646", - "file_size_bytes": 1849966, - "url": "https://data.microbiomedata.org/data/1781_100348/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d27" - }, - "description": "KO TSV File for gold:Gp0127646", - "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_ko.tsv", - "md5_checksum": "5cd2f970cbb8eb5d8e52ac7a08bfb9a3", - "file_size_bytes": 3385, - "id": "nmdc:5cd2f970cbb8eb5d8e52ac7a08bfb9a3", - "name": "gold:Gp0127646_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d28" 
- }, - "description": "EC TSV File for gold:Gp0127646", - "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_ec.tsv", - "md5_checksum": "9d87100ad8b6278b4a442c4686d7aef7", - "file_size_bytes": 3385, - "id": "nmdc:9d87100ad8b6278b4a442c4686d7aef7", - "name": "gold:Gp0127646_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d2b" - }, - "description": "Functional annotation GFF file for gold:Gp0127646", - "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_functional_annotation.gff", - "md5_checksum": "c0858f9a847f241ed28f454adb580bf4", - "file_size_bytes": 3385, - "id": "nmdc:c0858f9a847f241ed28f454adb580bf4", - "name": "gold:Gp0127646_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d2f" - }, - "description": "Protein FAA for gold:Gp0127646", - "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_proteins.faa", - "md5_checksum": "646648c11733f7ab7ea23008729360ce", - "file_size_bytes": 3385, - "id": "nmdc:646648c11733f7ab7ea23008729360ce", - "name": "gold:Gp0127646_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d35" - }, - "description": "Structural annotation GFF file for gold:Gp0127646", - "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_structural_annotation.gff", - "md5_checksum": "94574634e1ccfe241af033259e27df1a", - "file_size_bytes": 3385, - "id": "nmdc:94574634e1ccfe241af033259e27df1a", - "name": "gold:Gp0127646_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - 
{ - "_id": { - "$oid": "649b0052ec087f6bbab34715" - }, - "has_input": [ - "nmdc:c2301a45b987661e5e6f32eaf6928003", - "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", - "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77" - ], - "too_short_contig_num": 80674, - "part_of": [ - "nmdc:mga0dm4q17" - ], - "binned_contig_num": 20, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:de605dd3ecac26d6a35740c09448b171", - "nmdc:9392ab9668a1c347f010004c2f0cc8db", - "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", - "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", - "nmdc:17e9a7763327f2b5d3f841079c2f68d8" - ], - "was_informed_by": "gold:Gp0127646", - "input_contig_num": 81652, - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0dm4q17", - "mags_list": [ - { - "number_of_contig": 20, - "completeness": 1.36, - "bin_name": "bins.1", - "gene_count": 275, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 1, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 2, - "gtdbtk_genus": "", - "num_t_rna": 10 - } - ], - "unbinned_contig_num": 958, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:05:12+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b2" - }, - "has_input": [ - "nmdc:c2301a45b987661e5e6f32eaf6928003" - ], - "part_of": [ - "nmdc:mga0dm4q17" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", - "nmdc:769c049c4b3301900de0c62666e8c297", - "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", - 
"nmdc:1b81cc955690e81f18c2bc1533e7ee89", - "nmdc:dd94ee1dbd107bf14e8be72b8f546290", - "nmdc:e271f0ef1c44b514304c35a7913751e3", - "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", - "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", - "nmdc:62920faf364dea6a1d028878d49a2989", - "nmdc:757bd3295026410cb03690d4dae95935", - "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", - "nmdc:fc8598d9d6926e6ac8bb9c488016734a" - ], - "was_informed_by": "gold:Gp0127646", - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0dm4q17", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:05:12+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139f9d" - }, - "has_input": [ - "nmdc:208a3777ef0b99408f0d5832dee576e0" - ], - "part_of": [ - "nmdc:mga0dm4q17" - ], - "ctg_logsum": 20856, - "scaf_logsum": 20954, - "gap_pct": 0.00116, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c2301a45b987661e5e6f32eaf6928003", - "nmdc:6233a266773aa722d6a3c2556b0c1cb5", - "nmdc:c5460716df8c1d47e081837c8cc5d281", - "nmdc:9437132a95f356e7cc6513f862f38f81", - "nmdc:0d0ee85be3a079b0eba5bb872c842f7d" - ], - "asm_score": 17.863, - "was_informed_by": "gold:Gp0127646", - "ctg_powsum": 2534.931, - "scaf_max": 88400, - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "scaf_powsum": 2545.156, - "execution_resource": "NERSC-Cori", - "contigs": 81653, - "name": "Assembly Activity for nmdc:mga0dm4q17", - "ctg_max": 88400, - "gc_std": 0.13273, - "contig_bp": 30097213, - "gc_avg": 0.55961, - "started_at_time": "2021-10-11T02:23:42Z", - "scaf_bp": 30097563, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 81627, - "ended_at_time": "2021-10-11T04:05:12+00:00", - "ctg_l50": 332, - "ctg_l90": 282, - "ctg_n50": 30532, - "ctg_n90": 71638, - "scaf_l50": 332, - "scaf_l90": 282, - "scaf_n50": 30518, - "scaf_n90": 71614, 
- "scaf_l_gt50k": 150260, - "scaf_n_gt50k": 2, - "scaf_pct_gt50k": 0.49924305 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b5a" - }, - "id": "nmdc:omprc-11-vs67yj43", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-0n5nks24" - ], - "has_output": [ - "jgi:574fde867ded5e3df1ee1420" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127646" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c86f" - }, - "has_input": [ - "nmdc:94b1d19ad74cfb1be53ebb45dcf5f70c" - ], - "part_of": [ - "nmdc:mga0dm4q17" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:208a3777ef0b99408f0d5832dee576e0", - "nmdc:8533a56006bdc1841b6fc16e99b6a84a" - ], - "was_informed_by": "gold:Gp0127646", - "input_read_count": 27835800, - "output_read_bases": 3867340900, - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4203205800, - "name": "Read QC Activity for nmdc:mga0dm4q17", - "output_read_count": 
25862834, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:05:12+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf33" - }, - "has_input": [ - "nmdc:208a3777ef0b99408f0d5832dee576e0" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3e0598df41941463bac0fdec5df29f55", - "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", - "nmdc:bc8e157195d042d7207d67b4982fea96", - "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", - "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", - "nmdc:cd10cca62774e66f60d60380ee18132e", - "nmdc:b13ee2ee52d15c3669aecd2e913f2658", - "nmdc:09a2d722810b3d90207bc4cfa626133b", - "nmdc:c3a8d9f48266a43ad74fc581132e2bba" - ], - "was_informed_by": "gold:Gp0127646", - "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0dm4q17", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:05:12+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2446032142, - "type": "nmdc:DataObject", - "id": "jgi:574fe0ac7ded5e3df1ee1491", - "name": "10533.3.165334.GTAACGA-GTCGTTA.fastq.gz" - }, - { - "name": "Gp0127648_Filtered Reads", - "description": "Filtered Reads for Gp0127648", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filtered.fastq.gz", - "md5_checksum": 
"fcc3a92dd2b6ab6045f4be27da6f2cdd", - "id": "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", - "file_size_bytes": 2191252492 - }, - { - "name": "Gp0127648_Filtered Stats", - "description": "Filtered Stats for Gp0127648", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filterStats.txt", - "md5_checksum": "2208c88cac6b941799d4492dbf5f0887", - "id": "nmdc:2208c88cac6b941799d4492dbf5f0887", - "file_size_bytes": 289 - }, - { - "name": "Gp0127648_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", - "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", - "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "file_size_bytes": 3323 - }, - { - "name": "Gp0127648_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", - "md5_checksum": "1357df297d8d8a872b335e0c3222d102", - "id": "nmdc:1357df297d8d8a872b335e0c3222d102", - "file_size_bytes": 782039 - }, - { - "name": "Gp0127648_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127648", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", - "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", - "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", - "file_size_bytes": 236971 - }, - { - "name": "Gp0127648_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127648", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", - "md5_checksum": 
"33bf814280051c220e0c4a06f7935728", - "id": "nmdc:33bf814280051c220e0c4a06f7935728", - "file_size_bytes": 1945479328 - }, - { - "name": "Gp0127648_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127648", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", - "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", - "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "file_size_bytes": 255338 - }, - { - "name": "Gp0127648_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127648", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", - "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", - "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", - "file_size_bytes": 2333371 - }, - { - "name": "Gp0127648_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127648", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", - "md5_checksum": "222bac312efdd6c86d2475ad224b7907", - "id": "nmdc:222bac312efdd6c86d2475ad224b7907", - "file_size_bytes": 1562011343 - }, - { - "name": "Gp0127648_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127648", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", - "md5_checksum": "baaca868b1fed932b463e489708dd741", - "id": "nmdc:baaca868b1fed932b463e489708dd741", - "file_size_bytes": 647859 - }, - { - "name": "Gp0127648_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127648", - "data_object_type": "Kraken2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", - "md5_checksum": "b549d169e5b0693152555373a6d8ee75", - "id": "nmdc:b549d169e5b0693152555373a6d8ee75", - "file_size_bytes": 3952548 - }, - { - "name": "Gp0127648_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127648", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_contigs.fna", - "md5_checksum": "ca10f7bae0565946414188c9121ee338", - "id": "nmdc:ca10f7bae0565946414188c9121ee338", - "file_size_bytes": 67439267 - }, - { - "name": "Gp0127648_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127648", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_scaffolds.fna", - "md5_checksum": "cf23062373806986b70244b1fabbd17b", - "id": "nmdc:cf23062373806986b70244b1fabbd17b", - "file_size_bytes": 66996134 - }, - { - "name": "Gp0127648_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_covstats.txt", - "md5_checksum": "99b2c3c91b299b9426cca9dfb10b0cea", - "id": "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", - "file_size_bytes": 11610674 - }, - { - "name": "Gp0127648_Assembled AGP file", - "description": "Assembled AGP file for Gp0127648", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_assembly.agp", - "md5_checksum": "303d7282e6f91afaa9564c65107d4086", - "id": "nmdc:303d7282e6f91afaa9564c65107d4086", - "file_size_bytes": 10842402 - }, - { - "name": "Gp0127648_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127648", - "data_object_type": "Assembly Coverage BAM", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_pairedMapped_sorted.bam", - "md5_checksum": "4a6ffadb01b62dd73278429808c1a39a", - "id": "nmdc:4a6ffadb01b62dd73278429808c1a39a", - "file_size_bytes": 2362185094 - }, - { - "name": "Gp0127648_Protein FAA", - "description": "Protein FAA for Gp0127648", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_proteins.faa", - "md5_checksum": "c4a719f3a899f7aa760f627f7b1ae6e7", - "id": "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", - "file_size_bytes": 40118426 - }, - { - "name": "Gp0127648_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127648", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_structural_annotation.gff", - "md5_checksum": "80ab4116b1cdfbc3e4c4d06e5990d735", - "id": "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", - "file_size_bytes": 2507 - }, - { - "name": "Gp0127648_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127648", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_functional_annotation.gff", - "md5_checksum": "600011ab7e39465d3f9f28d5d93a4248", - "id": "nmdc:600011ab7e39465d3f9f28d5d93a4248", - "file_size_bytes": 47178055 - }, - { - "name": "Gp0127648_KO TSV file", - "description": "KO TSV file for Gp0127648", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko.tsv", - "md5_checksum": "0c8d98b369900cd19da39235e3eae6db", - "id": "nmdc:0c8d98b369900cd19da39235e3eae6db", - "file_size_bytes": 5498487 - }, - { - "name": "Gp0127648_EC TSV file", - "description": "EC TSV file for Gp0127648", - "data_object_type": "Annotation Enzyme Commission", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ec.tsv", - "md5_checksum": "16c37f8c4f74e7e81b7900536da55e39", - "id": "nmdc:16c37f8c4f74e7e81b7900536da55e39", - "file_size_bytes": 3650457 - }, - { - "name": "Gp0127648_COG GFF file", - "description": "COG GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cog.gff", - "md5_checksum": "a7fc228cd8d224bbf2843ba6a6648480", - "id": "nmdc:a7fc228cd8d224bbf2843ba6a6648480", - "file_size_bytes": 27226505 - }, - { - "name": "Gp0127648_PFAM GFF file", - "description": "PFAM GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_pfam.gff", - "md5_checksum": "a57d9d86c20cfd13ddc56027110485ba", - "id": "nmdc:a57d9d86c20cfd13ddc56027110485ba", - "file_size_bytes": 19896169 - }, - { - "name": "Gp0127648_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_tigrfam.gff", - "md5_checksum": "6a4be27e2e7454941b73aa843471f25d", - "id": "nmdc:6a4be27e2e7454941b73aa843471f25d", - "file_size_bytes": 2105656 - }, - { - "name": "Gp0127648_SMART GFF file", - "description": "SMART GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_smart.gff", - "md5_checksum": "be3d2a77be3ccd810d679f03204f8bac", - "id": "nmdc:be3d2a77be3ccd810d679f03204f8bac", - "file_size_bytes": 6062323 - }, - { - "name": "Gp0127648_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_supfam.gff", - "md5_checksum": "eb5ac02ce17f687c5ccf5a64548c559e", - "id": "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", - "file_size_bytes": 33896425 - }, - { - "name": "Gp0127648_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127648", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cath_funfam.gff", - "md5_checksum": "81ff9f257ffe63ca5d04db9e767620b1", - "id": "nmdc:81ff9f257ffe63ca5d04db9e767620b1", - "file_size_bytes": 25515156 - }, - { - "name": "Gp0127648_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko_ec.gff", - "md5_checksum": "8768f37ff001a86a25ae34c7deee9d9a", - "id": "nmdc:8768f37ff001a86a25ae34c7deee9d9a", - "file_size_bytes": 17491444 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127648_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.tooShort.fa", - "md5_checksum": "8b67e5038c55083e2aa8e19c5d05fef8", - "id": "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", - "file_size_bytes": 58962192 - }, - { - "name": "Gp0127648_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.unbinned.fa", - "md5_checksum": "fc8454a790709b36d7ca96cd99359d26", - "id": "nmdc:fc8454a790709b36d7ca96cd99359d26", - "file_size_bytes": 6656731 - }, - { - "name": "Gp0127648_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127648", - "data_object_type": "CheckM Statistics", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_checkm_qa.out", - "md5_checksum": "942bd7c28c52e6301bf97dab0ea2852a", - "id": "nmdc:942bd7c28c52e6301bf97dab0ea2852a", - "file_size_bytes": 930 - }, - { - "name": "Gp0127648_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127648", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_hqmq_bin.zip", - "md5_checksum": "82ebf9065be9715e1230a50bf7a02197", - "id": "nmdc:82ebf9065be9715e1230a50bf7a02197", - "file_size_bytes": 466157 - }, - { - "name": "Gp0127648_metabat2 bins", - "description": "metabat2 bins for Gp0127648", - "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_metabat_bin.zip", - "md5_checksum": "897536007e7e3525457df5d3baddd593", - "id": "nmdc:897536007e7e3525457df5d3baddd593", - "file_size_bytes": 90255 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb3" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127648", - "url": "https://data.microbiomedata.org/data/1781_100350/assembly/mapping_stats.txt", - "file_size_bytes": 11021314, - "type": "nmdc:DataObject", - "id": "nmdc:c12e8ed1aade1238318952b0c311cff1", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb5" - }, - "description": "Assembled contigs fasta for gold:Gp0127648", - "url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly_contigs.fna", - "file_size_bytes": 66849907, - "type": "nmdc:DataObject", - "id": "nmdc:621134d8dd8a6b117924f92ffed69ba7", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb6" - }, - "description": "Assembled scaffold fasta for gold:Gp0127648", - "url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly_scaffolds.fna", - 
"file_size_bytes": 66407046, - "type": "nmdc:DataObject", - "id": "nmdc:ede66d623f73ce756b3495f83205e6d9", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb7" - }, - "description": "Assembled AGP file for gold:Gp0127648", - "url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly.agp", - "file_size_bytes": 9663138, - "type": "nmdc:DataObject", - "id": "nmdc:d48d39804b7f30d62b244ec9556fd8b1", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb8" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127648", - "url": "https://data.microbiomedata.org/data/1781_100350/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2330726850, - "type": "nmdc:DataObject", - "id": "nmdc:11791ba21a4e2cea1d5e889dce811985", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bc0" - }, - "id": "nmdc:af639412ee5bca47c42088a8c81df692", - "name": "1781_100350.krona.html", - "description": "Gold:Gp0127648 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100350/ReadbasedAnalysis/centrifuge/1781_100350.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bc6" - }, - "id": "nmdc:e0e59ec0d07f88b6bc73664a179627e6", - "name": "1781_100350.json", - "description": "Gold:Gp0127648 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100350/ReadbasedAnalysis/1781_100350.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1662d" - }, - "id": "nmdc:ef8ef2e558c88161ea6b3c05434b56ac", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127648", - "file_size_bytes": 7023427, - 
"url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16630" - }, - "id": "nmdc:74b41590eebfcdef0e90406daaa4c95d", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127648", - "file_size_bytes": 57206330, - "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16633" - }, - "id": "nmdc:a783b176b63e949529058e9db9ede106", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127648", - "file_size_bytes": 760, - "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16634" - }, - "id": "nmdc:9fbf80a1258e24efe59ba434424a14e6", - "name": "gold:Gp0127648.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127648", - "file_size_bytes": 1405337, - "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16636" - }, - "id": "nmdc:30f30b256de77b4a0b0194330f337dbe", - "name": "gtdbtk.ar122.summary.tsv", - "description": "gtdbtk archaea assignment result summary table for gold:Gp0127648", - "file_size_bytes": 1004, - "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d39" - }, - "description": "Structural annotation GFF file for gold:Gp0127648", - "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_structural_annotation.gff", - "md5_checksum": 
"863f93ecf208a6e19f17d460d8e1a963", - "file_size_bytes": 3385, - "id": "nmdc:863f93ecf208a6e19f17d460d8e1a963", - "name": "gold:Gp0127648_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d3b" - }, - "description": "KO TSV File for gold:Gp0127648", - "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_ko.tsv", - "md5_checksum": "1287c2532770a0f0d6792192c7400c0c", - "file_size_bytes": 3385, - "id": "nmdc:1287c2532770a0f0d6792192c7400c0c", - "name": "gold:Gp0127648_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d3c" - }, - "description": "EC TSV File for gold:Gp0127648", - "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_ec.tsv", - "md5_checksum": "06042b9d083bd6b9879bc5486c0b38ba", - "file_size_bytes": 3385, - "id": "nmdc:06042b9d083bd6b9879bc5486c0b38ba", - "name": "gold:Gp0127648_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d3d" - }, - "description": "Protein FAA for gold:Gp0127648", - "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_proteins.faa", - "md5_checksum": "d27fabc532b52dec4afa4673f920633a", - "file_size_bytes": 3385, - "id": "nmdc:d27fabc532b52dec4afa4673f920633a", - "name": "gold:Gp0127648_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d43" - }, - "description": "Functional annotation GFF file for gold:Gp0127648", - "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_functional_annotation.gff", - "md5_checksum": "c11e44f28b422233e151d324d2accb43", - "file_size_bytes": 3385, - "id": "nmdc:c11e44f28b422233e151d324d2accb43", - "name": 
"gold:Gp0127648_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34718" - }, - "has_input": [ - "nmdc:ca10f7bae0565946414188c9121ee338", - "nmdc:4a6ffadb01b62dd73278429808c1a39a", - "nmdc:600011ab7e39465d3f9f28d5d93a4248" - ], - "too_short_contig_num": 142847, - "part_of": [ - "nmdc:mga0andh11" - ], - "binned_contig_num": 329, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", - "nmdc:fc8454a790709b36d7ca96cd99359d26", - "nmdc:942bd7c28c52e6301bf97dab0ea2852a", - "nmdc:82ebf9065be9715e1230a50bf7a02197", - "nmdc:897536007e7e3525457df5d3baddd593" - ], - "was_informed_by": "gold:Gp0127648", - "input_contig_num": 147340, - "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0andh11", - "mags_list": [ - { - "number_of_contig": 255, - "completeness": 70.91, - "bin_name": "bins.1", - "gene_count": 1857, - "bin_quality": "MQ", - "gtdbtk_species": "", - "gtdbtk_order": "Nitrososphaerales", - "num_16s": 1, - "gtdbtk_family": "Nitrososphaeraceae", - "gtdbtk_domain": "Archaea", - "contamination": 1.94, - "gtdbtk_class": "Nitrososphaeria", - "gtdbtk_phylum": "Crenarchaeota", - "num_5s": 1, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 28 - }, - { - "number_of_contig": 74, - "completeness": 19.91, - "bin_name": "bins.2", - "gene_count": 380, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - } - ], - 
"unbinned_contig_num": 4164, - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:13:04+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b8" - }, - "has_input": [ - "nmdc:ca10f7bae0565946414188c9121ee338" - ], - "part_of": [ - "nmdc:mga0andh11" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", - "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", - "nmdc:600011ab7e39465d3f9f28d5d93a4248", - "nmdc:0c8d98b369900cd19da39235e3eae6db", - "nmdc:16c37f8c4f74e7e81b7900536da55e39", - "nmdc:a7fc228cd8d224bbf2843ba6a6648480", - "nmdc:a57d9d86c20cfd13ddc56027110485ba", - "nmdc:6a4be27e2e7454941b73aa843471f25d", - "nmdc:be3d2a77be3ccd810d679f03204f8bac", - "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", - "nmdc:81ff9f257ffe63ca5d04db9e767620b1", - "nmdc:8768f37ff001a86a25ae34c7deee9d9a" - ], - "was_informed_by": "gold:Gp0127648", - "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0andh11", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:13:04+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa5" - }, - "has_input": [ - "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" - ], - "part_of": [ - "nmdc:mga0andh11" - ], - "ctg_logsum": 91193, - "scaf_logsum": 91521, - "gap_pct": 0.0011, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ca10f7bae0565946414188c9121ee338", - "nmdc:cf23062373806986b70244b1fabbd17b", - "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", - "nmdc:303d7282e6f91afaa9564c65107d4086", - "nmdc:4a6ffadb01b62dd73278429808c1a39a" - ], - 
"asm_score": 4.996, - "was_informed_by": "gold:Gp0127648", - "ctg_powsum": 10170, - "scaf_max": 23974, - "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "scaf_powsum": 10208, - "execution_resource": "NERSC-Cori", - "contigs": 147340, - "name": "Assembly Activity for nmdc:mga0andh11", - "ctg_max": 23974, - "gc_std": 0.0855, - "contig_bp": 61886959, - "gc_avg": 0.61759, - "started_at_time": "2021-10-11T02:23:29Z", - "scaf_bp": 61887639, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 147272, - "ended_at_time": "2021-10-11T04:13:04+00:00", - "ctg_l50": 381, - "ctg_l90": 285, - "ctg_n50": 47493, - "ctg_n90": 126039, - "scaf_l50": 381, - "scaf_l90": 285, - "scaf_n50": 47464, - "scaf_n90": 125972 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b5b" - }, - "id": "nmdc:omprc-11-nhf5m035", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-b7nrtg75" - ], - "has_output": [ - "jgi:574fe0ac7ded5e3df1ee1491" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127648" - ] - } - ], - "reaction_activity_set": [], - 
"read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c870" - }, - "has_input": [ - "nmdc:22bf7ba401619da2a191e7b30544a8ac" - ], - "part_of": [ - "nmdc:mga0andh11" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", - "nmdc:2208c88cac6b941799d4492dbf5f0887" - ], - "was_informed_by": "gold:Gp0127648", - "input_read_count": 28064750, - "output_read_bases": 3953713958, - "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4237777250, - "name": "Read QC Activity for nmdc:mga0andh11", - "output_read_count": 26438892, - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:13:04+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf39" - }, - "has_input": [ - "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", - "nmdc:1357df297d8d8a872b335e0c3222d102", - "nmdc:5b510e336e60b6120b43e9b6420a074e", - "nmdc:33bf814280051c220e0c4a06f7935728", - "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", - "nmdc:0efb0ad19234056d7e2e3726dead3622", - "nmdc:222bac312efdd6c86d2475ad224b7907", - "nmdc:baaca868b1fed932b463e489708dd741", - "nmdc:b549d169e5b0693152555373a6d8ee75" - ], - "was_informed_by": "gold:Gp0127648", - "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0andh11", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:13:04+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - 
"functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2236205196, - "type": "nmdc:DataObject", - "id": "jgi:574fde8a7ded5e3df1ee1422", - "name": "10533.2.165322.ACGGTCT-AAGACCG.fastq.gz" - }, - { - "name": "Gp0127647_Filtered Reads", - "description": "Filtered Reads for Gp0127647", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", - "md5_checksum": "c082eff434fe4863c0e29c79b759d100", - "id": "nmdc:c082eff434fe4863c0e29c79b759d100", - "file_size_bytes": 2052448806 - }, - { - "name": "Gp0127647_Filtered Stats", - "description": "Filtered Stats for Gp0127647", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", - "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", - "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", - "file_size_bytes": 282 - }, - { - "name": "Gp0127647_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", - "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", - "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", - "file_size_bytes": 4666 - }, - { - "name": "Gp0127647_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", - "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", - "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", - "file_size_bytes": 786018 - }, - { - "name": "Gp0127647_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127647", - 
"data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", - "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", - "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", - "file_size_bytes": 237895 - }, - { - "name": "Gp0127647_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127647", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", - "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", - "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", - "file_size_bytes": 1767305277 - }, - { - "name": "Gp0127647_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127647", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", - "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", - "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", - "file_size_bytes": 254858 - }, - { - "name": "Gp0127647_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127647", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", - "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", - "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", - "file_size_bytes": 2332396 - }, - { - "name": "Gp0127647_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127647", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", - "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", - "id": 
"nmdc:45617f93e5f072fbad25a0308ead6c3d", - "file_size_bytes": 1419938277 - }, - { - "name": "Gp0127647_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127647", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", - "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", - "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", - "file_size_bytes": 661837 - }, - { - "name": "Gp0127647_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127647", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", - "md5_checksum": "ab80fc324c9206a41a66d64227a97179", - "id": "nmdc:ab80fc324c9206a41a66d64227a97179", - "file_size_bytes": 4028822 - }, - { - "name": "Gp0127647_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127647", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_contigs.fna", - "md5_checksum": "05952c056a6db782ba77c6369206838a", - "id": "nmdc:05952c056a6db782ba77c6369206838a", - "file_size_bytes": 41696500 - }, - { - "name": "Gp0127647_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127647", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_scaffolds.fna", - "md5_checksum": "6fa8f2d4236fda4f628436ed85094e3b", - "id": "nmdc:6fa8f2d4236fda4f628436ed85094e3b", - "file_size_bytes": 41403892 - }, - { - "name": "Gp0127647_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_covstats.txt", - "md5_checksum": "82be5b6248eb4b0bfef1c9afa5c5c0bc", - "id": 
"nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", - "file_size_bytes": 7629542 - }, - { - "name": "Gp0127647_Assembled AGP file", - "description": "Assembled AGP file for Gp0127647", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_assembly.agp", - "md5_checksum": "fee22437c76dc343846f41e1be538b9d", - "id": "nmdc:fee22437c76dc343846f41e1be538b9d", - "file_size_bytes": 7091204 - }, - { - "name": "Gp0127647_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127647", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_pairedMapped_sorted.bam", - "md5_checksum": "7fc9fd7844b6ce48869a0ad5216da4dc", - "id": "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", - "file_size_bytes": 2190560397 - }, - { - "name": "Gp0127647_Protein FAA", - "description": "Protein FAA for Gp0127647", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_proteins.faa", - "md5_checksum": "b95b8538748c921fac6c93ba55d43e2c", - "id": "nmdc:b95b8538748c921fac6c93ba55d43e2c", - "file_size_bytes": 23580407 - }, - { - "name": "Gp0127647_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127647", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_structural_annotation.gff", - "md5_checksum": "9c63632766a4946bc76829a7dafe49c0", - "id": "nmdc:9c63632766a4946bc76829a7dafe49c0", - "file_size_bytes": 2925 - }, - { - "name": "Gp0127647_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127647", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_functional_annotation.gff", - "md5_checksum": 
"0c5e791c8170181aa3e43d710e7c55eb", - "id": "nmdc:0c5e791c8170181aa3e43d710e7c55eb", - "file_size_bytes": 28355659 - }, - { - "name": "Gp0127647_KO TSV file", - "description": "KO TSV file for Gp0127647", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko.tsv", - "md5_checksum": "358cb8682dd2d5c1b7a691e9f7734acc", - "id": "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", - "file_size_bytes": 3251676 - }, - { - "name": "Gp0127647_EC TSV file", - "description": "EC TSV file for Gp0127647", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ec.tsv", - "md5_checksum": "d770a8c872a3a359bf3482e564c56988", - "id": "nmdc:d770a8c872a3a359bf3482e564c56988", - "file_size_bytes": 2134531 - }, - { - "name": "Gp0127647_COG GFF file", - "description": "COG GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cog.gff", - "md5_checksum": "cdecaf6cff3fc2d559cc3313599b137b", - "id": "nmdc:cdecaf6cff3fc2d559cc3313599b137b", - "file_size_bytes": 15119260 - }, - { - "name": "Gp0127647_PFAM GFF file", - "description": "PFAM GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_pfam.gff", - "md5_checksum": "7dedc14d5645ae32f913d8f823ba5aa3", - "id": "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", - "file_size_bytes": 11013734 - }, - { - "name": "Gp0127647_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_tigrfam.gff", - "md5_checksum": "809e6d246bd10968d4da074db08216d9", - "id": "nmdc:809e6d246bd10968d4da074db08216d9", - "file_size_bytes": 1131416 - }, - { - "name": "Gp0127647_SMART GFF file", - "description": "SMART GFF file for Gp0127647", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_smart.gff", - "md5_checksum": "546d11411d30ab337a215d0094fc36b6", - "id": "nmdc:546d11411d30ab337a215d0094fc36b6", - "file_size_bytes": 3424877 - }, - { - "name": "Gp0127647_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_supfam.gff", - "md5_checksum": "6eb654de91a99eb4e01e1bf9513a6208", - "id": "nmdc:6eb654de91a99eb4e01e1bf9513a6208", - "file_size_bytes": 19463761 - }, - { - "name": "Gp0127647_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cath_funfam.gff", - "md5_checksum": "a8ae7ed318e7c170aeed508f331ce5b2", - "id": "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", - "file_size_bytes": 14536820 - }, - { - "name": "Gp0127647_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko_ec.gff", - "md5_checksum": "455f95c7c15739b2fddc6f62b03253ed", - "id": "nmdc:455f95c7c15739b2fddc6f62b03253ed", - "file_size_bytes": 10367039 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127647_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.tooShort.fa", - "md5_checksum": 
"8ec4227eca7ea06fed4e866c4de4a5c9", - "id": "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", - "file_size_bytes": 38197270 - }, - { - "name": "Gp0127647_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.unbinned.fa", - "md5_checksum": "40c0cbc75e2b698572b8b94d91fdc236", - "id": "nmdc:40c0cbc75e2b698572b8b94d91fdc236", - "file_size_bytes": 3202231 - }, - { - "name": "Gp0127647_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127647", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", - "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", - "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", - "file_size_bytes": 775 - }, - { - "name": "Gp0127647_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127647", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_hqmq_bin.zip", - "md5_checksum": "03b448db547a556e988a0d4948dab424", - "id": "nmdc:03b448db547a556e988a0d4948dab424", - "file_size_bytes": 182 - }, - { - "name": "Gp0127647_metabat2 bins", - "description": "metabat2 bins for Gp0127647", - "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_metabat_bin.zip", - "md5_checksum": "6e92868d1912cb8f5b32fbf507721d16", - "id": "nmdc:6e92868d1912cb8f5b32fbf507721d16", - "file_size_bytes": 91931 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eae" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127647", - "url": "https://data.microbiomedata.org/data/1781_100349/assembly/mapping_stats.txt", - "file_size_bytes": 7240138, - "type": "nmdc:DataObject", - "id": "nmdc:c61b0651682d71b4a62ee2e51223af99", - "name": 
"mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eaf" - }, - "description": "Assembled contigs fasta for gold:Gp0127647", - "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly_contigs.fna", - "file_size_bytes": 41307096, - "type": "nmdc:DataObject", - "id": "nmdc:9aefb925f949c698cd2a0d71d1d2d7cc", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb1" - }, - "description": "Assembled scaffold fasta for gold:Gp0127647", - "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly_scaffolds.fna", - "file_size_bytes": 41014628, - "type": "nmdc:DataObject", - "id": "nmdc:ba8fe365f6e8a08812efe185c3454385", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb2" - }, - "description": "Assembled AGP file for gold:Gp0127647", - "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly.agp", - "file_size_bytes": 6312116, - "type": "nmdc:DataObject", - "id": "nmdc:32c394cf3ff8c87b4d60ff769265b544", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb4" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127647", - "url": "https://data.microbiomedata.org/data/1781_100349/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2160760884, - "type": "nmdc:DataObject", - "id": "nmdc:f68a2e204a75c536142b2fd9dfd9ee8b", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bab" - }, - "id": "nmdc:34a7f42ed597813c10ad6d3935563bf7", - "name": "1781_100349.krona.html", - "description": "Gold:Gp0127647 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100349/ReadbasedAnalysis/centrifuge/1781_100349.krona.html", - 
"file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bb1" - }, - "id": "nmdc:8ec86e6598c064e0091960a9921de1d6", - "name": "1781_100349.json", - "description": "Gold:Gp0127647 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100349/ReadbasedAnalysis/1781_100349.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1662e" - }, - "id": "nmdc:a406d9ea4e02f98d5f48ba53b992dfa2", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127647", - "file_size_bytes": 3475613, - "url": "https://data.microbiomedata.org/data/1781_100349/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16632" - }, - "id": "nmdc:a0f5a2359ba3651c0315060a9827e39d", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127647", - "file_size_bytes": 36966483, - "url": "https://data.microbiomedata.org/data/1781_100349/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d34" - }, - "description": "Functional annotation GFF file for gold:Gp0127647", - "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_functional_annotation.gff", - "md5_checksum": "af2496c3ae96ff31e6bdaae75b507ea7", - "file_size_bytes": 3385, - "id": "nmdc:af2496c3ae96ff31e6bdaae75b507ea7", - "name": "gold:Gp0127647_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d36" - }, - "description": "Protein FAA for gold:Gp0127647", - "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_proteins.faa", - "md5_checksum": "ec6d01297279eee2d4c03ecfda9309c9", - "file_size_bytes": 3385, - 
"id": "nmdc:ec6d01297279eee2d4c03ecfda9309c9", - "name": "gold:Gp0127647_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d37" - }, - "description": "EC TSV File for gold:Gp0127647", - "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_ec.tsv", - "md5_checksum": "18d40bd5ff2707ba9a4512363d05537d", - "file_size_bytes": 3385, - "id": "nmdc:18d40bd5ff2707ba9a4512363d05537d", - "name": "gold:Gp0127647_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d38" - }, - "description": "KO TSV File for gold:Gp0127647", - "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_ko.tsv", - "md5_checksum": "d855bc2d72a6ba238acfe746299cf26a", - "file_size_bytes": 3385, - "id": "nmdc:d855bc2d72a6ba238acfe746299cf26a", - "name": "gold:Gp0127647_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d3a" - }, - "description": "Structural annotation GFF file for gold:Gp0127647", - "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_structural_annotation.gff", - "md5_checksum": "a57c9b7f192351676e897b8187cf6641", - "file_size_bytes": 3385, - "id": "nmdc:a57c9b7f192351676e897b8187cf6641", - "name": "gold:Gp0127647_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34716" - }, - "has_input": [ - "nmdc:05952c056a6db782ba77c6369206838a", - "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", - "nmdc:0c5e791c8170181aa3e43d710e7c55eb" - ], - "too_short_contig_num": 95291, - "part_of": [ - "nmdc:mga0g0e588" - ], - 
"binned_contig_num": 20, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", - "nmdc:40c0cbc75e2b698572b8b94d91fdc236", - "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", - "nmdc:03b448db547a556e988a0d4948dab424", - "nmdc:6e92868d1912cb8f5b32fbf507721d16" - ], - "was_informed_by": "gold:Gp0127647", - "input_contig_num": 97351, - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0g0e588", - "mags_list": [ - { - "number_of_contig": 20, - "completeness": 1.36, - "bin_name": "bins.1", - "gene_count": 310, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 2, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 2, - "gtdbtk_genus": "", - "num_t_rna": 14 - } - ], - "unbinned_contig_num": 2040, - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:38:33+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b5" - }, - "has_input": [ - "nmdc:05952c056a6db782ba77c6369206838a" - ], - "part_of": [ - "nmdc:mga0g0e588" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b95b8538748c921fac6c93ba55d43e2c", - "nmdc:9c63632766a4946bc76829a7dafe49c0", - "nmdc:0c5e791c8170181aa3e43d710e7c55eb", - "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", - "nmdc:d770a8c872a3a359bf3482e564c56988", - "nmdc:cdecaf6cff3fc2d559cc3313599b137b", - "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", - "nmdc:809e6d246bd10968d4da074db08216d9", - "nmdc:546d11411d30ab337a215d0094fc36b6", - "nmdc:6eb654de91a99eb4e01e1bf9513a6208", - 
"nmdc:a8ae7ed318e7c170aeed508f331ce5b2", - "nmdc:455f95c7c15739b2fddc6f62b03253ed" - ], - "was_informed_by": "gold:Gp0127647", - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0g0e588", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:38:33+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa0" - }, - "has_input": [ - "nmdc:c082eff434fe4863c0e29c79b759d100" - ], - "part_of": [ - "nmdc:mga0g0e588" - ], - "ctg_logsum": 37666, - "scaf_logsum": 37899, - "gap_pct": 0.00092, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:05952c056a6db782ba77c6369206838a", - "nmdc:6fa8f2d4236fda4f628436ed85094e3b", - "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", - "nmdc:fee22437c76dc343846f41e1be538b9d", - "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc" - ], - "asm_score": 14.664, - "was_informed_by": "gold:Gp0127647", - "ctg_powsum": 4336.355, - "scaf_max": 96788, - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", - "scaf_powsum": 4362.772, - "execution_resource": "NERSC-Cori", - "contigs": 97351, - "name": "Assembly Activity for nmdc:mga0g0e588", - "ctg_max": 96788, - "gc_std": 0.13435, - "contig_bp": 38110297, - "gc_avg": 0.5552, - "started_at_time": "2021-10-11T02:24:27Z", - "scaf_bp": 38110647, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 97316, - "ended_at_time": "2021-10-11T03:38:33+00:00", - "ctg_l50": 353, - "ctg_l90": 283, - "ctg_n50": 34144, - "ctg_n90": 85387, - "scaf_l50": 353, - "scaf_l90": 283, - "scaf_n50": 34125, - "scaf_n90": 85353, - "scaf_l_gt50k": 153917, - "scaf_n_gt50k": 2, - "scaf_pct_gt50k": 0.40386876 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - 
"$oid": "649b009773e8249959349b5c" - }, - "id": "nmdc:omprc-11-w3v30q48", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-q44pjf87" - ], - "has_output": [ - "jgi:574fde8a7ded5e3df1ee1422" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127647" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c86e" - }, - "has_input": [ - "nmdc:34b881e1c01cbdc1f8dc1b1fc07e46a7" - ], - "part_of": [ - "nmdc:mga0g0e588" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c082eff434fe4863c0e29c79b759d100", - "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b" - ], - "was_informed_by": "gold:Gp0127647", - "input_read_count": 24906858, - "output_read_bases": 3608754154, - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3760935558, - "name": "Read QC Activity for nmdc:mga0g0e588", - "output_read_count": 24128544, - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:38:33+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf32" - }, - 
"has_input": [ - "nmdc:c082eff434fe4863c0e29c79b759d100" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7e1438bf8076daf46f3d782d8f9656b4", - "nmdc:cfd63309cd38a293615ddce5e8ea6402", - "nmdc:7e353b7bfb1586773fa00b515dffe6ec", - "nmdc:6667be33e7867ca2aabfa5d663e2970a", - "nmdc:7ee0b0b21444ee06752e6b9c32f476af", - "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", - "nmdc:45617f93e5f072fbad25a0308ead6c3d", - "nmdc:460e7594fcd06678df1b9c5e5075cb4d", - "nmdc:ab80fc324c9206a41a66d64227a97179" - ], - "was_informed_by": "gold:Gp0127647", - "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0g0e588", - "started_at_time": "2021-10-11T02:24:27Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:33+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2092289780, - "type": "nmdc:DataObject", - "id": "jgi:574fde6e7ded5e3df1ee140d", - "name": "10533.1.165310.CCTCAGT-AACTGAG.fastq.gz" - }, - { - "name": "Gp0127645_Filtered Reads", - "description": "Filtered Reads for Gp0127645", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filtered.fastq.gz", - "md5_checksum": "034df323b47f010f27e7c032d445a891", - "id": "nmdc:034df323b47f010f27e7c032d445a891", - "file_size_bytes": 1909192845 - }, - { - "name": "Gp0127645_Filtered Stats", - "description": "Filtered Stats for Gp0127645", - "data_object_type": "QC Statistics", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filterStats.txt", - "md5_checksum": "ca137bf5e2df6541425f22b5d1fec492", - "id": "nmdc:ca137bf5e2df6541425f22b5d1fec492", - "file_size_bytes": 283 - }, - { - "name": "Gp0127645_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", - "md5_checksum": "694374188ba4372344536fa26a2282b8", - "id": "nmdc:694374188ba4372344536fa26a2282b8", - "file_size_bytes": 3780 - }, - { - "name": "Gp0127645_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", - "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", - "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", - "file_size_bytes": 822292 - }, - { - "name": "Gp0127645_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127645", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", - "md5_checksum": "46e203465faf61780fad8f626e9ab623", - "id": "nmdc:46e203465faf61780fad8f626e9ab623", - "file_size_bytes": 236496 - }, - { - "name": "Gp0127645_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127645", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", - "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", - "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", - "file_size_bytes": 1699052782 - }, - { - "name": "Gp0127645_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127645", - "data_object_type": "Centrifuge Classification Report", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", - "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", - "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", - "file_size_bytes": 256209 - }, - { - "name": "Gp0127645_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127645", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", - "md5_checksum": "4299b438a815becc8beed40fcb803e9f", - "id": "nmdc:4299b438a815becc8beed40fcb803e9f", - "file_size_bytes": 2336400 - }, - { - "name": "Gp0127645_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127645", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", - "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", - "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", - "file_size_bytes": 1359323947 - }, - { - "name": "Gp0127645_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127645", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", - "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", - "id": "nmdc:2be07eb38d408077a55ecb48e123f7f8", - "file_size_bytes": 651624 - }, - { - "name": "Gp0127645_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127645", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", - "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", - "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", - "file_size_bytes": 3973557 - }, - { - "name": "Gp0127645_Assembled contigs 
fasta", - "description": "Assembled contigs fasta for Gp0127645", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_contigs.fna", - "md5_checksum": "3685fdcfffdf34d2802c692dc0515e33", - "id": "nmdc:3685fdcfffdf34d2802c692dc0515e33", - "file_size_bytes": 49479236 - }, - { - "name": "Gp0127645_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127645", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_scaffolds.fna", - "md5_checksum": "7891adab80c63d98169e3cb7b4331f1e", - "id": "nmdc:7891adab80c63d98169e3cb7b4331f1e", - "file_size_bytes": 49157929 - }, - { - "name": "Gp0127645_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_covstats.txt", - "md5_checksum": "d883460ae5f8cbabc3d437e745935040", - "id": "nmdc:d883460ae5f8cbabc3d437e745935040", - "file_size_bytes": 8394481 - }, - { - "name": "Gp0127645_Assembled AGP file", - "description": "Assembled AGP file for Gp0127645", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_assembly.agp", - "md5_checksum": "f36166196caa529e09f3b93e17db3acc", - "id": "nmdc:f36166196caa529e09f3b93e17db3acc", - "file_size_bytes": 7804199 - }, - { - "name": "Gp0127645_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127645", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_pairedMapped_sorted.bam", - "md5_checksum": "08a13111a5314ec4c8dbaa59790dc2f1", - "id": "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", - "file_size_bytes": 2047004915 - }, - { - "name": "Gp0127645_Protein FAA", - "description": "Protein FAA for Gp0127645", - 
"data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_proteins.faa", - "md5_checksum": "b14fecfaa99eaad42128e409aa7ae3ec", - "id": "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", - "file_size_bytes": 29015561 - }, - { - "name": "Gp0127645_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127645", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_structural_annotation.gff", - "md5_checksum": "851584f7bcec80cddec4b113fe6cfcea", - "id": "nmdc:851584f7bcec80cddec4b113fe6cfcea", - "file_size_bytes": 2506 - }, - { - "name": "Gp0127645_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127645", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_functional_annotation.gff", - "md5_checksum": "d0280881c70c54946d9b5170e62b904b", - "id": "nmdc:d0280881c70c54946d9b5170e62b904b", - "file_size_bytes": 34124039 - }, - { - "name": "Gp0127645_KO TSV file", - "description": "KO TSV file for Gp0127645", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko.tsv", - "md5_checksum": "7c1894478af7b8205bb4760acb93c353", - "id": "nmdc:7c1894478af7b8205bb4760acb93c353", - "file_size_bytes": 3942110 - }, - { - "name": "Gp0127645_EC TSV file", - "description": "EC TSV file for Gp0127645", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ec.tsv", - "md5_checksum": "ac413560dfdbcea1f0697391b593c552", - "id": "nmdc:ac413560dfdbcea1f0697391b593c552", - "file_size_bytes": 2691460 - }, - { - "name": "Gp0127645_COG GFF file", - "description": "COG GFF file for Gp0127645", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cog.gff", - "md5_checksum": "80f846ff418e4758f4c6b9a96ba2b8ca", - "id": "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", - "file_size_bytes": 19597211 - }, - { - "name": "Gp0127645_PFAM GFF file", - "description": "PFAM GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_pfam.gff", - "md5_checksum": "bbfcd35137b7cb018945a531704805eb", - "id": "nmdc:bbfcd35137b7cb018945a531704805eb", - "file_size_bytes": 14110039 - }, - { - "name": "Gp0127645_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_tigrfam.gff", - "md5_checksum": "c1c10952c472a97fb7de8bc7dbce564b", - "id": "nmdc:c1c10952c472a97fb7de8bc7dbce564b", - "file_size_bytes": 1502814 - }, - { - "name": "Gp0127645_SMART GFF file", - "description": "SMART GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_smart.gff", - "md5_checksum": "b86dba5a29f4ca25cec7c0590e0b4771", - "id": "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", - "file_size_bytes": 4354176 - }, - { - "name": "Gp0127645_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_supfam.gff", - "md5_checksum": "a701026580285ca67816cb9a2f272ca6", - "id": "nmdc:a701026580285ca67816cb9a2f272ca6", - "file_size_bytes": 24911282 - }, - { - "name": "Gp0127645_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cath_funfam.gff", - "md5_checksum": "5ce71fa6aebdb4fb9f843e89ab53ca9b", - "id": "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", - "file_size_bytes": 18832113 - }, - { - "name": "Gp0127645_KO_EC GFF file", - "description": "KO_EC GFF file for 
Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko_ec.gff", - "md5_checksum": "47c0e39e60bd4d688a29ede2af2cee35", - "id": "nmdc:47c0e39e60bd4d688a29ede2af2cee35", - "file_size_bytes": 12581509 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127645_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.tooShort.fa", - "md5_checksum": "47d1233f5afdd7b00790ac2ca8be778a", - "id": "nmdc:47d1233f5afdd7b00790ac2ca8be778a", - "file_size_bytes": 43078346 - }, - { - "name": "Gp0127645_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.unbinned.fa", - "md5_checksum": "637bc2394dcb4869149370683ccc9e61", - "id": "nmdc:637bc2394dcb4869149370683ccc9e61", - "file_size_bytes": 6153132 - }, - { - "name": "Gp0127645_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127645", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_checkm_qa.out", - "md5_checksum": "a8e49a136701e388199a72f02bb6d288", - "id": "nmdc:a8e49a136701e388199a72f02bb6d288", - "file_size_bytes": 765 - }, - { - "name": "Gp0127645_high-quality and medium-quality bins", - "description": "high-quality 
and medium-quality bins for Gp0127645", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_hqmq_bin.zip", - "md5_checksum": "b0d2597d04809508e9dd0bcb48c7edad", - "id": "nmdc:b0d2597d04809508e9dd0bcb48c7edad", - "file_size_bytes": 182 - }, - { - "name": "Gp0127645_metabat2 bins", - "description": "metabat2 bins for Gp0127645", - "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_metabat_bin.zip", - "md5_checksum": "106983a66b58a2d07f0592d9379ad635", - "id": "nmdc:106983a66b58a2d07f0592d9379ad635", - "file_size_bytes": 76018 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eb0" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127645", - "url": "https://data.microbiomedata.org/data/1781_100347/assembly/mapping_stats.txt", - "file_size_bytes": 7967021, - "type": "nmdc:DataObject", - "id": "nmdc:b9abce64459572cfb1b7ab2bed3c24f5", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ebc" - }, - "description": "Assembled contigs fasta for gold:Gp0127645", - "url": "https://data.microbiomedata.org/data/1781_100347/assembly/assembly_contigs.fna", - "file_size_bytes": 49051776, - "type": "nmdc:DataObject", - "id": "nmdc:eb1d97165017b3e14d15f6407a181be3", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec0" - }, - "description": "Assembled scaffold fasta for gold:Gp0127645", - "url": "https://data.microbiomedata.org/data/1781_100347/assembly/assembly_scaffolds.fna", - "file_size_bytes": 48730645, - "type": "nmdc:DataObject", - "id": "nmdc:def65e725117abf461c8c182f7f56a72", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec3" - }, - "description": "Assembled AGP file for gold:Gp0127645", - "url": 
"https://data.microbiomedata.org/data/1781_100347/assembly/assembly.agp", - "file_size_bytes": 6948927, - "type": "nmdc:DataObject", - "id": "nmdc:b71d0b119b5c306cf7e692196f77ca98", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec4" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127645", - "url": "https://data.microbiomedata.org/data/1781_100347/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2016145304, - "type": "nmdc:DataObject", - "id": "nmdc:f628c83e48578369510c07a7f81fdb56", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15ba5" - }, - "id": "nmdc:0a84281526e0db5a01a8cc737d2febd8", - "name": "1781_100347.krona.html", - "description": "Gold:Gp0127645 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100347/ReadbasedAnalysis/centrifuge/1781_100347.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bb7" - }, - "id": "nmdc:f2e4cd496ba10ca8ae09e148c1a62e05", - "name": "1781_100347.json", - "description": "Gold:Gp0127645 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100347/ReadbasedAnalysis/1781_100347.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16624" - }, - "id": "nmdc:5bd5972264fb1269fa8516647b3cffd9", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127645", - "file_size_bytes": 41844692, - "url": "https://data.microbiomedata.org/data/1781_100347/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16625" - }, - "id": "nmdc:0e1d05cdd010f61435994457a58076cc", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file 
from metabat2 for gold:Gp0127645", - "file_size_bytes": 6356502, - "url": "https://data.microbiomedata.org/data/1781_100347/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d23" - }, - "description": "EC TSV File for gold:Gp0127645", - "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_ec.tsv", - "md5_checksum": "17524561a0e1f2c9d9ffdebc3b2df6a8", - "file_size_bytes": 3385, - "id": "nmdc:17524561a0e1f2c9d9ffdebc3b2df6a8", - "name": "gold:Gp0127645_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d24" - }, - "description": "KO TSV File for gold:Gp0127645", - "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_ko.tsv", - "md5_checksum": "e2b3ea50301aa3efaea18732ddba04f4", - "file_size_bytes": 3385, - "id": "nmdc:e2b3ea50301aa3efaea18732ddba04f4", - "name": "gold:Gp0127645_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d25" - }, - "description": "Functional annotation GFF file for gold:Gp0127645", - "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_functional_annotation.gff", - "md5_checksum": "c3a8cfa76e5da83b2b24bc6a52f71952", - "file_size_bytes": 3385, - "id": "nmdc:c3a8cfa76e5da83b2b24bc6a52f71952", - "name": "gold:Gp0127645_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d26" - }, - "description": "Protein FAA for gold:Gp0127645", - "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_proteins.faa", - "md5_checksum": "2ab0820d09b9c331ec56d7d3e20552e6", - "file_size_bytes": 3385, - "id": "nmdc:2ab0820d09b9c331ec56d7d3e20552e6", - "name": "gold:Gp0127645_Protein FAA", - "data_object_type": 
"Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d2a" - }, - "description": "Structural annotation GFF file for gold:Gp0127645", - "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_structural_annotation.gff", - "md5_checksum": "06280b3737fbf704d850ac68da190166", - "file_size_bytes": 3385, - "id": "nmdc:06280b3737fbf704d850ac68da190166", - "name": "gold:Gp0127645_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34717" - }, - "has_input": [ - "nmdc:3685fdcfffdf34d2802c692dc0515e33", - "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", - "nmdc:d0280881c70c54946d9b5170e62b904b" - ], - "too_short_contig_num": 102729, - "part_of": [ - "nmdc:mga0jbfx89" - ], - "binned_contig_num": 61, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:47d1233f5afdd7b00790ac2ca8be778a", - "nmdc:637bc2394dcb4869149370683ccc9e61", - "nmdc:a8e49a136701e388199a72f02bb6d288", - "nmdc:b0d2597d04809508e9dd0bcb48c7edad", - "nmdc:106983a66b58a2d07f0592d9379ad635" - ], - "was_informed_by": "gold:Gp0127645", - "input_contig_num": 106865, - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0jbfx89", - "mags_list": [ - { - "number_of_contig": 61, - "completeness": 18.77, - "bin_name": "bins.1", - "gene_count": 307, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 7 - } - ], - "unbinned_contig_num": 4075, - 
"started_at_time": "2021-10-11T02:24:42Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T04:07:11+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9b6" - }, - "has_input": [ - "nmdc:3685fdcfffdf34d2802c692dc0515e33" - ], - "part_of": [ - "nmdc:mga0jbfx89" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", - "nmdc:851584f7bcec80cddec4b113fe6cfcea", - "nmdc:d0280881c70c54946d9b5170e62b904b", - "nmdc:7c1894478af7b8205bb4760acb93c353", - "nmdc:ac413560dfdbcea1f0697391b593c552", - "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", - "nmdc:bbfcd35137b7cb018945a531704805eb", - "nmdc:c1c10952c472a97fb7de8bc7dbce564b", - "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", - "nmdc:a701026580285ca67816cb9a2f272ca6", - "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", - "nmdc:47c0e39e60bd4d688a29ede2af2cee35" - ], - "was_informed_by": "gold:Gp0127645", - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0jbfx89", - "started_at_time": "2021-10-11T02:24:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:07:11+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fa4" - }, - "has_input": [ - "nmdc:034df323b47f010f27e7c032d445a891" - ], - "part_of": [ - "nmdc:mga0jbfx89" - ], - "ctg_logsum": 65663, - "scaf_logsum": 65979, - "gap_pct": 0.00097, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:3685fdcfffdf34d2802c692dc0515e33", - "nmdc:7891adab80c63d98169e3cb7b4331f1e", - "nmdc:d883460ae5f8cbabc3d437e745935040", - "nmdc:f36166196caa529e09f3b93e17db3acc", - "nmdc:08a13111a5314ec4c8dbaa59790dc2f1" - ], - "asm_score": 2.823, - 
"was_informed_by": "gold:Gp0127645", - "ctg_powsum": 6960.932, - "scaf_max": 6924, - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", - "scaf_powsum": 6995.401, - "execution_resource": "NERSC-Cori", - "contigs": 106865, - "name": "Assembly Activity for nmdc:mga0jbfx89", - "ctg_max": 6924, - "gc_std": 0.12472, - "contig_bp": 45473855, - "gc_avg": 0.58373, - "started_at_time": "2021-10-11T02:24:42Z", - "scaf_bp": 45474295, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 106821, - "ended_at_time": "2021-10-11T04:07:11+00:00", - "ctg_l50": 395, - "ctg_l90": 284, - "ctg_n50": 33845, - "ctg_n90": 92046, - "scaf_l50": 395, - "scaf_l90": 284, - "scaf_n50": 33825, - "scaf_n90": 92004 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b5d" - }, - "id": "nmdc:omprc-11-vykcbs96", - "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-ffqcqd73" - ], - "has_output": [ - "jgi:574fde6e7ded5e3df1ee140d" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127645" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": 
"649b009d6bdd4fd20273c86d" - }, - "has_input": [ - "nmdc:5e7fc22a1527c7ff74e245bbb352fa91" - ], - "part_of": [ - "nmdc:mga0jbfx89" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:034df323b47f010f27e7c032d445a891", - "nmdc:ca137bf5e2df6541425f22b5d1fec492" - ], - "was_informed_by": "gold:Gp0127645", - "input_read_count": 24139032, - "output_read_bases": 3475317024, - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3644993832, - "name": "Read QC Activity for nmdc:mga0jbfx89", - "output_read_count": 23262948, - "started_at_time": "2021-10-11T02:24:42Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:07:11+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf43" - }, - "has_input": [ - "nmdc:034df323b47f010f27e7c032d445a891" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:694374188ba4372344536fa26a2282b8", - "nmdc:e11dfa7178e8c426c7c930b57aa40377", - "nmdc:46e203465faf61780fad8f626e9ab623", - "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", - "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", - "nmdc:4299b438a815becc8beed40fcb803e9f", - "nmdc:4ae4dbd13c7338df5c00555bc6755947", - "nmdc:2be07eb38d408077a55ecb48e123f7f8", - "nmdc:f318581f0df6e04b7ae2384f9237da06" - ], - "was_informed_by": "gold:Gp0127645", - "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0jbfx89", - "started_at_time": "2021-10-11T02:24:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:07:11+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - 
"processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2196954131, - "type": "nmdc:DataObject", - "id": "jgi:574fde8c7ded5e3df1ee1424", - "name": "10533.2.165322.GAACGCT-AAGCGTT.fastq.gz" - }, - { - "name": "Gp0127649_Filtered Reads", - "description": "Filtered Reads for Gp0127649", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", - "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "file_size_bytes": 1967546513 - }, - { - "name": "Gp0127649_Filtered Stats", - "description": "Filtered Stats for Gp0127649", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", - "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", - "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", - "file_size_bytes": 283 - }, - { - "name": "Gp0127649_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", - "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", - "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "file_size_bytes": 2079 - }, - { - "name": "Gp0127649_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", - "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", - "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "file_size_bytes": 642861 - }, - { - "name": "Gp0127649_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127649", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", - "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", - "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", - "file_size_bytes": 230792 - }, - { - "name": "Gp0127649_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127649", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", - "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", - "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "file_size_bytes": 1743695420 - }, - { - "name": "Gp0127649_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127649", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", - "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", - "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "file_size_bytes": 254036 - }, - { - "name": "Gp0127649_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127649", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", - "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", - "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "file_size_bytes": 2332943 - }, - { - "name": "Gp0127649_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127649", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", - "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", - "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "file_size_bytes": 
1387669799 - }, - { - "name": "Gp0127649_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127649", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", - "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", - "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "file_size_bytes": 637131 - }, - { - "name": "Gp0127649_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127649", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", - "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", - "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", - "file_size_bytes": 3976407 - }, - { - "name": "Gp0127649_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127649", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_contigs.fna", - "md5_checksum": "5ada15f24d3de4a96521532a4ced6018", - "id": "nmdc:5ada15f24d3de4a96521532a4ced6018", - "file_size_bytes": 94852732 - }, - { - "name": "Gp0127649_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127649", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_scaffolds.fna", - "md5_checksum": "fc32ae27239661670605b59c395dd770", - "id": "nmdc:fc32ae27239661670605b59c395dd770", - "file_size_bytes": 94280572 - }, - { - "name": "Gp0127649_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_covstats.txt", - "md5_checksum": "d6e996af3275c4cdd3e51376517e2b6b", - "id": "nmdc:d6e996af3275c4cdd3e51376517e2b6b", - "file_size_bytes": 15029734 - }, - { - 
"name": "Gp0127649_Assembled AGP file", - "description": "Assembled AGP file for Gp0127649", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_assembly.agp", - "md5_checksum": "f52600933fc5a09f7cead5c065d6b100", - "id": "nmdc:f52600933fc5a09f7cead5c065d6b100", - "file_size_bytes": 14057243 - }, - { - "name": "Gp0127649_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127649", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_pairedMapped_sorted.bam", - "md5_checksum": "5d9826a5f5164cfe20bfc1343144c96f", - "id": "nmdc:5d9826a5f5164cfe20bfc1343144c96f", - "file_size_bytes": 2147322298 - }, - { - "name": "Gp0127649_Protein FAA", - "description": "Protein FAA for Gp0127649", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_proteins.faa", - "md5_checksum": "4e5d87bb4bb3198f5b9955622a781376", - "id": "nmdc:4e5d87bb4bb3198f5b9955622a781376", - "file_size_bytes": 55301156 - }, - { - "name": "Gp0127649_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127649", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_structural_annotation.gff", - "md5_checksum": "40f79a8b021a3de27c464087fad9f092", - "id": "nmdc:40f79a8b021a3de27c464087fad9f092", - "file_size_bytes": 2518 - }, - { - "name": "Gp0127649_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127649", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_functional_annotation.gff", - "md5_checksum": "aba74592cf7aa507179e9544c008a0ec", - "id": "nmdc:aba74592cf7aa507179e9544c008a0ec", - 
"file_size_bytes": 63464973 - }, - { - "name": "Gp0127649_KO TSV file", - "description": "KO TSV file for Gp0127649", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko.tsv", - "md5_checksum": "29500fc3a86f2767cc3752ba02fa0a05", - "id": "nmdc:29500fc3a86f2767cc3752ba02fa0a05", - "file_size_bytes": 7317450 - }, - { - "name": "Gp0127649_EC TSV file", - "description": "EC TSV file for Gp0127649", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ec.tsv", - "md5_checksum": "ba8fedc9b57d401ad0cc2b329038c5a9", - "id": "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", - "file_size_bytes": 4888576 - }, - { - "name": "Gp0127649_COG GFF file", - "description": "COG GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cog.gff", - "md5_checksum": "66bd5f2b62818742c6df5c39d1952a99", - "id": "nmdc:66bd5f2b62818742c6df5c39d1952a99", - "file_size_bytes": 36960882 - }, - { - "name": "Gp0127649_PFAM GFF file", - "description": "PFAM GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_pfam.gff", - "md5_checksum": "e60c77fb34f71861ceacf988074949af", - "id": "nmdc:e60c77fb34f71861ceacf988074949af", - "file_size_bytes": 27535342 - }, - { - "name": "Gp0127649_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_tigrfam.gff", - "md5_checksum": "3738ab59fb56002a9f38d95b101957bd", - "id": "nmdc:3738ab59fb56002a9f38d95b101957bd", - "file_size_bytes": 2999247 - }, - { - "name": "Gp0127649_SMART GFF file", - "description": "SMART GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_smart.gff", - "md5_checksum": 
"2f34c5db7846cbf37add471c0dbca951", - "id": "nmdc:2f34c5db7846cbf37add471c0dbca951", - "file_size_bytes": 8199823 - }, - { - "name": "Gp0127649_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_supfam.gff", - "md5_checksum": "fa7f659afca037861ae65e08092f2d83", - "id": "nmdc:fa7f659afca037861ae65e08092f2d83", - "file_size_bytes": 46114509 - }, - { - "name": "Gp0127649_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cath_funfam.gff", - "md5_checksum": "9ee627031c0b425974fa1aa4d695d4ae", - "id": "nmdc:9ee627031c0b425974fa1aa4d695d4ae", - "file_size_bytes": 34807554 - }, - { - "name": "Gp0127649_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko_ec.gff", - "md5_checksum": "2fc423fd55e34d3400c9a6924df67633", - "id": "nmdc:2fc423fd55e34d3400c9a6924df67633", - "file_size_bytes": 23276630 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127649_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.tooShort.fa", - "md5_checksum": "f84d25fee16a4dece54f5580d893ecaa", - "id": "nmdc:f84d25fee16a4dece54f5580d893ecaa", - "file_size_bytes": 79592416 - }, - { - 
"name": "Gp0127649_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.unbinned.fa", - "md5_checksum": "ed61fb0056b08bc82f4545c49b744c2a", - "id": "nmdc:ed61fb0056b08bc82f4545c49b744c2a", - "file_size_bytes": 14383032 - }, - { - "name": "Gp0127649_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127649", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_checkm_qa.out", - "md5_checksum": "8d4bce832a16affbcc3efeb8364e8eaa", - "id": "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", - "file_size_bytes": 942 - }, - { - "name": "Gp0127649_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127649", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_hqmq_bin.zip", - "md5_checksum": "40273505b8b3dddd3ee5cb5c83871067", - "id": "nmdc:40273505b8b3dddd3ee5cb5c83871067", - "file_size_bytes": 182 - }, - { - "name": "Gp0127649_metabat2 bins", - "description": "metabat2 bins for Gp0127649", - "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_metabat_bin.zip", - "md5_checksum": "b767f2b59d0fd9e650914e140cacf104", - "id": "nmdc:b767f2b59d0fd9e650914e140cacf104", - "file_size_bytes": 269239 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eba" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127649", - "url": "https://data.microbiomedata.org/data/1781_100351/assembly/mapping_stats.txt", - "file_size_bytes": 14269698, - "type": "nmdc:DataObject", - "id": "nmdc:02f1408424cf3420cad010fe4f672f7a", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ebb" - }, - 
"description": "Assembled contigs fasta for gold:Gp0127649", - "url": "https://data.microbiomedata.org/data/1781_100351/assembly/assembly_contigs.fna", - "file_size_bytes": 94092696, - "type": "nmdc:DataObject", - "id": "nmdc:6300cd8140abe6322e4a9c1921584476", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ebd" - }, - "description": "Assembled scaffold fasta for gold:Gp0127649", - "url": "https://data.microbiomedata.org/data/1781_100351/assembly/assembly_scaffolds.fna", - "file_size_bytes": 93521052, - "type": "nmdc:DataObject", - "id": "nmdc:0dc5339ec9b3ea1dad516beff981255e", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec7" - }, - "description": "Assembled AGP file for gold:Gp0127649", - "url": "https://data.microbiomedata.org/data/1781_100351/assembly/assembly.agp", - "file_size_bytes": 12536139, - "type": "nmdc:DataObject", - "id": "nmdc:8477d852446a073d2d2de6b146b2921b", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec8" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127649", - "url": "https://data.microbiomedata.org/data/1781_100351/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2118848396, - "type": "nmdc:DataObject", - "id": "nmdc:b486014481aba75b91177c9cc3a9cf7b", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bc7" - }, - "id": "nmdc:082242b653e5a0803121d043375f93a3", - "name": "1781_100351.krona.html", - "description": "Gold:Gp0127649 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100351/ReadbasedAnalysis/centrifuge/1781_100351.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": 
"649b003d1ae706d7b5b15bd1" - }, - "id": "nmdc:b7078aa8cc8165e23978ece3312e9192", - "name": "1781_100351.json", - "description": "Gold:Gp0127649 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100351/ReadbasedAnalysis/1781_100351.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16635" - }, - "id": "nmdc:a0108431c6c1fcfc7cec6830ef96dcb9", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127649", - "file_size_bytes": 775, - "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16639" - }, - "id": "nmdc:0c1c4f73f64b5651a20d421225d67f24", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127649", - "file_size_bytes": 77337130, - "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1663a" - }, - "id": "nmdc:d11e901143547313fd0037177b5555ae", - "name": "gold:Gp0127649.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127649", - "file_size_bytes": 265082, - "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1663b" - }, - "id": "nmdc:680731abdb97ba4977d33afbbc0b0c61", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127649", - "file_size_bytes": 14891738, - "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d3e" - }, - "description": "KO TSV File for gold:Gp0127649", - 
"url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_ko.tsv", - "md5_checksum": "35c0fd91c2225f595df469b61ba9578b", - "file_size_bytes": 3385, - "id": "nmdc:35c0fd91c2225f595df469b61ba9578b", - "name": "gold:Gp0127649_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d3f" - }, - "description": "Structural annotation GFF file for gold:Gp0127649", - "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_structural_annotation.gff", - "md5_checksum": "cff0a71781a84c7096ee79b39c3336f8", - "file_size_bytes": 3385, - "id": "nmdc:cff0a71781a84c7096ee79b39c3336f8", - "name": "gold:Gp0127649_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d40" - }, - "description": "EC TSV File for gold:Gp0127649", - "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_ec.tsv", - "md5_checksum": "a14b836f963c0f6b02a70f0fc8cd40c0", - "file_size_bytes": 3385, - "id": "nmdc:a14b836f963c0f6b02a70f0fc8cd40c0", - "name": "gold:Gp0127649_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d41" - }, - "description": "Functional annotation GFF file for gold:Gp0127649", - "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_functional_annotation.gff", - "md5_checksum": "a022fd9c3254ad5dc6ae5be40cd35c0b", - "file_size_bytes": 3385, - "id": "nmdc:a022fd9c3254ad5dc6ae5be40cd35c0b", - "name": "gold:Gp0127649_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d42" - }, - "description": "Protein FAA for gold:Gp0127649", - "url": 
"https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_proteins.faa", - "md5_checksum": "56c3ac34fb2f1c2ba7bcd9bd56be731a", - "file_size_bytes": 3385, - "id": "nmdc:56c3ac34fb2f1c2ba7bcd9bd56be731a", - "name": "gold:Gp0127649_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34729" - }, - "has_input": [ - "nmdc:5ada15f24d3de4a96521532a4ced6018", - "nmdc:5d9826a5f5164cfe20bfc1343144c96f", - "nmdc:aba74592cf7aa507179e9544c008a0ec" - ], - "too_short_contig_num": 180499, - "part_of": [ - "nmdc:mga0j4fe07" - ], - "binned_contig_num": 211, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:f84d25fee16a4dece54f5580d893ecaa", - "nmdc:ed61fb0056b08bc82f4545c49b744c2a", - "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", - "nmdc:40273505b8b3dddd3ee5cb5c83871067", - "nmdc:b767f2b59d0fd9e650914e140cacf104" - ], - "was_informed_by": "gold:Gp0127649", - "input_contig_num": 190009, - "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0j4fe07", - "mags_list": [ - { - "number_of_contig": 64, - "completeness": 16.46, - "bin_name": "bins.1", - "gene_count": 305, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.47, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 4 - }, - { - "number_of_contig": 147, - "completeness": 19.16, - "bin_name": "bins.2", - "gene_count": 744, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - 
"gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 15 - } - ], - "unbinned_contig_num": 9299, - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:38:32+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9ca" - }, - "has_input": [ - "nmdc:5ada15f24d3de4a96521532a4ced6018" - ], - "part_of": [ - "nmdc:mga0j4fe07" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:4e5d87bb4bb3198f5b9955622a781376", - "nmdc:40f79a8b021a3de27c464087fad9f092", - "nmdc:aba74592cf7aa507179e9544c008a0ec", - "nmdc:29500fc3a86f2767cc3752ba02fa0a05", - "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", - "nmdc:66bd5f2b62818742c6df5c39d1952a99", - "nmdc:e60c77fb34f71861ceacf988074949af", - "nmdc:3738ab59fb56002a9f38d95b101957bd", - "nmdc:2f34c5db7846cbf37add471c0dbca951", - "nmdc:fa7f659afca037861ae65e08092f2d83", - "nmdc:9ee627031c0b425974fa1aa4d695d4ae", - "nmdc:2fc423fd55e34d3400c9a6924df67633" - ], - "was_informed_by": "gold:Gp0127649", - "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0j4fe07", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:38:32+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb8" - }, - "has_input": [ - "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" - ], - "part_of": [ - "nmdc:mga0j4fe07" - ], - "ctg_logsum": 157844, - "scaf_logsum": 158661, - "gap_pct": 0.00147, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:5ada15f24d3de4a96521532a4ced6018", - "nmdc:fc32ae27239661670605b59c395dd770", - 
"nmdc:d6e996af3275c4cdd3e51376517e2b6b", - "nmdc:f52600933fc5a09f7cead5c065d6b100", - "nmdc:5d9826a5f5164cfe20bfc1343144c96f" - ], - "asm_score": 3.279, - "was_informed_by": "gold:Gp0127649", - "ctg_powsum": 16877, - "scaf_max": 28201, - "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", - "scaf_powsum": 16967, - "execution_resource": "NERSC-Cori", - "contigs": 190009, - "name": "Assembly Activity for nmdc:mga0j4fe07", - "ctg_max": 28201, - "gc_std": 0.09385, - "contig_bp": 87528185, - "gc_avg": 0.62766, - "started_at_time": "2021-10-11T02:23:29Z", - "scaf_bp": 87529475, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 189880, - "ended_at_time": "2021-10-11T03:38:32+00:00", - "ctg_l50": 440, - "ctg_l90": 289, - "ctg_n50": 57445, - "ctg_n90": 160942, - "scaf_l50": 440, - "scaf_l90": 289, - "scaf_n50": 57416, - "scaf_n90": 160823 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b5e" - }, - "id": "nmdc:omprc-11-dw7shd52", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-7fedgs13" - ], - "has_output": [ - "jgi:574fde8c7ded5e3df1ee1424" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": 
"nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127649" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c885" - }, - "has_input": [ - "nmdc:5895de3040f750a5ce1b5238158fd51c" - ], - "part_of": [ - "nmdc:mga0j4fe07" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", - "nmdc:25a7ff469ffae5906d6ade4d74cab88f" - ], - "was_informed_by": "gold:Gp0127649", - "input_read_count": 24889788, - "output_read_bases": 3558782964, - "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3758357988, - "name": "Read QC Activity for nmdc:mga0j4fe07", - "output_read_count": 23803802, - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:38:32+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf48" - }, - "has_input": [ - "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", - "nmdc:4aa159b1ee973c6e3e309ef60d351018", - "nmdc:8c1683fa4041bd10711aa3beb4735811", - "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", - "nmdc:d4f57641e41f0249f3fde7b973289cf5", - "nmdc:4e9ec619c5611cb0166ea127496fadeb", - "nmdc:ed2b2495ca211e17298ca2e212fe3811", - "nmdc:05d35fc4e391296ff0e716c3fcbbee89", - "nmdc:0d07551972f3230ec2ef4a0e04929b97" - ], - "was_informed_by": "gold:Gp0127649", - "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:38:32+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - 
"collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2318220660, - "type": "nmdc:DataObject", - "id": "jgi:574fe0af7ded5e3df1ee1493", - "name": "10533.3.165334.CAATCGA-GTCGATT.fastq.gz" - }, - { - "name": "Gp0127652_Filtered Reads", - "description": "Filtered Reads for Gp0127652", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filtered.fastq.gz", - "md5_checksum": "60f03b815160b29125c2bd0776a330bf", - "id": "nmdc:60f03b815160b29125c2bd0776a330bf", - "file_size_bytes": 2019434951 - }, - { - "name": "Gp0127652_Filtered Stats", - "description": "Filtered Stats for Gp0127652", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filterStats.txt", - "md5_checksum": "c40fa552711f6b19130b2a559f2d4cdc", - "id": "nmdc:c40fa552711f6b19130b2a559f2d4cdc", - "file_size_bytes": 290 - }, - { - "name": "Gp0127652_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", - "md5_checksum": "70f29a321c925cfc0e2003515f708400", - "id": "nmdc:70f29a321c925cfc0e2003515f708400", - "file_size_bytes": 1524 - }, - { - "name": "Gp0127652_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", - "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", - "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "file_size_bytes": 670250 - }, - 
{ - "name": "Gp0127652_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127652", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", - "md5_checksum": "9cd3b2939adabd809741ae6a84260266", - "id": "nmdc:9cd3b2939adabd809741ae6a84260266", - "file_size_bytes": 229949 - }, - { - "name": "Gp0127652_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127652", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", - "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", - "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "file_size_bytes": 1814515284 - }, - { - "name": "Gp0127652_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127652", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", - "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", - "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "file_size_bytes": 253730 - }, - { - "name": "Gp0127652_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127652", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", - "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", - "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "file_size_bytes": 2330558 - }, - { - "name": "Gp0127652_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127652", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", - "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", - "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "file_size_bytes": 1445957300 - }, - { - "name": "Gp0127652_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127652", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", - "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", - "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "file_size_bytes": 639677 - }, - { - "name": "Gp0127652_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127652", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", - "md5_checksum": "1df4b479c887b43319d89cc80dc35239", - "id": "nmdc:1df4b479c887b43319d89cc80dc35239", - "file_size_bytes": 3991377 - }, - { - "name": "Gp0127652_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127652", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_contigs.fna", - "md5_checksum": "a550eb6e614b375c1089ab816163ea63", - "id": "nmdc:a550eb6e614b375c1089ab816163ea63", - "file_size_bytes": 117075841 - }, - { - "name": "Gp0127652_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127652", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_scaffolds.fna", - "md5_checksum": "9f194d271c352af3f68f2afeb1dbd499", - "id": "nmdc:9f194d271c352af3f68f2afeb1dbd499", - "file_size_bytes": 116423675 - }, - { - "name": "Gp0127652_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127652", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_covstats.txt", - "md5_checksum": "b0a79069110825cfe5525a8fc4f02cb6", - "id": "nmdc:b0a79069110825cfe5525a8fc4f02cb6", - "file_size_bytes": 17141637 - }, - { - "name": "Gp0127652_Assembled AGP file", - "description": "Assembled AGP file for Gp0127652", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_assembly.agp", - "md5_checksum": "f54e8bda482b1cb8bc8e121ee5f39e07", - "id": "nmdc:f54e8bda482b1cb8bc8e121ee5f39e07", - "file_size_bytes": 16044279 - }, - { - "name": "Gp0127652_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127652", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_pairedMapped_sorted.bam", - "md5_checksum": "c8c5056ee57126695073137d0c1d3d04", - "id": "nmdc:c8c5056ee57126695073137d0c1d3d04", - "file_size_bytes": 2224050507 - }, - { - "name": "Gp0127652_Protein FAA", - "description": "Protein FAA for Gp0127652", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_proteins.faa", - "md5_checksum": "096c54bce5ec1cc5d41ac64553e42cb3", - "id": "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", - "file_size_bytes": 66555768 - }, - { - "name": "Gp0127652_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127652", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_structural_annotation.gff", - "md5_checksum": "ac8cd253a39e6e5fe0a0930f3bf6888a", - "id": "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", - "file_size_bytes": 2521 - }, - { - "name": "Gp0127652_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127652", - "data_object_type": 
"Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_functional_annotation.gff", - "md5_checksum": "863dc502676573c59ce69b1ff786042a", - "id": "nmdc:863dc502676573c59ce69b1ff786042a", - "file_size_bytes": 74520486 - }, - { - "name": "Gp0127652_KO TSV file", - "description": "KO TSV file for Gp0127652", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko.tsv", - "md5_checksum": "28ed2a9e345d0e542127fd1dc2173ae7", - "id": "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", - "file_size_bytes": 8379185 - }, - { - "name": "Gp0127652_EC TSV file", - "description": "EC TSV file for Gp0127652", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ec.tsv", - "md5_checksum": "a826d96e791f69ff7759d57f44a8a510", - "id": "nmdc:a826d96e791f69ff7759d57f44a8a510", - "file_size_bytes": 5555311 - }, - { - "name": "Gp0127652_COG GFF file", - "description": "COG GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cog.gff", - "md5_checksum": "58e310990be01a574eef05b3f5dd1495", - "id": "nmdc:58e310990be01a574eef05b3f5dd1495", - "file_size_bytes": 43385646 - }, - { - "name": "Gp0127652_PFAM GFF file", - "description": "PFAM GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_pfam.gff", - "md5_checksum": "28ce5c4c605a1c4538ce63987252c0ad", - "id": "nmdc:28ce5c4c605a1c4538ce63987252c0ad", - "file_size_bytes": 33061709 - }, - { - "name": "Gp0127652_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_tigrfam.gff", - "md5_checksum": "6de9ddf0b07c9bcf1409aceb7ee2f941", - "id": "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", - 
"file_size_bytes": 3665042 - }, - { - "name": "Gp0127652_SMART GFF file", - "description": "SMART GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_smart.gff", - "md5_checksum": "6342c9c98e297d2e39a2144c7ca0191b", - "id": "nmdc:6342c9c98e297d2e39a2144c7ca0191b", - "file_size_bytes": 9667737 - }, - { - "name": "Gp0127652_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_supfam.gff", - "md5_checksum": "d20aa781d3ad6b0face7cc9c412bc3f7", - "id": "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", - "file_size_bytes": 54593577 - }, - { - "name": "Gp0127652_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cath_funfam.gff", - "md5_checksum": "db2e4b8f6cc1e8dc934e14b93589805a", - "id": "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", - "file_size_bytes": 41409254 - }, - { - "name": "Gp0127652_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko_ec.gff", - "md5_checksum": "f51f9d679d1b045f4ebc61dab7fc2f08", - "id": "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08", - "file_size_bytes": 26617726 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127652_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127652", - 
"url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.tooShort.fa", - "md5_checksum": "4371932b5834f2deadb2fbfc42b056f7", - "id": "nmdc:4371932b5834f2deadb2fbfc42b056f7", - "file_size_bytes": 89154072 - }, - { - "name": "Gp0127652_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.unbinned.fa", - "md5_checksum": "5a8d8441e6e472837809ee31d517d32a", - "id": "nmdc:5a8d8441e6e472837809ee31d517d32a", - "file_size_bytes": 24514353 - }, - { - "name": "Gp0127652_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127652", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_checkm_qa.out", - "md5_checksum": "16016a7b2388048eec469f73395bc478", - "id": "nmdc:16016a7b2388048eec469f73395bc478", - "file_size_bytes": 1320 - }, - { - "name": "Gp0127652_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127652", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_hqmq_bin.zip", - "md5_checksum": "1e604f9f29f74c6169c4d27f839bb7b0", - "id": "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", - "file_size_bytes": 182 - }, - { - "name": "Gp0127652_metabat2 bins", - "description": "metabat2 bins for Gp0127652", - "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_metabat_bin.zip", - "md5_checksum": "21467369d04671628ae67afbaf1d2076", - "id": "nmdc:21467369d04671628ae67afbaf1d2076", - "file_size_bytes": 1013750 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ece" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127652", - "url": "https://data.microbiomedata.org/data/1781_100354/assembly/mapping_stats.txt", - 
"file_size_bytes": 16276629, - "type": "nmdc:DataObject", - "id": "nmdc:23dcbb19af7db7cda8f06a1b375f12bb", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ecf" - }, - "description": "Assembled scaffold fasta for gold:Gp0127652", - "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly_scaffolds.fna", - "file_size_bytes": 115559491, - "type": "nmdc:DataObject", - "id": "nmdc:880b4e3e1b337def43f9dc694227eb50", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed0" - }, - "description": "Assembled contigs fasta for gold:Gp0127652", - "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly_contigs.fna", - "file_size_bytes": 116210833, - "type": "nmdc:DataObject", - "id": "nmdc:e8bc7228a422a7c1a2641276ee3f6e37", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed1" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127652", - "url": "https://data.microbiomedata.org/data/1781_100354/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2194603382, - "type": "nmdc:DataObject", - "id": "nmdc:8f74962a51f82e4cebc78b6ac49dee49", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eda" - }, - "description": "Assembled AGP file for gold:Gp0127652", - "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly.agp", - "file_size_bytes": 14312615, - "type": "nmdc:DataObject", - "id": "nmdc:6a251e6317c4450686a6215b61cd85d1", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bde" - }, - "id": "nmdc:a29b48c9962bc2acbf5d7e1b5a8e3a41", - "name": "1781_100354.krona.html", - "description": "Gold:Gp0127652 KRONA plot HTML file", - 
"url": "https://data.microbiomedata.org/1781_100354/ReadbasedAnalysis/centrifuge/1781_100354.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15be2" - }, - "id": "nmdc:966945bb7952a4629efc713c78ef927f", - "name": "1781_100354.json", - "description": "Gold:Gp0127652 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100354/ReadbasedAnalysis/1781_100354.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16637" - }, - "id": "nmdc:0327492db7a99ab0fb672213e49e2f84", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127652", - "file_size_bytes": 86634430, - "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1663c" - }, - "id": "nmdc:376fce40c578be064e55103093f99f66", - "name": "gold:Gp0127652.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127652", - "file_size_bytes": 254409, - "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1663d" - }, - "id": "nmdc:a28fa4b0897b1eae6d10053c47d07319", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127652", - "file_size_bytes": 1148, - "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1663e" - }, - "id": "nmdc:b7511c0e296be199db386a3ae4181e45", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127652", - "file_size_bytes": 
26455665, - "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1663f" - }, - "id": "nmdc:1946e92c1dfeddcf766605d2f6227934", - "name": "gold:Gp0127652.bins.3.fa", - "description": "metabat2 binned contig file for gold:Gp0127652", - "file_size_bytes": 340274, - "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16640" - }, - "id": "nmdc:794cbd38c4fe3d18faf5ceb5f543de61", - "name": "gold:Gp0127652.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127652", - "file_size_bytes": 691252, - "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d47" - }, - "description": "EC TSV File for gold:Gp0127652", - "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_ec.tsv", - "md5_checksum": "06ceb99673dcb924ca223539267a962a", - "file_size_bytes": 3385, - "id": "nmdc:06ceb99673dcb924ca223539267a962a", - "name": "gold:Gp0127652_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d49" - }, - "description": "KO TSV File for gold:Gp0127652", - "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_ko.tsv", - "md5_checksum": "4d16f813aefc09c7720770f065964c49", - "file_size_bytes": 3385, - "id": "nmdc:4d16f813aefc09c7720770f065964c49", - "name": "gold:Gp0127652_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d4a" - }, - "description": "Structural annotation GFF file for gold:Gp0127652", - "url": 
"https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_structural_annotation.gff", - "md5_checksum": "6b39045cb99ca6220e27c4fa960f4dd1", - "file_size_bytes": 3385, - "id": "nmdc:6b39045cb99ca6220e27c4fa960f4dd1", - "name": "gold:Gp0127652_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d4b" - }, - "description": "Functional annotation GFF file for gold:Gp0127652", - "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_functional_annotation.gff", - "md5_checksum": "80c28fa3efc78e6d23d0abcf1161c983", - "file_size_bytes": 3385, - "id": "nmdc:80c28fa3efc78e6d23d0abcf1161c983", - "name": "gold:Gp0127652_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d4e" - }, - "description": "Protein FAA for gold:Gp0127652", - "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_proteins.faa", - "md5_checksum": "48bb698de57cd77bf1ddda9004e89c01", - "file_size_bytes": 3385, - "id": "nmdc:48bb698de57cd77bf1ddda9004e89c01", - "name": "gold:Gp0127652_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34727" - }, - "has_input": [ - "nmdc:a550eb6e614b375c1089ab816163ea63", - "nmdc:c8c5056ee57126695073137d0c1d3d04", - "nmdc:863dc502676573c59ce69b1ff786042a" - ], - "too_short_contig_num": 200309, - "part_of": [ - "nmdc:mga0mfxf90" - ], - "binned_contig_num": 835, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:4371932b5834f2deadb2fbfc42b056f7", - "nmdc:5a8d8441e6e472837809ee31d517d32a", - 
"nmdc:16016a7b2388048eec469f73395bc478", - "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", - "nmdc:21467369d04671628ae67afbaf1d2076" - ], - "was_informed_by": "gold:Gp0127652", - "input_contig_num": 216252, - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0mfxf90", - "mags_list": [ - { - "number_of_contig": 233, - "completeness": 12.16, - "bin_name": "bins.1", - "gene_count": 1133, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 10 - }, - { - "number_of_contig": 349, - "completeness": 45.68, - "bin_name": "bins.2", - "gene_count": 1809, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 10.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 1, - "num_23s": 1, - "gtdbtk_genus": "", - "num_t_rna": 16 - }, - { - "number_of_contig": 106, - "completeness": 17.54, - "bin_name": "bins.3", - "gene_count": 552, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 12 - }, - { - "number_of_contig": 147, - "completeness": 14.66, - "bin_name": "bins.4", - "gene_count": 668, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - } - ], - "unbinned_contig_num": 15108, - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:MAGsAnalysisActivity", - 
"ended_at_time": "2021-10-11T04:45:21+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c3" - }, - "has_input": [ - "nmdc:a550eb6e614b375c1089ab816163ea63" - ], - "part_of": [ - "nmdc:mga0mfxf90" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", - "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", - "nmdc:863dc502676573c59ce69b1ff786042a", - "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", - "nmdc:a826d96e791f69ff7759d57f44a8a510", - "nmdc:58e310990be01a574eef05b3f5dd1495", - "nmdc:28ce5c4c605a1c4538ce63987252c0ad", - "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", - "nmdc:6342c9c98e297d2e39a2144c7ca0191b", - "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", - "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", - "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08" - ], - "was_informed_by": "gold:Gp0127652", - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0mfxf90", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T04:45:21+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb1" - }, - "has_input": [ - "nmdc:60f03b815160b29125c2bd0776a330bf" - ], - "part_of": [ - "nmdc:mga0mfxf90" - ], - "ctg_logsum": 293195, - "scaf_logsum": 294510, - "gap_pct": 0.0019, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:a550eb6e614b375c1089ab816163ea63", - "nmdc:9f194d271c352af3f68f2afeb1dbd499", - "nmdc:b0a79069110825cfe5525a8fc4f02cb6", - "nmdc:f54e8bda482b1cb8bc8e121ee5f39e07", - "nmdc:c8c5056ee57126695073137d0c1d3d04" - ], - "asm_score": 3.266, - "was_informed_by": "gold:Gp0127652", - "ctg_powsum": 31744, - "scaf_max": 16883, - "id": 
"nmdc:c86126b11f214f19721c56fadf91d87c", - "scaf_powsum": 31903, - "execution_resource": "NERSC-Cori", - "contigs": 216252, - "name": "Assembly Activity for nmdc:mga0mfxf90", - "ctg_max": 16883, - "gc_std": 0.09516, - "contig_bp": 108575090, - "gc_avg": 0.63494, - "started_at_time": "2021-10-11T02:27:08Z", - "scaf_bp": 108577150, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 216046, - "ended_at_time": "2021-10-11T04:45:21+00:00", - "ctg_l50": 493, - "ctg_l90": 290, - "ctg_n50": 57034, - "ctg_n90": 179762, - "scaf_l50": 493, - "scaf_l90": 290, - "scaf_n50": 56962, - "scaf_n90": 179563 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b5f" - }, - "id": "nmdc:omprc-11-j43hz774", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-xngecc18" - ], - "has_output": [ - "jgi:574fe0af7ded5e3df1ee1493" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127652" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c87e" - }, - "has_input": [ - 
"nmdc:b0548475f69b48e2d150cb90ae27f2c6" - ], - "part_of": [ - "nmdc:mga0mfxf90" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:60f03b815160b29125c2bd0776a330bf", - "nmdc:c40fa552711f6b19130b2a559f2d4cdc" - ], - "was_informed_by": "gold:Gp0127652", - "input_read_count": 26604768, - "output_read_bases": 3697162034, - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4017319968, - "name": "Read QC Activity for nmdc:mga0mfxf90", - "output_read_count": 24717950, - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T04:45:21+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf40" - }, - "has_input": [ - "nmdc:60f03b815160b29125c2bd0776a330bf" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:70f29a321c925cfc0e2003515f708400", - "nmdc:93d5419c0b31e0696ab8ffef477945fb", - "nmdc:9cd3b2939adabd809741ae6a84260266", - "nmdc:acea91fced8993a40cf1eb9cda29c4cd", - "nmdc:b623a0d3bdff34fb97530c74bb558aeb", - "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", - "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", - "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", - "nmdc:1df4b479c887b43319d89cc80dc35239" - ], - "was_informed_by": "gold:Gp0127652", - "id": "nmdc:c86126b11f214f19721c56fadf91d87c", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", - "started_at_time": "2021-10-11T02:27:08Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T04:45:21+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - 
"activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2711112988, - "type": "nmdc:DataObject", - "id": "jgi:574fe0b17ded5e3df1ee1494", - "name": "10533.3.165334.TGACTGA-GTCAGTC.fastq.gz" - }, - { - "name": "Gp0127654_Filtered Reads", - "description": "Filtered Reads for Gp0127654", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", - "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", - "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", - "file_size_bytes": 2479437709 - }, - { - "name": "Gp0127654_Filtered Stats", - "description": "Filtered Stats for Gp0127654", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", - "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", - "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", - "file_size_bytes": 284 - }, - { - "name": "Gp0127654_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", - "md5_checksum": "130ee7559789726a2cadccd3126dacad", - "id": "nmdc:130ee7559789726a2cadccd3126dacad", - "file_size_bytes": 3508 - }, - { - "name": "Gp0127654_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", - "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", - "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "file_size_bytes": 798264 - }, - { - "name": "Gp0127654_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127654", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", - "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", - "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "file_size_bytes": 234834 - }, - { - "name": "Gp0127654_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127654", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", - "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", - "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "file_size_bytes": 2231971137 - }, - { - "name": "Gp0127654_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127654", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", - "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", - "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "file_size_bytes": 257151 - }, - { - "name": "Gp0127654_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127654", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", - "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", - "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "file_size_bytes": 2341088 - }, - { - "name": "Gp0127654_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127654", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", - "md5_checksum": "dfc90170aa038c2425702be223cb2f23", - "id": "nmdc:dfc90170aa038c2425702be223cb2f23", - "file_size_bytes": 
1782429285 - }, - { - "name": "Gp0127654_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127654", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", - "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", - "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "file_size_bytes": 661482 - }, - { - "name": "Gp0127654_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127654", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", - "md5_checksum": "1c8339d96884c4a408de7804e00490d1", - "id": "nmdc:1c8339d96884c4a408de7804e00490d1", - "file_size_bytes": 4020719 - }, - { - "name": "Gp0127654_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127654", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_contigs.fna", - "md5_checksum": "909ae2a351ab1b99dfa877969ba33fc0", - "id": "nmdc:909ae2a351ab1b99dfa877969ba33fc0", - "file_size_bytes": 93264957 - }, - { - "name": "Gp0127654_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127654", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_scaffolds.fna", - "md5_checksum": "1bd3a82d1ced0a3a4e4b207ecdeedc50", - "id": "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", - "file_size_bytes": 92670816 - }, - { - "name": "Gp0127654_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_covstats.txt", - "md5_checksum": "e2281ea2c0342c7243ac6a3179948547", - "id": "nmdc:e2281ea2c0342c7243ac6a3179948547", - "file_size_bytes": 15633835 - }, - { - 
"name": "Gp0127654_Assembled AGP file", - "description": "Assembled AGP file for Gp0127654", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_assembly.agp", - "md5_checksum": "ad045e491d27a8a2a4bb13c62ed74fd8", - "id": "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", - "file_size_bytes": 14624353 - }, - { - "name": "Gp0127654_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127654", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_pairedMapped_sorted.bam", - "md5_checksum": "d8e09db1617046117fbb15631cf4977f", - "id": "nmdc:d8e09db1617046117fbb15631cf4977f", - "file_size_bytes": 2687176632 - }, - { - "name": "Gp0127654_Protein FAA", - "description": "Protein FAA for Gp0127654", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_proteins.faa", - "md5_checksum": "7e7c871dbe9ed0b2692444b77d0afe8d", - "id": "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", - "file_size_bytes": 55142968 - }, - { - "name": "Gp0127654_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127654", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_structural_annotation.gff", - "md5_checksum": "7b466cbbadfde9b125f2a31e48d8c60d", - "id": "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", - "file_size_bytes": 2518 - }, - { - "name": "Gp0127654_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127654", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_functional_annotation.gff", - "md5_checksum": "6a03c0a78fa59ac0a55777a9ea73e5d0", - "id": "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", - 
"file_size_bytes": 64337475 - }, - { - "name": "Gp0127654_KO TSV file", - "description": "KO TSV file for Gp0127654", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko.tsv", - "md5_checksum": "2275c42fa5206d646c7b477b184b9519", - "id": "nmdc:2275c42fa5206d646c7b477b184b9519", - "file_size_bytes": 7628926 - }, - { - "name": "Gp0127654_EC TSV file", - "description": "EC TSV file for Gp0127654", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ec.tsv", - "md5_checksum": "9c7fc55c2cbc986d520695dfb69b3e26", - "id": "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", - "file_size_bytes": 5084393 - }, - { - "name": "Gp0127654_COG GFF file", - "description": "COG GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cog.gff", - "md5_checksum": "fabdc762526357e8a6f288a07f947f06", - "id": "nmdc:fabdc762526357e8a6f288a07f947f06", - "file_size_bytes": 37680499 - }, - { - "name": "Gp0127654_PFAM GFF file", - "description": "PFAM GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_pfam.gff", - "md5_checksum": "1e8dcb98dfc7598e3965af187c296f12", - "id": "nmdc:1e8dcb98dfc7598e3965af187c296f12", - "file_size_bytes": 27765282 - }, - { - "name": "Gp0127654_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_tigrfam.gff", - "md5_checksum": "86f1a8ccf1532e11fc09d94dc39af57c", - "id": "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", - "file_size_bytes": 2970208 - }, - { - "name": "Gp0127654_SMART GFF file", - "description": "SMART GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_smart.gff", - "md5_checksum": 
"8add80a0fe95822917e4e7eaf275ed4f", - "id": "nmdc:8add80a0fe95822917e4e7eaf275ed4f", - "file_size_bytes": 8172309 - }, - { - "name": "Gp0127654_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_supfam.gff", - "md5_checksum": "6268ff527b56548792e7dca811500436", - "id": "nmdc:6268ff527b56548792e7dca811500436", - "file_size_bytes": 46611499 - }, - { - "name": "Gp0127654_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cath_funfam.gff", - "md5_checksum": "ff7ac6fb709d1f0f7b476c9a5b29524e", - "id": "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", - "file_size_bytes": 35108681 - }, - { - "name": "Gp0127654_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko_ec.gff", - "md5_checksum": "6c50fdd87bdba9116c1ff81e21b8a95c", - "id": "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c", - "file_size_bytes": 24261565 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127654_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.tooShort.fa", - "md5_checksum": "920bcae91eae59ed8b9b19bcb7392ac5", - "id": "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", - "file_size_bytes": 80638518 - }, - { - 
"name": "Gp0127654_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.unbinned.fa", - "md5_checksum": "d13bc24bdf72e7ba00d60f0e2e0805e8", - "id": "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", - "file_size_bytes": 12400628 - }, - { - "name": "Gp0127654_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127654", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_checkm_qa.out", - "md5_checksum": "3fd777151ef41b39b272cb42c1d5e8ba", - "id": "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", - "file_size_bytes": 785 - }, - { - "name": "Gp0127654_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127654", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_hqmq_bin.zip", - "md5_checksum": "470edf3d79702d3b806b545db595ca02", - "id": "nmdc:470edf3d79702d3b806b545db595ca02", - "file_size_bytes": 182 - }, - { - "name": "Gp0127654_metabat2 bins", - "description": "metabat2 bins for Gp0127654", - "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_metabat_bin.zip", - "md5_checksum": "8fc6f1a0269aa5179d72c52cf1a9726e", - "id": "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e", - "file_size_bytes": 69938 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed7" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127654", - "url": "https://data.microbiomedata.org/data/1781_100356/assembly/mapping_stats.txt", - "file_size_bytes": 14843159, - "type": "nmdc:DataObject", - "id": "nmdc:414faae2752dc595ae4f2ddab4438ec7", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed8" - }, - 
"description": "Assembled contigs fasta for gold:Gp0127654", - "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly_contigs.fna", - "file_size_bytes": 92474281, - "type": "nmdc:DataObject", - "id": "nmdc:cd12b50afea3097034758d6883864dd5", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14edc" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127654", - "url": "https://data.microbiomedata.org/data/1781_100356/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2651150044, - "type": "nmdc:DataObject", - "id": "nmdc:fdde2ac466c983fc1154c7968631df20", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ede" - }, - "description": "Assembled scaffold fasta for gold:Gp0127654", - "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly_scaffolds.fna", - "file_size_bytes": 91880416, - "type": "nmdc:DataObject", - "id": "nmdc:6e1f393ec856d3445d9a4ac23ff1b249", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee1" - }, - "description": "Assembled AGP file for gold:Gp0127654", - "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly.agp", - "file_size_bytes": 13042449, - "type": "nmdc:DataObject", - "id": "nmdc:c2e8b30ea935a2ca7bece5b913116f65", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bee" - }, - "id": "nmdc:336911e31f6622b74af1c92d2ed5f4b6", - "name": "1781_100356.krona.html", - "description": "Gold:Gp0127654 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100356/ReadbasedAnalysis/centrifuge/1781_100356.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": 
"649b003d1ae706d7b5b15bef" - }, - "id": "nmdc:cdd1b6d43bd7a8963fa3c5bab4296498", - "name": "1781_100356.json", - "description": "Gold:Gp0127654 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100356/ReadbasedAnalysis/1781_100356.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16645" - }, - "id": "nmdc:e22ff97901fed9397f221fbd8048f87d", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127654", - "file_size_bytes": 78267851, - "url": "https://data.microbiomedata.org/data/1781_100356/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16648" - }, - "id": "nmdc:9f21a2cf85bdf5ec51f41a6e331819cc", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127654", - "file_size_bytes": 12538639, - "url": "https://data.microbiomedata.org/data/1781_100356/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d55" - }, - "description": "Functional annotation GFF file for gold:Gp0127654", - "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_functional_annotation.gff", - "md5_checksum": "b28a675c6560b34691a960f7e873841d", - "file_size_bytes": 3385, - "id": "nmdc:b28a675c6560b34691a960f7e873841d", - "name": "gold:Gp0127654_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d5c" - }, - "description": "KO TSV File for gold:Gp0127654", - "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_ko.tsv", - "md5_checksum": "3b7734343770dce929591ee83d96acb6", - "file_size_bytes": 3385, - "id": "nmdc:3b7734343770dce929591ee83d96acb6", - "name": "gold:Gp0127654_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", 
- "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d5e" - }, - "description": "Protein FAA for gold:Gp0127654", - "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_proteins.faa", - "md5_checksum": "deda4116aac7e262c0edf3358bb8e384", - "file_size_bytes": 3385, - "id": "nmdc:deda4116aac7e262c0edf3358bb8e384", - "name": "gold:Gp0127654_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d63" - }, - "description": "EC TSV File for gold:Gp0127654", - "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_ec.tsv", - "md5_checksum": "b785c7809fa99d5beca859eded4a9b0f", - "file_size_bytes": 3385, - "id": "nmdc:b785c7809fa99d5beca859eded4a9b0f", - "name": "gold:Gp0127654_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d67" - }, - "description": "Structural annotation GFF file for gold:Gp0127654", - "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_structural_annotation.gff", - "md5_checksum": "a9cf54b925e1c5b8c3e0299730f5a464", - "file_size_bytes": 3385, - "id": "nmdc:a9cf54b925e1c5b8c3e0299730f5a464", - "name": "gold:Gp0127654_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34728" - }, - "has_input": [ - "nmdc:909ae2a351ab1b99dfa877969ba33fc0", - "nmdc:d8e09db1617046117fbb15631cf4977f", - "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0" - ], - "too_short_contig_num": 189586, - "part_of": [ - "nmdc:mga0h0s362" - ], - "binned_contig_num": 56, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", - "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", - "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", - "nmdc:470edf3d79702d3b806b545db595ca02", - "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e" - ], - "was_informed_by": "gold:Gp0127654", - "input_contig_num": 197669, - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0h0s362", - "mags_list": [ - { - "number_of_contig": 56, - "completeness": 18.09, - "bin_name": "bins.1", - "gene_count": 272, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 5 - } - ], - "unbinned_contig_num": 8027, - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c7" - }, - "has_input": [ - "nmdc:909ae2a351ab1b99dfa877969ba33fc0" - ], - "part_of": [ - "nmdc:mga0h0s362" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", - "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", - "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", - "nmdc:2275c42fa5206d646c7b477b184b9519", - "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", - "nmdc:fabdc762526357e8a6f288a07f947f06", - "nmdc:1e8dcb98dfc7598e3965af187c296f12", - "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", - "nmdc:8add80a0fe95822917e4e7eaf275ed4f", - "nmdc:6268ff527b56548792e7dca811500436", - "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", - "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c" - ], - "was_informed_by": 
"gold:Gp0127654", - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0h0s362", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb4" - }, - "has_input": [ - "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" - ], - "part_of": [ - "nmdc:mga0h0s362" - ], - "ctg_logsum": 130142, - "scaf_logsum": 130537, - "gap_pct": 0.0008, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:909ae2a351ab1b99dfa877969ba33fc0", - "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", - "nmdc:e2281ea2c0342c7243ac6a3179948547", - "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", - "nmdc:d8e09db1617046117fbb15631cf4977f" - ], - "asm_score": 4.409, - "was_informed_by": "gold:Gp0127654", - "ctg_powsum": 13918, - "scaf_max": 69027, - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "scaf_powsum": 13961, - "execution_resource": "NERSC-Cori", - "contigs": 197669, - "name": "Assembly Activity for nmdc:mga0h0s362", - "ctg_max": 69027, - "gc_std": 0.09749, - "contig_bp": 85731750, - "gc_avg": 0.62891, - "started_at_time": "2021-10-11T02:23:29Z", - "scaf_bp": 85732440, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 197600, - "ended_at_time": "2021-10-11T03:58:56+00:00", - "ctg_l50": 404, - "ctg_l90": 286, - "ctg_n50": 62467, - "ctg_n90": 168661, - "scaf_l50": 404, - "scaf_l90": 286, - "scaf_n50": 62435, - "scaf_n90": 168596, - "scaf_l_gt50k": 69027, - "scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.080514446 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b60" - }, - "id": "nmdc:omprc-11-kgxpef29", - "name": "Riverbed sediment microbial 
communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-tpk9x619" - ], - "has_output": [ - "jgi:574fe0b17ded5e3df1ee1494" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127654" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c87f" - }, - "has_input": [ - "nmdc:c87a7a87a5218698fbdd8ad39085b892" - ], - "part_of": [ - "nmdc:mga0h0s362" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", - "nmdc:9c600ec3be94d876f00d22808f3e8a59" - ], - "was_informed_by": "gold:Gp0127654", - "input_read_count": 30951192, - "output_read_bases": 4526478748, - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4673629992, - "name": "Read QC Activity for nmdc:mga0h0s362", - "output_read_count": 30289044, - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf45" - }, - "has_input": [ - "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:130ee7559789726a2cadccd3126dacad", - "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", - "nmdc:7ccb4ee5a0728322154b29a79d13c842", - "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", - "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", - "nmdc:f808a89810cdb2a911a5b5388b70ce94", - "nmdc:dfc90170aa038c2425702be223cb2f23", - "nmdc:84255d3bab9ea79151db5ad7bcbc677c", - "nmdc:1c8339d96884c4a408de7804e00490d1" - ], - "was_informed_by": "gold:Gp0127654", - "id": "nmdc:168441535388b19bbdee0928b42e5b20", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", - "started_at_time": "2021-10-11T02:23:29Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2411560282, - "type": "nmdc:DataObject", - "id": "jgi:574fe0b47ded5e3df1ee1496", - "name": "10533.3.165334.ACGATGA-GTCATCG.fastq.gz" - }, - { - "name": "Gp0127656_Filtered Reads", - "description": "Filtered Reads for Gp0127656", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filtered.fastq.gz", - "md5_checksum": "cec95659bb04ae095f51821ddaa9fa59", - "id": "nmdc:cec95659bb04ae095f51821ddaa9fa59", - "file_size_bytes": 2195848744 - }, - { - "name": "Gp0127656_Filtered Stats", - "description": "Filtered Stats for Gp0127656", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filterStats.txt", - 
"md5_checksum": "7b4f365bbe942a523890abf13d1b6436", - "id": "nmdc:7b4f365bbe942a523890abf13d1b6436", - "file_size_bytes": 284 - }, - { - "name": "Gp0127656_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", - "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", - "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", - "file_size_bytes": 2418 - }, - { - "name": "Gp0127656_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", - "md5_checksum": "92ab65cdaca3367552e03d895123e04f", - "id": "nmdc:92ab65cdaca3367552e03d895123e04f", - "file_size_bytes": 759212 - }, - { - "name": "Gp0127656_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127656", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", - "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", - "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", - "file_size_bytes": 231563 - }, - { - "name": "Gp0127656_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127656", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", - "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", - "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", - "file_size_bytes": 1950007455 - }, - { - "name": "Gp0127656_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127656", - "data_object_type": "Centrifuge Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", - "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", - "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", - "file_size_bytes": 255724 - }, - { - "name": "Gp0127656_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127656", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", - "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", - "id": "nmdc:a25a5d7e399624e5e5735b65a9dd322a", - "file_size_bytes": 2337553 - }, - { - "name": "Gp0127656_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127656", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", - "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", - "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", - "file_size_bytes": 1555636513 - }, - { - "name": "Gp0127656_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127656", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", - "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", - "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", - "file_size_bytes": 647090 - }, - { - "name": "Gp0127656_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127656", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", - "md5_checksum": "ae369194e4b24e137fc23da0412277a6", - "id": "nmdc:ae369194e4b24e137fc23da0412277a6", - "file_size_bytes": 3939982 - }, - { - "name": "Gp0127656_Assembled contigs fasta", - 
"description": "Assembled contigs fasta for Gp0127656", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_contigs.fna", - "md5_checksum": "8106808f8e245ef9a46a4e31561eba7f", - "id": "nmdc:8106808f8e245ef9a46a4e31561eba7f", - "file_size_bytes": 78938478 - }, - { - "name": "Gp0127656_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127656", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_scaffolds.fna", - "md5_checksum": "55385159fa8361d7ff747cdc1155512b", - "id": "nmdc:55385159fa8361d7ff747cdc1155512b", - "file_size_bytes": 78428743 - }, - { - "name": "Gp0127656_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_covstats.txt", - "md5_checksum": "4741908a5b07eaa2312ff3e6d2d991aa", - "id": "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", - "file_size_bytes": 13384382 - }, - { - "name": "Gp0127656_Assembled AGP file", - "description": "Assembled AGP file for Gp0127656", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_assembly.agp", - "md5_checksum": "172e5cf3b5c5bf8e4896058dad3e814a", - "id": "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", - "file_size_bytes": 12508060 - }, - { - "name": "Gp0127656_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127656", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_pairedMapped_sorted.bam", - "md5_checksum": "941f749a92155321c5ce7e5aa32d3b55", - "id": "nmdc:941f749a92155321c5ce7e5aa32d3b55", - "file_size_bytes": 2375706529 - }, - { - "name": "Gp0127656_Protein FAA", - "description": "Protein FAA for Gp0127656", - 
"data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_proteins.faa", - "md5_checksum": "18f68cc8acda8d33d5fd6f21a9166aa8", - "id": "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", - "file_size_bytes": 46951183 - }, - { - "name": "Gp0127656_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127656", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_structural_annotation.gff", - "md5_checksum": "87d5f3a505d23c1aa2deea960702d55b", - "id": "nmdc:87d5f3a505d23c1aa2deea960702d55b", - "file_size_bytes": 2511 - }, - { - "name": "Gp0127656_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127656", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_functional_annotation.gff", - "md5_checksum": "8e8be343bbb1ba11f3e15867b419d05d", - "id": "nmdc:8e8be343bbb1ba11f3e15867b419d05d", - "file_size_bytes": 54902900 - }, - { - "name": "Gp0127656_KO TSV file", - "description": "KO TSV file for Gp0127656", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko.tsv", - "md5_checksum": "91c2485c0ebf683aed3e7935ec60b7d1", - "id": "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", - "file_size_bytes": 6468844 - }, - { - "name": "Gp0127656_EC TSV file", - "description": "EC TSV file for Gp0127656", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ec.tsv", - "md5_checksum": "fb6740e86534daeea41ab6d5cf9d91d2", - "id": "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", - "file_size_bytes": 4308547 - }, - { - "name": "Gp0127656_COG GFF file", - "description": "COG GFF file for Gp0127656", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cog.gff", - "md5_checksum": "19da9b3f211164643f276bc74604c9b0", - "id": "nmdc:19da9b3f211164643f276bc74604c9b0", - "file_size_bytes": 32139189 - }, - { - "name": "Gp0127656_PFAM GFF file", - "description": "PFAM GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_pfam.gff", - "md5_checksum": "19905547dfa37274a9f91c9caaf6bacc", - "id": "nmdc:19905547dfa37274a9f91c9caaf6bacc", - "file_size_bytes": 23590201 - }, - { - "name": "Gp0127656_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_tigrfam.gff", - "md5_checksum": "30c2b0722d225938975243ab1041ed12", - "id": "nmdc:30c2b0722d225938975243ab1041ed12", - "file_size_bytes": 2485400 - }, - { - "name": "Gp0127656_SMART GFF file", - "description": "SMART GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_smart.gff", - "md5_checksum": "623e913fa98f88f6037754daf5d9ffc5", - "id": "nmdc:623e913fa98f88f6037754daf5d9ffc5", - "file_size_bytes": 6932331 - }, - { - "name": "Gp0127656_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_supfam.gff", - "md5_checksum": "ec56df16785bc67e073128f09366ec43", - "id": "nmdc:ec56df16785bc67e073128f09366ec43", - "file_size_bytes": 39880284 - }, - { - "name": "Gp0127656_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cath_funfam.gff", - "md5_checksum": "2831d1ead0af4681b2ae1a9f21733637", - "id": "nmdc:2831d1ead0af4681b2ae1a9f21733637", - "file_size_bytes": 29872897 - }, - { - "name": "Gp0127656_KO_EC GFF file", - "description": "KO_EC GFF file for 
Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko_ec.gff", - "md5_checksum": "53f225f74011f7d30fcfd5c60b3693ae", - "id": "nmdc:53f225f74011f7d30fcfd5c60b3693ae", - "file_size_bytes": 20564625 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127656_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.tooShort.fa", - "md5_checksum": "313c88df1890a33d388bdb23c7ad37c3", - "id": "nmdc:313c88df1890a33d388bdb23c7ad37c3", - "file_size_bytes": 69332992 - }, - { - "name": "Gp0127656_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.unbinned.fa", - "md5_checksum": "ae567f55fe899da83831fda23dcd7a20", - "id": "nmdc:ae567f55fe899da83831fda23dcd7a20", - "file_size_bytes": 9275333 - }, - { - "name": "Gp0127656_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127656", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_checkm_qa.out", - "md5_checksum": "5a8dbda6aec0825b4159d5b53481db90", - "id": "nmdc:5a8dbda6aec0825b4159d5b53481db90", - "file_size_bytes": 775 - }, - { - "name": "Gp0127656_high-quality and medium-quality bins", - "description": "high-quality 
and medium-quality bins for Gp0127656", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_hqmq_bin.zip", - "md5_checksum": "060a7f90c5c5123cac41ed946a5482af", - "id": "nmdc:060a7f90c5c5123cac41ed946a5482af", - "file_size_bytes": 182 - }, - { - "name": "Gp0127656_metabat2 bins", - "description": "metabat2 bins for Gp0127656", - "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_metabat_bin.zip", - "md5_checksum": "e9f5d03e8264308ed77da0b63eb738fe", - "id": "nmdc:e9f5d03e8264308ed77da0b63eb738fe", - "file_size_bytes": 101752 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee3" - }, - "description": "Assembled scaffold fasta for gold:Gp0127656", - "url": "https://data.microbiomedata.org/data/1781_100633/assembly/assembly_scaffolds.fna", - "file_size_bytes": 77751067, - "type": "nmdc:DataObject", - "id": "nmdc:cea40db59e6f0f57dfb38ed4339225f7", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee4" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127656", - "url": "https://data.microbiomedata.org/data/1781_100633/assembly/mapping_stats.txt", - "file_size_bytes": 12706402, - "type": "nmdc:DataObject", - "id": "nmdc:b31206dd7fe7d961882d0654ab5aaffa", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee5" - }, - "description": "Assembled AGP file for gold:Gp0127656", - "url": "https://data.microbiomedata.org/data/1781_100633/assembly/assembly.agp", - "file_size_bytes": 11151492, - "type": "nmdc:DataObject", - "id": "nmdc:3794d834e9a6e8c1e2acf616a2cc7625", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee6" - }, - "description": "Assembled contigs fasta for gold:Gp0127656", - "url": 
"https://data.microbiomedata.org/data/1781_100633/assembly/assembly_contigs.fna", - "file_size_bytes": 78260498, - "type": "nmdc:DataObject", - "id": "nmdc:3dfd278d5e4fc3539b6dfd021acdac76", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee7" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127656", - "url": "https://data.microbiomedata.org/data/1781_100633/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2344270684, - "type": "nmdc:DataObject", - "id": "nmdc:6e0e10b90c8b52db8afc73199c3d6028", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bf4" - }, - "id": "nmdc:daa88ce1c3c1f25b3b19a8c98c255e7c", - "name": "1781_100633.krona.html", - "description": "Gold:Gp0127656 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100633/ReadbasedAnalysis/centrifuge/1781_100633.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bfb" - }, - "id": "nmdc:e47f46c4a96d30e9bc65ded042a90033", - "name": "1781_100633.json", - "description": "Gold:Gp0127656 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100633/ReadbasedAnalysis/1781_100633.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16649" - }, - "id": "nmdc:0f2602d1171d6e2e1a09f0b41f6ded92", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127656", - "file_size_bytes": 9538263, - "url": "https://data.microbiomedata.org/data/1781_100633/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1664e" - }, - "id": "nmdc:5184f24c83a7a7b9a0aafb8a934234ac", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered 
contigs fasta file by metaBat2 for gold:Gp0127656", - "file_size_bytes": 67308024, - "url": "https://data.microbiomedata.org/data/1781_100633/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d59" - }, - "description": "Functional annotation GFF file for gold:Gp0127656", - "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_functional_annotation.gff", - "md5_checksum": "00f42710ff9df37cd23e5e73d54e4dd1", - "file_size_bytes": 3385, - "id": "nmdc:00f42710ff9df37cd23e5e73d54e4dd1", - "name": "gold:Gp0127656_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d5a" - }, - "description": "Protein FAA for gold:Gp0127656", - "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_proteins.faa", - "md5_checksum": "2819bbb349ca5bdbf311aeae6ada532b", - "file_size_bytes": 3385, - "id": "nmdc:2819bbb349ca5bdbf311aeae6ada532b", - "name": "gold:Gp0127656_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d5d" - }, - "description": "Structural annotation GFF file for gold:Gp0127656", - "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_structural_annotation.gff", - "md5_checksum": "0c2ae5a86d4840a0b324d73977170f1e", - "file_size_bytes": 3385, - "id": "nmdc:0c2ae5a86d4840a0b324d73977170f1e", - "name": "gold:Gp0127656_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d5f" - }, - "description": "EC TSV File for gold:Gp0127656", - "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_ec.tsv", - "md5_checksum": "33e0f5ff7c448ded210f04798894a031", - "file_size_bytes": 3385, - "id": 
"nmdc:33e0f5ff7c448ded210f04798894a031", - "name": "gold:Gp0127656_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d60" - }, - "description": "KO TSV File for gold:Gp0127656", - "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_ko.tsv", - "md5_checksum": "8d230dd7948d2b08c4de1adc0d0002b8", - "file_size_bytes": 3385, - "id": "nmdc:8d230dd7948d2b08c4de1adc0d0002b8", - "name": "gold:Gp0127656_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34726" - }, - "has_input": [ - "nmdc:8106808f8e245ef9a46a4e31561eba7f", - "nmdc:941f749a92155321c5ce7e5aa32d3b55", - "nmdc:8e8be343bbb1ba11f3e15867b419d05d" - ], - "too_short_contig_num": 163283, - "part_of": [ - "nmdc:mga00hh562" - ], - "binned_contig_num": 83, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:313c88df1890a33d388bdb23c7ad37c3", - "nmdc:ae567f55fe899da83831fda23dcd7a20", - "nmdc:5a8dbda6aec0825b4159d5b53481db90", - "nmdc:060a7f90c5c5123cac41ed946a5482af", - "nmdc:e9f5d03e8264308ed77da0b63eb738fe" - ], - "was_informed_by": "gold:Gp0127656", - "input_contig_num": 169495, - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga00hh562", - "mags_list": [ - { - "number_of_contig": 83, - "completeness": 14.35, - "bin_name": "bins.1", - "gene_count": 388, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - 
"num_t_rna": 5 - } - ], - "unbinned_contig_num": 6129, - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c6" - }, - "has_input": [ - "nmdc:8106808f8e245ef9a46a4e31561eba7f" - ], - "part_of": [ - "nmdc:mga00hh562" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", - "nmdc:87d5f3a505d23c1aa2deea960702d55b", - "nmdc:8e8be343bbb1ba11f3e15867b419d05d", - "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", - "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", - "nmdc:19da9b3f211164643f276bc74604c9b0", - "nmdc:19905547dfa37274a9f91c9caaf6bacc", - "nmdc:30c2b0722d225938975243ab1041ed12", - "nmdc:623e913fa98f88f6037754daf5d9ffc5", - "nmdc:ec56df16785bc67e073128f09366ec43", - "nmdc:2831d1ead0af4681b2ae1a9f21733637", - "nmdc:53f225f74011f7d30fcfd5c60b3693ae" - ], - "was_informed_by": "gold:Gp0127656", - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga00hh562", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb2" - }, - "has_input": [ - "nmdc:cec95659bb04ae095f51821ddaa9fa59" - ], - "part_of": [ - "nmdc:mga00hh562" - ], - "ctg_logsum": 98556, - "scaf_logsum": 99077, - "gap_pct": 0.00105, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8106808f8e245ef9a46a4e31561eba7f", - "nmdc:55385159fa8361d7ff747cdc1155512b", - "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", - "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", - 
"nmdc:941f749a92155321c5ce7e5aa32d3b55" - ], - "asm_score": 2.914, - "was_informed_by": "gold:Gp0127656", - "ctg_powsum": 10453, - "scaf_max": 9079, - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "scaf_powsum": 10508, - "execution_resource": "NERSC-Cori", - "contigs": 169495, - "name": "Assembly Activity for nmdc:mga00hh562", - "ctg_max": 9079, - "gc_std": 0.09653, - "contig_bp": 72511508, - "gc_avg": 0.62989, - "started_at_time": "2021-10-11T02:23:35Z", - "scaf_bp": 72512268, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 169419, - "ended_at_time": "2021-10-11T03:58:56+00:00", - "ctg_l50": 399, - "ctg_l90": 286, - "ctg_n50": 54638, - "ctg_n90": 144448, - "scaf_l50": 399, - "scaf_l90": 286, - "scaf_n50": 54616, - "scaf_n90": 144376 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b61" - }, - "id": "nmdc:omprc-11-qrsway30", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-mmr87q87" - ], - "has_output": [ - "jgi:574fe0b47ded5e3df1ee1496" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-18", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127656" - ] - } - ], - 
"reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c87c" - }, - "has_input": [ - "nmdc:a604c87c632165bb5223eebda60801d0" - ], - "part_of": [ - "nmdc:mga00hh562" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:cec95659bb04ae095f51821ddaa9fa59", - "nmdc:7b4f365bbe942a523890abf13d1b6436" - ], - "was_informed_by": "gold:Gp0127656", - "input_read_count": 27317020, - "output_read_bases": 3960490395, - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "execution_resource": "NERSC-Cori", - "input_read_bases": 4124870020, - "name": "Read QC Activity for nmdc:mga00hh562", - "output_read_count": 26481746, - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf3e" - }, - "has_input": [ - "nmdc:cec95659bb04ae095f51821ddaa9fa59" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:ccbe419157d8286626330fd0eb0dd0e0", - "nmdc:92ab65cdaca3367552e03d895123e04f", - "nmdc:0b3ff6503723d6ea9b84552f68ed4270", - "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", - "nmdc:a3255df52cd6150f03bbf7cbd655ec76", - "nmdc:a25a5d7e399624e5e5735b65a9dd322a", - "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", - "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", - "nmdc:ae369194e4b24e137fc23da0412277a6" - ], - "was_informed_by": "gold:Gp0127656", - "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga00hh562", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:58:56+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - 
"pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2103957707, - "type": "nmdc:DataObject", - "id": "jgi:574fde907ded5e3df1ee1426", - "name": "10533.2.165322.GTGAGCT-AAGCTCA.fastq.gz" - }, - { - "name": "Gp0127651_Filtered Reads", - "description": "Filtered Reads for Gp0127651", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filtered.fastq.gz", - "md5_checksum": "2791a196017767af3b5b21a3029799c0", - "id": "nmdc:2791a196017767af3b5b21a3029799c0", - "file_size_bytes": 1856919615 - }, - { - "name": "Gp0127651_Filtered Stats", - "description": "Filtered Stats for Gp0127651", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", - "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", - "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", - "file_size_bytes": 283 - }, - { - "name": "Gp0127651_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", - "md5_checksum": "53ee263960c39126e039656a121deb96", - "id": "nmdc:53ee263960c39126e039656a121deb96", - "file_size_bytes": 1199 - }, - { - "name": "Gp0127651_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", - "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", - "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "file_size_bytes": 703299 - }, - { - "name": "Gp0127651_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for 
Gp0127651", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", - "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", - "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "file_size_bytes": 229311 - }, - { - "name": "Gp0127651_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127651", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", - "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", - "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "file_size_bytes": 1642196063 - }, - { - "name": "Gp0127651_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127651", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", - "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", - "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", - "file_size_bytes": 254418 - }, - { - "name": "Gp0127651_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127651", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", - "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", - "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", - "file_size_bytes": 2333132 - }, - { - "name": "Gp0127651_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127651", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", - "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", - "id": 
"nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "file_size_bytes": 1309125719 - }, - { - "name": "Gp0127651_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127651", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", - "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", - "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "file_size_bytes": 639737 - }, - { - "name": "Gp0127651_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127651", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", - "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", - "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", - "file_size_bytes": 3988966 - }, - { - "name": "Gp0127651_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127651", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_contigs.fna", - "md5_checksum": "8483663a943ff4c0fc0249353676bfc1", - "id": "nmdc:8483663a943ff4c0fc0249353676bfc1", - "file_size_bytes": 95957530 - }, - { - "name": "Gp0127651_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127651", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_scaffolds.fna", - "md5_checksum": "ccca920c56ad3d050e2d8801bcbe4855", - "id": "nmdc:ccca920c56ad3d050e2d8801bcbe4855", - "file_size_bytes": 95414704 - }, - { - "name": "Gp0127651_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_covstats.txt", - "md5_checksum": "f21e374c1c31c02bd0e41228cc7895c3", - "id": 
"nmdc:f21e374c1c31c02bd0e41228cc7895c3", - "file_size_bytes": 14289388 - }, - { - "name": "Gp0127651_Assembled AGP file", - "description": "Assembled AGP file for Gp0127651", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_assembly.agp", - "md5_checksum": "f43ae7935184d10ba65961171efcac34", - "id": "nmdc:f43ae7935184d10ba65961171efcac34", - "file_size_bytes": 13343603 - }, - { - "name": "Gp0127651_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127651", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_pairedMapped_sorted.bam", - "md5_checksum": "838162ead3f121f5bc02bc1234a32a55", - "id": "nmdc:838162ead3f121f5bc02bc1234a32a55", - "file_size_bytes": 2037589818 - }, - { - "name": "Gp0127651_Protein FAA", - "description": "Protein FAA for Gp0127651", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_proteins.faa", - "md5_checksum": "d8dc4f31293c549b12bbcab915d708cc", - "id": "nmdc:d8dc4f31293c549b12bbcab915d708cc", - "file_size_bytes": 54370216 - }, - { - "name": "Gp0127651_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127651", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_structural_annotation.gff", - "md5_checksum": "415256907dcafaa68778a2ba358d9ac5", - "id": "nmdc:415256907dcafaa68778a2ba358d9ac5", - "file_size_bytes": 2517 - }, - { - "name": "Gp0127651_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127651", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_functional_annotation.gff", - "md5_checksum": 
"f0c60a537e6867bf62fde15577669453", - "id": "nmdc:f0c60a537e6867bf62fde15577669453", - "file_size_bytes": 61364019 - }, - { - "name": "Gp0127651_KO TSV file", - "description": "KO TSV file for Gp0127651", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko.tsv", - "md5_checksum": "e0f16b60c50581799b7ecb254e61e537", - "id": "nmdc:e0f16b60c50581799b7ecb254e61e537", - "file_size_bytes": 6908291 - }, - { - "name": "Gp0127651_EC TSV file", - "description": "EC TSV file for Gp0127651", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ec.tsv", - "md5_checksum": "6eb21304f0762bd8c11b98826d310321", - "id": "nmdc:6eb21304f0762bd8c11b98826d310321", - "file_size_bytes": 4650091 - }, - { - "name": "Gp0127651_COG GFF file", - "description": "COG GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cog.gff", - "md5_checksum": "4ea7982c99cbb6d8ccc9fd949bee09ec", - "id": "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", - "file_size_bytes": 36137856 - }, - { - "name": "Gp0127651_PFAM GFF file", - "description": "PFAM GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_pfam.gff", - "md5_checksum": "f389dc8a93de9f21322db385b2788f5f", - "id": "nmdc:f389dc8a93de9f21322db385b2788f5f", - "file_size_bytes": 27173740 - }, - { - "name": "Gp0127651_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_tigrfam.gff", - "md5_checksum": "8e6659ce96dfa72ceefda39c74fb1dce", - "id": "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", - "file_size_bytes": 2943355 - }, - { - "name": "Gp0127651_SMART GFF file", - "description": "SMART GFF file for Gp0127651", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_smart.gff", - "md5_checksum": "89bc9cf9183fed6700cde44fad41b830", - "id": "nmdc:89bc9cf9183fed6700cde44fad41b830", - "file_size_bytes": 7927726 - }, - { - "name": "Gp0127651_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_supfam.gff", - "md5_checksum": "84aae368e77c1d07c6b6e8deecbc3f3b", - "id": "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", - "file_size_bytes": 45499652 - }, - { - "name": "Gp0127651_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cath_funfam.gff", - "md5_checksum": "ee5612e5ee82ec2d57029d1bc4e1756f", - "id": "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", - "file_size_bytes": 34280847 - }, - { - "name": "Gp0127651_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko_ec.gff", - "md5_checksum": "68c06be8d27d1697b4a6955537b318c8", - "id": "nmdc:68c06be8d27d1697b4a6955537b318c8", - "file_size_bytes": 21943549 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127651_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.tooShort.fa", - "md5_checksum": 
"6f012bfca6cb653f92eaf927003de0fa", - "id": "nmdc:6f012bfca6cb653f92eaf927003de0fa", - "file_size_bytes": 77381118 - }, - { - "name": "Gp0127651_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.unbinned.fa", - "md5_checksum": "298e0a0c98ebe4fb673da7de9fcb03a2", - "id": "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", - "file_size_bytes": 17278743 - }, - { - "name": "Gp0127651_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127651", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_checkm_qa.out", - "md5_checksum": "66fd77d80cc9257da98c5bce4cb30626", - "id": "nmdc:66fd77d80cc9257da98c5bce4cb30626", - "file_size_bytes": 760 - }, - { - "name": "Gp0127651_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127651", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_hqmq_bin.zip", - "md5_checksum": "06caec963e007225d1d9411078829100", - "id": "nmdc:06caec963e007225d1d9411078829100", - "file_size_bytes": 182 - }, - { - "name": "Gp0127651_metabat2 bins", - "description": "metabat2 bins for Gp0127651", - "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_metabat_bin.zip", - "md5_checksum": "eb5216cc4e09d88c4c59a76c4808a693", - "id": "nmdc:eb5216cc4e09d88c4c59a76c4808a693", - "file_size_bytes": 397044 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ec9" - }, - "description": "Assembled contigs fasta for gold:Gp0127651", - "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly_contigs.fna", - "file_size_bytes": 95235782, - "type": "nmdc:DataObject", - "id": "nmdc:49c49b255b8db84f4b79e0ad5a963c82", - "name": 
"assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14eca" - }, - "description": "Assembled scaffold fasta for gold:Gp0127651", - "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly_scaffolds.fna", - "file_size_bytes": 94693464, - "type": "nmdc:DataObject", - "id": "nmdc:6b1d7af20d7a316f3b13f1707ce7c518", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ecb" - }, - "description": "Assembled AGP file for gold:Gp0127651", - "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly.agp", - "file_size_bytes": 11899059, - "type": "nmdc:DataObject", - "id": "nmdc:36f080b0d13effe19b1f18dfc041a341", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ecd" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127651", - "url": "https://data.microbiomedata.org/data/1781_100353/assembly/mapping_stats.txt", - "file_size_bytes": 13567640, - "type": "nmdc:DataObject", - "id": "nmdc:e1e806d81cc6cd9f22702e75849f5e31", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed6" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127651", - "url": "https://data.microbiomedata.org/data/1781_100353/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2010414032, - "type": "nmdc:DataObject", - "id": "nmdc:b041d1ee91abbe2d6ade41bc46c67ab9", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bd4" - }, - "id": "nmdc:4ee6b6c602c6f2c054154f48da58b304", - "name": "1781_100353.krona.html", - "description": "Gold:Gp0127651 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100353/ReadbasedAnalysis/centrifuge/1781_100353.krona.html", - 
"file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15bd9" - }, - "id": "nmdc:fba8766b1f1e3e5375ac56ecde508e96", - "name": "1781_100353.json", - "description": "Gold:Gp0127651 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100353/ReadbasedAnalysis/1781_100353.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16638" - }, - "id": "nmdc:2574b731c1f785d106e9033639833750", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127651", - "file_size_bytes": 75274019, - "url": "https://data.microbiomedata.org/data/1781_100353/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16644" - }, - "id": "nmdc:3fc9dca08829f51d49a574f916099e20", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127651", - "file_size_bytes": 18449153, - "url": "https://data.microbiomedata.org/data/1781_100353/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d44" - }, - "description": "EC TSV File for gold:Gp0127651", - "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_ec.tsv", - "md5_checksum": "2a7c5ba82dff4dd5d996ad5bc824103c", - "file_size_bytes": 3385, - "id": "nmdc:2a7c5ba82dff4dd5d996ad5bc824103c", - "name": "gold:Gp0127651_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d45" - }, - "description": "KO TSV File for gold:Gp0127651", - "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_ko.tsv", - "md5_checksum": "84dc1abc2d39254da6c3d2cd6cff6d9d", - "file_size_bytes": 3385, - "id": "nmdc:84dc1abc2d39254da6c3d2cd6cff6d9d", - "name": 
"gold:Gp0127651_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d46" - }, - "description": "Protein FAA for gold:Gp0127651", - "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_proteins.faa", - "md5_checksum": "67dfacdfc27cb6b0ec4787e1a40d9547", - "file_size_bytes": 3385, - "id": "nmdc:67dfacdfc27cb6b0ec4787e1a40d9547", - "name": "gold:Gp0127651_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d48" - }, - "description": "Structural annotation GFF file for gold:Gp0127651", - "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_structural_annotation.gff", - "md5_checksum": "714fb73a8b3011d0b2faea98eda477c3", - "file_size_bytes": 3385, - "id": "nmdc:714fb73a8b3011d0b2faea98eda477c3", - "name": "gold:Gp0127651_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d4c" - }, - "description": "Functional annotation GFF file for gold:Gp0127651", - "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_functional_annotation.gff", - "md5_checksum": "e25cb289f398c007806c72c080724872", - "file_size_bytes": 3385, - "id": "nmdc:e25cb289f398c007806c72c080724872", - "name": "gold:Gp0127651_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34721" - }, - "has_input": [ - "nmdc:8483663a943ff4c0fc0249353676bfc1", - "nmdc:838162ead3f121f5bc02bc1234a32a55", - "nmdc:f0c60a537e6867bf62fde15577669453" - ], - "too_short_contig_num": 168908, - "part_of": [ - "nmdc:mga08hnt47" - ], - 
"binned_contig_num": 216, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:6f012bfca6cb653f92eaf927003de0fa", - "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", - "nmdc:66fd77d80cc9257da98c5bce4cb30626", - "nmdc:06caec963e007225d1d9411078829100", - "nmdc:eb5216cc4e09d88c4c59a76c4808a693" - ], - "was_informed_by": "gold:Gp0127651", - "input_contig_num": 180437, - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga08hnt47", - "mags_list": [ - { - "number_of_contig": 216, - "completeness": 36.79, - "bin_name": "bins.1", - "gene_count": 1612, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.97, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 28 - } - ], - "unbinned_contig_num": 11313, - "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:57:48+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9bf" - }, - "has_input": [ - "nmdc:8483663a943ff4c0fc0249353676bfc1" - ], - "part_of": [ - "nmdc:mga08hnt47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d8dc4f31293c549b12bbcab915d708cc", - "nmdc:415256907dcafaa68778a2ba358d9ac5", - "nmdc:f0c60a537e6867bf62fde15577669453", - "nmdc:e0f16b60c50581799b7ecb254e61e537", - "nmdc:6eb21304f0762bd8c11b98826d310321", - "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", - "nmdc:f389dc8a93de9f21322db385b2788f5f", - "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", - "nmdc:89bc9cf9183fed6700cde44fad41b830", - "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", 
- "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", - "nmdc:68c06be8d27d1697b4a6955537b318c8" - ], - "was_informed_by": "gold:Gp0127651", - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga08hnt47", - "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:57:48+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fab" - }, - "has_input": [ - "nmdc:2791a196017767af3b5b21a3029799c0" - ], - "part_of": [ - "nmdc:mga08hnt47" - ], - "ctg_logsum": 192880, - "scaf_logsum": 193641, - "gap_pct": 0.00165, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8483663a943ff4c0fc0249353676bfc1", - "nmdc:ccca920c56ad3d050e2d8801bcbe4855", - "nmdc:f21e374c1c31c02bd0e41228cc7895c3", - "nmdc:f43ae7935184d10ba65961171efcac34", - "nmdc:838162ead3f121f5bc02bc1234a32a55" - ], - "asm_score": 4.164, - "was_informed_by": "gold:Gp0127651", - "ctg_powsum": 20759, - "scaf_max": 29106, - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "scaf_powsum": 20844, - "execution_resource": "NERSC-Cori", - "contigs": 180439, - "name": "Assembly Activity for nmdc:mga08hnt47", - "ctg_max": 29106, - "gc_std": 0.109, - "contig_bp": 88911005, - "gc_avg": 0.62144, - "started_at_time": "2021-10-11T02:27:15Z", - "scaf_bp": 88912475, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 180310, - "ended_at_time": "2021-10-11T03:57:48+00:00", - "ctg_l50": 492, - "ctg_l90": 292, - "ctg_n50": 51430, - "ctg_n90": 149085, - "scaf_l50": 493, - "scaf_l90": 292, - "scaf_n50": 51225, - "scaf_n90": 148971 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b62" - }, - "id": "nmdc:omprc-11-nry91b19", - "name": 
"Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", - "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-jqzk1523" - ], - "has_output": [ - "jgi:574fde907ded5e3df1ee1426" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127651" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c877" - }, - "has_input": [ - "nmdc:8254ce50b88be8c384fd37fe21e0d0c4" - ], - "part_of": [ - "nmdc:mga08hnt47" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:2791a196017767af3b5b21a3029799c0", - "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f" - ], - "was_informed_by": "gold:Gp0127651", - "input_read_count": 23728904, - "output_read_bases": 3352071049, - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3583064504, - "name": "Read QC Activity for nmdc:mga08hnt47", - "output_read_count": 22416634, - "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:57:48+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf42" - }, - "has_input": [ - "nmdc:2791a196017767af3b5b21a3029799c0" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:53ee263960c39126e039656a121deb96", - "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", - "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", - "nmdc:a7d8f038b87bd28843e30c5dd115704b", - "nmdc:b4cbc81c986c67c1037c8b7280924683", - "nmdc:e0c61a191258597984a05d86eaf4d71f", - "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", - "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", - "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" - ], - "was_informed_by": "gold:Gp0127651", - "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga08hnt47", - "started_at_time": "2021-10-11T02:27:15Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:57:48+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 2116898122, - "type": "nmdc:DataObject", - "id": "jgi:574fde947ded5e3df1ee1429", - "name": "10533.2.165322.GTCTCCT-AAGGAGA.fastq.gz" - }, - { - "name": "Gp0127655_Filtered Reads", - "description": "Filtered Reads for Gp0127655", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filtered.fastq.gz", - "md5_checksum": "04b9014981f7035c39bd7f870613ed93", - "id": "nmdc:04b9014981f7035c39bd7f870613ed93", - "file_size_bytes": 1880069213 - }, - { - "name": "Gp0127655_Filtered Stats", - "description": "Filtered Stats for Gp0127655", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filterStats.txt", - 
"md5_checksum": "b66266969ab3df4c1cb2b16c1fa7d098", - "id": "nmdc:b66266969ab3df4c1cb2b16c1fa7d098", - "file_size_bytes": 286 - }, - { - "name": "Gp0127655_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", - "md5_checksum": "46371c7bc8259e459f975f915aaac26f", - "id": "nmdc:46371c7bc8259e459f975f915aaac26f", - "file_size_bytes": 2178 - }, - { - "name": "Gp0127655_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", - "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", - "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", - "file_size_bytes": 697690 - }, - { - "name": "Gp0127655_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127655", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", - "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", - "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "file_size_bytes": 231103 - }, - { - "name": "Gp0127655_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for Gp0127655", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", - "md5_checksum": "e3f410adc2347396abfdec2a848000d9", - "id": "nmdc:e3f410adc2347396abfdec2a848000d9", - "file_size_bytes": 1676897166 - }, - { - "name": "Gp0127655_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127655", - "data_object_type": "Centrifuge Classification Report", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", - "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", - "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "file_size_bytes": 253692 - }, - { - "name": "Gp0127655_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127655", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", - "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", - "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "file_size_bytes": 2329422 - }, - { - "name": "Gp0127655_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127655", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", - "md5_checksum": "1d4f5a605d4549801fda16da567efe56", - "id": "nmdc:1d4f5a605d4549801fda16da567efe56", - "file_size_bytes": 1336793184 - }, - { - "name": "Gp0127655_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127655", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", - "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", - "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", - "file_size_bytes": 632192 - }, - { - "name": "Gp0127655_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127655", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", - "md5_checksum": "157f7672690ba8207808cc4386ff10a4", - "id": "nmdc:157f7672690ba8207808cc4386ff10a4", - "file_size_bytes": 3946317 - }, - { - "name": "Gp0127655_Assembled contigs fasta", - 
"description": "Assembled contigs fasta for Gp0127655", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_contigs.fna", - "md5_checksum": "98bc1e8aa3703e255a930f6c6f923453", - "id": "nmdc:98bc1e8aa3703e255a930f6c6f923453", - "file_size_bytes": 93445462 - }, - { - "name": "Gp0127655_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127655", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_scaffolds.fna", - "md5_checksum": "769bd168524b84f2d10dfdb2a42a909d", - "id": "nmdc:769bd168524b84f2d10dfdb2a42a909d", - "file_size_bytes": 92895420 - }, - { - "name": "Gp0127655_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_covstats.txt", - "md5_checksum": "5bd5f8108ae1d767ea5a79ebde3d83de", - "id": "nmdc:5bd5f8108ae1d767ea5a79ebde3d83de", - "file_size_bytes": 14474338 - }, - { - "name": "Gp0127655_Assembled AGP file", - "description": "Assembled AGP file for Gp0127655", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_assembly.agp", - "md5_checksum": "933de420870147e58137b328e0d54d87", - "id": "nmdc:933de420870147e58137b328e0d54d87", - "file_size_bytes": 13523380 - }, - { - "name": "Gp0127655_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127655", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_pairedMapped_sorted.bam", - "md5_checksum": "2b699163734ee73cbccc94e4767d36c0", - "id": "nmdc:2b699163734ee73cbccc94e4767d36c0", - "file_size_bytes": 2057808015 - }, - { - "name": "Gp0127655_Protein FAA", - "description": "Protein FAA for Gp0127655", - 
"data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_proteins.faa", - "md5_checksum": "9b57eb78fd2e8f0af8b55cf5fb3a2bab", - "id": "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", - "file_size_bytes": 53898203 - }, - { - "name": "Gp0127655_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127655", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_structural_annotation.gff", - "md5_checksum": "6b11bf4eaf9723559b6015296b802252", - "id": "nmdc:6b11bf4eaf9723559b6015296b802252", - "file_size_bytes": 2515 - }, - { - "name": "Gp0127655_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127655", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_functional_annotation.gff", - "md5_checksum": "0940fbdf18becd76e7dd3abcfaba12b5", - "id": "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", - "file_size_bytes": 61535970 - }, - { - "name": "Gp0127655_KO TSV file", - "description": "KO TSV file for Gp0127655", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko.tsv", - "md5_checksum": "a1cd7e1382fd1818c42860a0555f1f57", - "id": "nmdc:a1cd7e1382fd1818c42860a0555f1f57", - "file_size_bytes": 6994761 - }, - { - "name": "Gp0127655_EC TSV file", - "description": "EC TSV file for Gp0127655", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ec.tsv", - "md5_checksum": "3a27c2da0a3d05e4c44547afb2875195", - "id": "nmdc:3a27c2da0a3d05e4c44547afb2875195", - "file_size_bytes": 4598688 - }, - { - "name": "Gp0127655_COG GFF file", - "description": "COG GFF file for Gp0127655", - "url": 
"https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cog.gff", - "md5_checksum": "1c8529ca35ee0b275b8ca3d2b5c565ec", - "id": "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", - "file_size_bytes": 36290392 - }, - { - "name": "Gp0127655_PFAM GFF file", - "description": "PFAM GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_pfam.gff", - "md5_checksum": "8bf1c44c4a9fc7f55dcf58be1273b46f", - "id": "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", - "file_size_bytes": 27016921 - }, - { - "name": "Gp0127655_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_tigrfam.gff", - "md5_checksum": "acb8325b4800ff62e3fda52b21b92ecc", - "id": "nmdc:acb8325b4800ff62e3fda52b21b92ecc", - "file_size_bytes": 2768301 - }, - { - "name": "Gp0127655_SMART GFF file", - "description": "SMART GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_smart.gff", - "md5_checksum": "a044873e470ce9f2be06ae99cd1cc242", - "id": "nmdc:a044873e470ce9f2be06ae99cd1cc242", - "file_size_bytes": 7806208 - }, - { - "name": "Gp0127655_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_supfam.gff", - "md5_checksum": "40f0627934454a354886609d7068a12c", - "id": "nmdc:40f0627934454a354886609d7068a12c", - "file_size_bytes": 45276498 - }, - { - "name": "Gp0127655_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cath_funfam.gff", - "md5_checksum": "60255b31e223a7b5bad8f186b6f65d7c", - "id": "nmdc:60255b31e223a7b5bad8f186b6f65d7c", - "file_size_bytes": 33794110 - }, - { - "name": "Gp0127655_KO_EC GFF file", - "description": "KO_EC GFF file for 
Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko_ec.gff", - "md5_checksum": "b8d559d4ea779c4076e3c9e1e92bddcf", - "id": "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf", - "file_size_bytes": 22249696 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127655_tooShort (< 3kb) filtered contigs fasta file by metaBat2", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.tooShort.fa", - "md5_checksum": "58f2cc63798346be853bccacdd7ca30d", - "id": "nmdc:58f2cc63798346be853bccacdd7ca30d", - "file_size_bytes": 77075570 - }, - { - "name": "Gp0127655_unbinned fasta file from metabat2", - "description": "unbinned fasta file from metabat2 for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.unbinned.fa", - "md5_checksum": "8b2dbaba9c1219096831ad99d8b7c056", - "id": "nmdc:8b2dbaba9c1219096831ad99d8b7c056", - "file_size_bytes": 14551969 - }, - { - "name": "Gp0127655_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for Gp0127655", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_checkm_qa.out", - "md5_checksum": "c562d8d5ccc986d672b4e48e006fafab", - "id": "nmdc:c562d8d5ccc986d672b4e48e006fafab", - "file_size_bytes": 775 - }, - { - "name": "Gp0127655_high-quality and medium-quality bins", - "description": "high-quality 
and medium-quality bins for Gp0127655", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_hqmq_bin.zip", - "md5_checksum": "2eaf0a7d519ac7c034d63797d735080c", - "id": "nmdc:2eaf0a7d519ac7c034d63797d735080c", - "file_size_bytes": 182 - }, - { - "name": "Gp0127655_metabat2 bins", - "description": "metabat2 bins for Gp0127655", - "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_metabat_bin.zip", - "md5_checksum": "668a0a6dbd840dd2178a00c2af4c2237", - "id": "nmdc:668a0a6dbd840dd2178a00c2af4c2237", - "file_size_bytes": 527634 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14edb" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127655", - "url": "https://data.microbiomedata.org/data/1781_100357/assembly/mapping_stats.txt", - "file_size_bytes": 13742582, - "type": "nmdc:DataObject", - "id": "nmdc:f4e6a47ebd604f90384f130eca3e401e", - "name": "mapping_stats.txt", - "data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14edf" - }, - "description": "Assembled contigs fasta for gold:Gp0127655", - "url": "https://data.microbiomedata.org/data/1781_100357/assembly/assembly_contigs.fna", - "file_size_bytes": 92713706, - "type": "nmdc:DataObject", - "id": "nmdc:2860c363baa5fd6e5bbdc96a8d54b56b", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee0" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127655", - "url": "https://data.microbiomedata.org/data/1781_100357/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 2031000089, - "type": "nmdc:DataObject", - "id": "nmdc:a20a83922a21eba2ec447dacc259c083", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee2" - }, - "description": "Assembled scaffold fasta for gold:Gp0127655", - "url": 
"https://data.microbiomedata.org/data/1781_100357/assembly/assembly_scaffolds.fna", - "file_size_bytes": 92163960, - "type": "nmdc:DataObject", - "id": "nmdc:07f2db98361b0d4e4d4c6a89294348ce", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ee9" - }, - "description": "Assembled AGP file for gold:Gp0127655", - "url": "https://data.microbiomedata.org/data/1781_100357/assembly/assembly.agp", - "file_size_bytes": 12059276, - "type": "nmdc:DataObject", - "id": "nmdc:4f8b4cfdd8cbff990d5f4c5b932beb96", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15c0a" - }, - "id": "nmdc:8efdeab08615731f46e30a1cdc6bcb2d", - "name": "1781_100357.krona.html", - "description": "Gold:Gp0127655 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100357/ReadbasedAnalysis/centrifuge/1781_100357.krona.html", - "file_size_bytes": 3385, - "data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15c0e" - }, - "id": "nmdc:4df533784cd9ca8514f9622ba3ae0036", - "name": "1781_100357.json", - "description": "Gold:Gp0127655 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100357/ReadbasedAnalysis/1781_100357.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16641" - }, - "id": "nmdc:57a6185b7ec704380a4856d0083dbd1d", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127655", - "file_size_bytes": 74915065, - "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16643" - }, - "id": "nmdc:7432d75e55847cf9a3c66589024e342c", - "name": "gold:Gp0127655.bins.3.fa", - "description": "metabat2 binned contig file for 
gold:Gp0127655", - "file_size_bytes": 210579, - "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.3.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16646" - }, - "id": "nmdc:a9494cde349debe8557cbd59c43138fe", - "name": "checkm_qa.out", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0127655", - "file_size_bytes": 1085, - "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/checkm_qa.out", - "type": "nmdc:DataObject", - "data_object_type": "CheckM Statistics" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1664a" - }, - "id": "nmdc:299ed9ebee6f2a5e5c202a11b9e5536a", - "name": "gold:Gp0127655.bins.1.fa", - "description": "metabat2 binned contig file for gold:Gp0127655", - "file_size_bytes": 216122, - "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.1.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1664b" - }, - "id": "nmdc:61c5dd80a3ac06408612da5aa2ad8bc1", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127655", - "file_size_bytes": 15144355, - "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b1664c" - }, - "id": "nmdc:556fbc4cc2220b73f70dce6b46ff34c7", - "name": "gold:Gp0127655.bins.2.fa", - "description": "metabat2 binned contig file for gold:Gp0127655", - "file_size_bytes": 692364, - "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.2.fa", - "type": "nmdc:DataObject", - "data_object_type": "Metagenome Bins" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d52" - }, - "description": "KO TSV File for gold:Gp0127655", - "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_ko.tsv", - 
"md5_checksum": "6d1185f4034e364b74109d40326a450a", - "file_size_bytes": 3385, - "id": "nmdc:6d1185f4034e364b74109d40326a450a", - "name": "gold:Gp0127655_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d53" - }, - "description": "Structural annotation GFF file for gold:Gp0127655", - "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_structural_annotation.gff", - "md5_checksum": "05e2702ecae6ba0ba0b0898132850b9f", - "file_size_bytes": 3385, - "id": "nmdc:05e2702ecae6ba0ba0b0898132850b9f", - "name": "gold:Gp0127655_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d56" - }, - "description": "Functional annotation GFF file for gold:Gp0127655", - "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_functional_annotation.gff", - "md5_checksum": "0b4a5dc91c42b7fea3fd514d5cb3138b", - "file_size_bytes": 3385, - "id": "nmdc:0b4a5dc91c42b7fea3fd514d5cb3138b", - "name": "gold:Gp0127655_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d57" - }, - "description": "EC TSV File for gold:Gp0127655", - "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_ec.tsv", - "md5_checksum": "32c6c6dbce4a1c6ab92810a86f90c574", - "file_size_bytes": 3385, - "id": "nmdc:32c6c6dbce4a1c6ab92810a86f90c574", - "name": "gold:Gp0127655_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d58" - }, - "description": "Protein FAA for gold:Gp0127655", - "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_proteins.faa", - "md5_checksum": "a31096eb3e473fd0c68d09096bc3fd85", - "file_size_bytes": 
3385, - "id": "nmdc:a31096eb3e473fd0c68d09096bc3fd85", - "name": "gold:Gp0127655_Protein FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34725" - }, - "has_input": [ - "nmdc:98bc1e8aa3703e255a930f6c6f923453", - "nmdc:2b699163734ee73cbccc94e4767d36c0", - "nmdc:0940fbdf18becd76e7dd3abcfaba12b5" - ], - "too_short_contig_num": 173159, - "part_of": [ - "nmdc:mga0317978" - ], - "binned_contig_num": 412, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:58f2cc63798346be853bccacdd7ca30d", - "nmdc:8b2dbaba9c1219096831ad99d8b7c056", - "nmdc:c562d8d5ccc986d672b4e48e006fafab", - "nmdc:2eaf0a7d519ac7c034d63797d735080c", - "nmdc:668a0a6dbd840dd2178a00c2af4c2237" - ], - "was_informed_by": "gold:Gp0127655", - "input_contig_num": 182939, - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga0317978", - "mags_list": [ - { - "number_of_contig": 412, - "completeness": 27.84, - "bin_name": "bins.1", - "gene_count": 2086, - "bin_quality": "LQ", - "gtdbtk_species": "", - "gtdbtk_order": "", - "num_16s": 0, - "gtdbtk_family": "", - "gtdbtk_domain": "", - "contamination": 0.0, - "gtdbtk_class": "", - "gtdbtk_phylum": "", - "num_5s": 0, - "num_23s": 0, - "gtdbtk_genus": "", - "num_t_rna": 22 - } - ], - "unbinned_contig_num": 9368, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-10-11T03:21:25+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9c4" - }, - "has_input": [ - 
"nmdc:98bc1e8aa3703e255a930f6c6f923453" - ], - "part_of": [ - "nmdc:mga0317978" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", - "nmdc:6b11bf4eaf9723559b6015296b802252", - "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", - "nmdc:a1cd7e1382fd1818c42860a0555f1f57", - "nmdc:3a27c2da0a3d05e4c44547afb2875195", - "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", - "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", - "nmdc:acb8325b4800ff62e3fda52b21b92ecc", - "nmdc:a044873e470ce9f2be06ae99cd1cc242", - "nmdc:40f0627934454a354886609d7068a12c", - "nmdc:60255b31e223a7b5bad8f186b6f65d7c", - "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf" - ], - "was_informed_by": "gold:Gp0127655", - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0317978", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-10-11T03:21:25+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139fb3" - }, - "has_input": [ - "nmdc:04b9014981f7035c39bd7f870613ed93" - ], - "part_of": [ - "nmdc:mga0317978" - ], - "ctg_logsum": 170806, - "scaf_logsum": 171254, - "gap_pct": 0.00086, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:98bc1e8aa3703e255a930f6c6f923453", - "nmdc:769bd168524b84f2d10dfdb2a42a909d", - "nmdc:5bd5f8108ae1d767ea5a79ebde3d83de", - "nmdc:933de420870147e58137b328e0d54d87", - "nmdc:2b699163734ee73cbccc94e4767d36c0" - ], - "asm_score": 3.393, - "was_informed_by": "gold:Gp0127655", - "ctg_powsum": 18408, - "scaf_max": 16317, - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", - "scaf_powsum": 18458, - "execution_resource": "NERSC-Cori", - "contigs": 182939, - "name": "Assembly Activity for nmdc:mga0317978", - "ctg_max": 16317, - "gc_std": 0.09607, - "contig_bp": 86362605, - "gc_avg": 0.63666, - "started_at_time": 
"2021-10-11T02:23:42Z", - "scaf_bp": 86363345, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 182865, - "ended_at_time": "2021-10-11T03:21:25+00:00", - "ctg_l50": 456, - "ctg_l90": 289, - "ctg_n50": 53760, - "ctg_n90": 154881, - "scaf_l50": 457, - "scaf_l90": 289, - "scaf_n50": 53484, - "scaf_n90": 154812 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b63" - }, - "id": "nmdc:omprc-11-0n8y1d07", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-a7fxtx60" - ], - "has_output": [ - "jgi:574fde947ded5e3df1ee1429" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - "mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127655" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c880" - }, - "has_input": [ - "nmdc:898017d076d5d2daaf902e9141f0600a" - ], - "part_of": [ - "nmdc:mga0317978" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:04b9014981f7035c39bd7f870613ed93", - "nmdc:b66266969ab3df4c1cb2b16c1fa7d098" - ], - "was_informed_by": 
"gold:Gp0127655", - "input_read_count": 23985924, - "output_read_bases": 3400452550, - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3621874524, - "name": "Read QC Activity for nmdc:mga0317978", - "output_read_count": 22751496, - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-10-11T03:21:25+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf47" - }, - "has_input": [ - "nmdc:04b9014981f7035c39bd7f870613ed93" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:46371c7bc8259e459f975f915aaac26f", - "nmdc:5dd9bc51105920f3f629e8106235af3b", - "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", - "nmdc:e3f410adc2347396abfdec2a848000d9", - "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", - "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", - "nmdc:1d4f5a605d4549801fda16da567efe56", - "nmdc:8bb5c66575c7c953719ae9947600ad49", - "nmdc:157f7672690ba8207808cc4386ff10a4" - ], - "was_informed_by": "gold:Gp0127655", - "id": "nmdc:65af38817454a315aeb8c67ab27e1469", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga0317978", - "started_at_time": "2021-10-11T02:23:42Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-10-11T03:21:25+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - }, - { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - "biosample_set": [], - "data_object_set": [ - { - "description": "Raw sequencer read data", - "file_size_bytes": 1827996307, - "type": "nmdc:DataObject", - "id": "jgi:574fde937ded5e3df1ee1428", - "name": "10533.2.165322.CCTTCCT-AAGGAAG.fastq.gz" - }, - { - "name": 
"Gp0127653_Filtered Reads", - "description": "Filtered Reads for Gp0127653", - "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filtered.fastq.gz", - "md5_checksum": "8eec0e9c14abb418b906504d1675ecc5", - "id": "nmdc:8eec0e9c14abb418b906504d1675ecc5", - "file_size_bytes": 1661017378 - }, - { - "name": "Gp0127653_Filtered Stats", - "description": "Filtered Stats for Gp0127653", - "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filterStats.txt", - "md5_checksum": "5d07358bbc48f25e157ffc91ea7ae3e0", - "id": "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0", - "file_size_bytes": 286 - }, - { - "name": "Gp0127653_Gottcha2 TSV report", - "description": "Gottcha2 TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", - "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", - "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "file_size_bytes": 3812 - }, - { - "name": "Gp0127653_Gottcha2 full TSV report", - "description": "Gottcha2 full TSV report for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", - "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", - "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", - "file_size_bytes": 857087 - }, - { - "name": "Gp0127653_Gottcha2 Krona HTML report", - "description": "Gottcha2 Krona HTML report for Gp0127653", - "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", - "md5_checksum": "284ce1b28b8964cb525025d678277dba", - "id": "nmdc:284ce1b28b8964cb525025d678277dba", - "file_size_bytes": 235621 - }, - { - "name": "Gp0127653_Centrifuge classification TSV report", - "description": "Centrifuge classification TSV report for 
Gp0127653", - "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", - "md5_checksum": "a379527f61806391e42b3512146013a8", - "id": "nmdc:a379527f61806391e42b3512146013a8", - "file_size_bytes": 1437707313 - }, - { - "name": "Gp0127653_Centrifuge TSV report", - "description": "Centrifuge TSV report for Gp0127653", - "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", - "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", - "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "file_size_bytes": 255105 - }, - { - "name": "Gp0127653_Centrifuge Krona HTML report", - "description": "Centrifuge Krona HTML report for Gp0127653", - "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", - "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", - "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", - "file_size_bytes": 2327985 - }, - { - "name": "Gp0127653_Kraken classification TSV report", - "description": "Kraken classification TSV report for Gp0127653", - "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", - "md5_checksum": "be29ebcd7358653afec7381f9ca43431", - "id": "nmdc:be29ebcd7358653afec7381f9ca43431", - "file_size_bytes": 1164013677 - }, - { - "name": "Gp0127653_Kraken2 TSV report", - "description": "Kraken2 TSV report for Gp0127653", - "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", - "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", - "id": 
"nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "file_size_bytes": 638368 - }, - { - "name": "Gp0127653_Kraken2 Krona HTML report", - "description": "Kraken2 Krona HTML report for Gp0127653", - "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", - "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", - "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", - "file_size_bytes": 3982485 - }, - { - "name": "Gp0127653_Assembled contigs fasta", - "description": "Assembled contigs fasta for Gp0127653", - "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_contigs.fna", - "md5_checksum": "0f2b82878f54787c127bf03338d5c605", - "id": "nmdc:0f2b82878f54787c127bf03338d5c605", - "file_size_bytes": 18722308 - }, - { - "name": "Gp0127653_Assembled scaffold fasta", - "description": "Assembled scaffold fasta for Gp0127653", - "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_scaffolds.fna", - "md5_checksum": "02f8e7222e9e6f45c388a189ca66e1f9", - "id": "nmdc:02f8e7222e9e6f45c388a189ca66e1f9", - "file_size_bytes": 18575622 - }, - { - "name": "Gp0127653_Metagenome Contig Coverage Stats", - "description": "Metagenome Contig Coverage Stats for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_covstats.txt", - "md5_checksum": "eea8a4b58ca07019d0050b030be3a3d1", - "id": "nmdc:eea8a4b58ca07019d0050b030be3a3d1", - "file_size_bytes": 3824141 - }, - { - "name": "Gp0127653_Assembled AGP file", - "description": "Assembled AGP file for Gp0127653", - "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_assembly.agp", - "md5_checksum": "44b1ad59bd14c3367ac0fa2ca37aa057", - "id": "nmdc:44b1ad59bd14c3367ac0fa2ca37aa057", - 
"file_size_bytes": 3551123 - }, - { - "name": "Gp0127653_Metagenome Alignment BAM file", - "description": "Metagenome Alignment BAM file for Gp0127653", - "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_pairedMapped_sorted.bam", - "md5_checksum": "ccd5ba8558a92751c59989aa81054e1a", - "id": "nmdc:ccd5ba8558a92751c59989aa81054e1a", - "file_size_bytes": 1757373378 - }, - { - "name": "Gp0127653_Protein FAA", - "description": "Protein FAA for Gp0127653", - "data_object_type": "Annotation Amino Acid FASTA", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_proteins.faa", - "md5_checksum": "81f16ca99f73a3314a66e6b24d23376f", - "id": "nmdc:81f16ca99f73a3314a66e6b24d23376f", - "file_size_bytes": 11129064 - }, - { - "name": "Gp0127653_Structural annotation GFF file", - "description": "Structural annotation GFF file for Gp0127653", - "data_object_type": "Structural Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_structural_annotation.gff", - "md5_checksum": "66bb16ef28196379647d319da50426dd", - "id": "nmdc:66bb16ef28196379647d319da50426dd", - "file_size_bytes": 8094827 - }, - { - "name": "Gp0127653_Functional annotation GFF file", - "description": "Functional annotation GFF file for Gp0127653", - "data_object_type": "Functional Annotation GFF", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_functional_annotation.gff", - "md5_checksum": "1e7dac5f12cc086509ff905f7133b15a", - "id": "nmdc:1e7dac5f12cc086509ff905f7133b15a", - "file_size_bytes": 13821021 - }, - { - "name": "Gp0127653_KO TSV file", - "description": "KO TSV file for Gp0127653", - "data_object_type": "Annotation KEGG Orthology", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko.tsv", - "md5_checksum": "2a7343eb6364d769a1c43aa5c94daee8", - "id": 
"nmdc:2a7343eb6364d769a1c43aa5c94daee8", - "file_size_bytes": 1578987 - }, - { - "name": "Gp0127653_EC TSV file", - "description": "EC TSV file for Gp0127653", - "data_object_type": "Annotation Enzyme Commission", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ec.tsv", - "md5_checksum": "b2cee4d35f68d1f5731bff3af5904fa4", - "id": "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", - "file_size_bytes": 1029657 - }, - { - "name": "Gp0127653_COG GFF file", - "description": "COG GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cog.gff", - "md5_checksum": "1d45960b1ba5e27af42c736ec583ecd4", - "id": "nmdc:1d45960b1ba5e27af42c736ec583ecd4", - "file_size_bytes": 7241411 - }, - { - "name": "Gp0127653_PFAM GFF file", - "description": "PFAM GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_pfam.gff", - "md5_checksum": "3dec47a0a04865ecdcd9ed7cbc78eca4", - "id": "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", - "file_size_bytes": 5221877 - }, - { - "name": "Gp0127653_TigrFam GFF file", - "description": "TigrFam GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_tigrfam.gff", - "md5_checksum": "043322f3cd31d50faf4d4e0ffd1c8427", - "id": "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", - "file_size_bytes": 472233 - }, - { - "name": "Gp0127653_SMART GFF file", - "description": "SMART GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_smart.gff", - "md5_checksum": "6bed0fc7a7be284936c69fc1faac4be6", - "id": "nmdc:6bed0fc7a7be284936c69fc1faac4be6", - "file_size_bytes": 1586537 - }, - { - "name": "Gp0127653_SuperFam GFF file", - "description": "SuperFam GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_supfam.gff", - "md5_checksum": 
"052d3fb0080390255df5772f79e5ef2c", - "id": "nmdc:052d3fb0080390255df5772f79e5ef2c", - "file_size_bytes": 9232981 - }, - { - "name": "Gp0127653_Cath FunFam GFF file", - "description": "Cath FunFam GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cath_funfam.gff", - "md5_checksum": "e66a3b85c713e8766e5181da2e393984", - "id": "nmdc:e66a3b85c713e8766e5181da2e393984", - "file_size_bytes": 6697496 - }, - { - "name": "Gp0127653_KO_EC GFF file", - "description": "KO_EC GFF file for Gp0127653", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko_ec.gff", - "md5_checksum": "949e3b137b3a0591ed9de493ee5c530b", - "id": "nmdc:949e3b137b3a0591ed9de493ee5c530b", - "file_size_bytes": 5035400 - }, - { - "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", - "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", - "data_object_type": "CheckM Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "file_size_bytes": 0 - }, - { - "name": "Gp0127653_high-quality and medium-quality bins", - "description": "high-quality and medium-quality bins for Gp0127653", - "data_object_type": "Metagenome Bins", - "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/MAGs/nmdc_mga079y988_hqmq_bin.zip", - "md5_checksum": "1029b97dba32dab780f4267f8224619f", - "id": "nmdc:1029b97dba32dab780f4267f8224619f", - "file_size_bytes": 182 - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed2" - }, - "description": "Metagenome Contig Coverage Stats for gold:Gp0127653", - "url": "https://data.microbiomedata.org/data/1781_100355/assembly/mapping_stats.txt", - "file_size_bytes": 3628417, - "type": "nmdc:DataObject", - "id": "nmdc:59f6f5bd2480f717a09946125a0cac46", - "name": "mapping_stats.txt", - 
"data_object_type": "Assembly Coverage Stats" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed3" - }, - "description": "Assembled contigs fasta for gold:Gp0127653", - "url": "https://data.microbiomedata.org/data/1781_100355/assembly/assembly_contigs.fna", - "file_size_bytes": 18526584, - "type": "nmdc:DataObject", - "id": "nmdc:c9708409d9e8f45dcc89e688b3482e5e", - "name": "assembly_contigs.fna", - "data_object_type": "Assembly Contigs" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed4" - }, - "description": "Assembled scaffold fasta for gold:Gp0127653", - "url": "https://data.microbiomedata.org/data/1781_100355/assembly/assembly_scaffolds.fna", - "file_size_bytes": 18379922, - "type": "nmdc:DataObject", - "id": "nmdc:c76c1fdcd6be23a0d7add5ea3a23f754", - "name": "assembly_scaffolds.fna", - "data_object_type": "Assembly Scaffolds" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14ed9" - }, - "description": "Assembled AGP file for gold:Gp0127653", - "url": "https://data.microbiomedata.org/data/1781_100355/assembly/assembly.agp", - "file_size_bytes": 3159611, - "type": "nmdc:DataObject", - "id": "nmdc:ac8be882728344819f210f42d5ea8577", - "name": "assembly.agp", - "data_object_type": "Assembly AGP" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b14edd" - }, - "description": "Metagenome Alignment BAM file for gold:Gp0127653", - "url": "https://data.microbiomedata.org/data/1781_100355/assembly/pairedMapped_sorted.bam", - "file_size_bytes": 1732533897, - "type": "nmdc:DataObject", - "id": "nmdc:fadc083a0534b4961c902c8af8a8ebba", - "name": "pairedMapped_sorted.bam", - "data_object_type": "Assembly Coverage BAM" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15be6" - }, - "id": "nmdc:2d6f886bc9561f305d3b15be14bc192f", - "name": "1781_100355.krona.html", - "description": "Gold:Gp0127653 KRONA plot HTML file", - "url": "https://data.microbiomedata.org/1781_100355/ReadbasedAnalysis/centrifuge/1781_100355.krona.html", - "file_size_bytes": 3385, - 
"data_object_type": "Centrifuge Krona Plot", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003d1ae706d7b5b15c08" - }, - "id": "nmdc:bcf6968cd97d5db72fbe6d048a638fd7", - "name": "1781_100355.json", - "description": "Gold:Gp0127653 ReadbasedAnalysis result JSON file", - "url": "https://data.microbiomedata.org/1781_100355/ReadbasedAnalysis/1781_100355.json", - "file_size_bytes": 3385, - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16642" - }, - "id": "nmdc:9429c50986c3904bdd48e585bfc74dfd", - "name": "bins.tooShort.fa", - "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127653", - "file_size_bytes": 17290021, - "url": "https://data.microbiomedata.org/data/1781_100355/img_MAGs/bins.tooShort.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b003f1ae706d7b5b16647" - }, - "id": "nmdc:eb6bfb2af22e43df303aa691a87889bc", - "name": "bins.unbinned.fa", - "description": "unbinned fasta file from metabat2 for gold:Gp0127653", - "file_size_bytes": 807307, - "url": "https://data.microbiomedata.org/data/1781_100355/img_MAGs/bins.unbinned.fa", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d4d" - }, - "description": "EC TSV File for gold:Gp0127653", - "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_ec.tsv", - "md5_checksum": "05cc9ce5321d6bc909ab63b8cbc59d02", - "file_size_bytes": 3385, - "id": "nmdc:05cc9ce5321d6bc909ab63b8cbc59d02", - "name": "gold:Gp0127653_EC TSV File", - "data_object_type": "Annotation Enzyme Commission", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d4f" - }, - "description": "Protein FAA for gold:Gp0127653", - "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_proteins.faa", - "md5_checksum": "658231efecf9d087ec2a6e9467f4e968", - "file_size_bytes": 3385, - "id": "nmdc:658231efecf9d087ec2a6e9467f4e968", - "name": "gold:Gp0127653_Protein 
FAA", - "data_object_type": "Annotation Amino Acid FASTA", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d51" - }, - "description": "Functional annotation GFF file for gold:Gp0127653", - "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_functional_annotation.gff", - "md5_checksum": "8d83e502a533b5db8cd3bc943ae8b18b", - "file_size_bytes": 3385, - "id": "nmdc:8d83e502a533b5db8cd3bc943ae8b18b", - "name": "gold:Gp0127653_Functional annotation GFF file", - "data_object_type": "Functional Annotation GFF", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d54" - }, - "description": "KO TSV File for gold:Gp0127653", - "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_ko.tsv", - "md5_checksum": "44f8e708349a1effdff745880f4fdd12", - "file_size_bytes": 3385, - "id": "nmdc:44f8e708349a1effdff745880f4fdd12", - "name": "gold:Gp0127653_KO TSV File", - "data_object_type": "Annotation KEGG Orthology", - "type": "nmdc:DataObject" - }, - { - "_id": { - "$oid": "649b00401ae706d7b5b16d5b" - }, - "description": "Structural annotation GFF file for gold:Gp0127653", - "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_structural_annotation.gff", - "md5_checksum": "511ae319ddff2bdcbc3296d951e42d7e", - "file_size_bytes": 3385, - "id": "nmdc:511ae319ddff2bdcbc3296d951e42d7e", - "name": "gold:Gp0127653_Structural annotation GFF file", - "data_object_type": "Structural Annotation GFF", - "type": "nmdc:DataObject" - } - ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [ - { - "_id": { - "$oid": "649b0052ec087f6bbab34720" - }, - "has_input": [ - "nmdc:0f2b82878f54787c127bf03338d5c605", - "nmdc:ccd5ba8558a92751c59989aa81054e1a", - "nmdc:1e7dac5f12cc086509ff905f7133b15a" - ], - "too_short_contig_num": 48540, - "part_of": [ - "nmdc:mga079y988" - ], - "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:d41d8cd98f00b204e9800998ecf8427e", - "nmdc:1029b97dba32dab780f4267f8224619f" - ], - "was_informed_by": "gold:Gp0127653", - "input_contig_num": 48931, - "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:mga079y988", - "mags_list": [], - "unbinned_contig_num": 391, - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-11-13T18:52:13+00:00" - } - ], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [ - { - "_id": { - "$oid": "649b005bbf2caae0415ef9be" - }, - "has_input": [ - "nmdc:0f2b82878f54787c127bf03338d5c605" - ], - "part_of": [ - "nmdc:mga079y988" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:81f16ca99f73a3314a66e6b24d23376f", - "nmdc:66bb16ef28196379647d319da50426dd", - "nmdc:1e7dac5f12cc086509ff905f7133b15a", - "nmdc:2a7343eb6364d769a1c43aa5c94daee8", - "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", - "nmdc:1d45960b1ba5e27af42c736ec583ecd4", - "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", - "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", - "nmdc:6bed0fc7a7be284936c69fc1faac4be6", - "nmdc:052d3fb0080390255df5772f79e5ef2c", - "nmdc:e66a3b85c713e8766e5181da2e393984", - "nmdc:949e3b137b3a0591ed9de493ee5c530b" - ], - "was_informed_by": "gold:Gp0127653", - "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", - "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga079y988", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:MetagenomeAnnotationActivity", - "ended_at_time": "2021-11-13T18:52:13+00:00" - } - ], - "metagenome_assembly_set": [ - { - "_id": { - "$oid": "649b005f2ca5ee4adb139faa" - }, - "has_input": [ - "nmdc:8eec0e9c14abb418b906504d1675ecc5" - ], - "part_of": [ - 
"nmdc:mga079y988" - ], - "ctg_logsum": 9125.582, - "scaf_logsum": 9156.336, - "gap_pct": 0.00094, - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:0f2b82878f54787c127bf03338d5c605", - "nmdc:02f8e7222e9e6f45c388a189ca66e1f9", - "nmdc:eea8a4b58ca07019d0050b030be3a3d1", - "nmdc:44b1ad59bd14c3367ac0fa2ca37aa057", - "nmdc:ccd5ba8558a92751c59989aa81054e1a" - ], - "asm_score": 13.921, - "was_informed_by": "gold:Gp0127653", - "ctg_powsum": 1096.518, - "scaf_max": 58655, - "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", - "scaf_powsum": 1101.795, - "execution_resource": "NERSC-Cori", - "contigs": 48932, - "name": "Assembly Activity for nmdc:mga079y988", - "ctg_max": 58655, - "gc_std": 0.10928, - "gc_avg": 0.57867, - "contig_bp": 16963869, - "started_at_time": "2021-10-11T02:23:35Z", - "scaf_bp": 16964029, - "type": "nmdc:MetagenomeAssembly", - "scaffolds": 48925, - "ended_at_time": "2021-11-13T18:52:13+00:00", - "ctg_l50": 309, - "ctg_l90": 281, - "ctg_n50": 19544, - "ctg_n90": 43034, - "scaf_l50": 309, - "scaf_l90": 281, - "scaf_n50": 19539, - "scaf_n90": 43028, - "scaf_l_gt50k": 58655, - "scaf_n_gt50k": 1, - "scaf_pct_gt50k": 0.34576103 - } - ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], - "omics_processing_set": [ - { - "_id": { - "$oid": "649b009773e8249959349b64" - }, - "id": "nmdc:omprc-11-p1735e67", - "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", - "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", - "has_input": [ - "nmdc:bsm-11-k4wa0808" - ], - "has_output": [ - "jgi:574fde937ded5e3df1ee1428" - ], - "part_of": [ - "nmdc:sty-11-aygzgv51" - ], - "add_date": "2016-01-11", - 
"mod_date": "2021-06-15", - "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "principal_investigator": { - "has_raw_value": "James Stegen" - }, - "processing_institution": "JGI", - "type": "nmdc:OmicsProcessing", - "gold_sequencing_project_identifiers": [ - "gold:Gp0127653" - ] - } - ], - "reaction_activity_set": [], - "read_qc_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009d6bdd4fd20273c878" - }, - "has_input": [ - "nmdc:84ffabc3fbd7e759cd2352ec513b89a0" - ], - "part_of": [ - "nmdc:mga079y988" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:8eec0e9c14abb418b906504d1675ecc5", - "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0" - ], - "was_informed_by": "gold:Gp0127653", - "input_read_count": 20780788, - "output_read_bases": 2918466866, - "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", - "execution_resource": "NERSC-Cori", - "input_read_bases": 3137898988, - "name": "Read QC Activity for nmdc:mga079y988", - "output_read_count": 19516330, - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadQCAnalysisActivity", - "ended_at_time": "2021-11-13T18:52:13+00:00" - } - ], - "read_based_taxonomy_analysis_activity_set": [ - { - "_id": { - "$oid": "649b009bff710ae353f8cf3d" - }, - "has_input": [ - "nmdc:8eec0e9c14abb418b906504d1675ecc5" - ], - "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", - "has_output": [ - "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", - "nmdc:dbf03e26f7e1529762830161fe1f1906", - "nmdc:284ce1b28b8964cb525025d678277dba", - "nmdc:a379527f61806391e42b3512146013a8", - "nmdc:3659ac6c99dea0fb1385c58eac8b1335", - "nmdc:3219058371bf2f8081b2dd2b434ec145", - "nmdc:be29ebcd7358653afec7381f9ca43431", - "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", - "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" - ], - "was_informed_by": "gold:Gp0127653", - 
"id": "nmdc:676183a611b251dc6b8f0b8ca600181b", - "execution_resource": "NERSC-Cori", - "name": "ReadBased Analysis Activity for nmdc:mga079y988", - "started_at_time": "2021-10-11T02:23:35Z", - "type": "nmdc:ReadbasedAnalysis", - "ended_at_time": "2021-11-13T18:52:13+00:00" - } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] - } -] \ No newline at end of file From 1a1a8ab680e2eac4d1d9ab1250285cf65d5ca26a Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 15 Nov 2023 09:55:24 -0800 Subject: [PATCH 60/91] updated script and output --- ...ty-11-aygzgv51_associated_record_dump.json | 39520 ++++++++++++++++ .../re_iding/scripts/re_id_tool.py | 19 +- 2 files changed, 39532 insertions(+), 7 deletions(-) create mode 100644 nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_associated_record_dump.json diff --git a/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_associated_record_dump.json b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_associated_record_dump.json new file mode 100644 index 00000000..935367d9 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_associated_record_dump.json @@ -0,0 +1,39520 @@ +[ + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2861414297, + "type": "nmdc:DataObject", + "id": "jgi:55d740280d8785342fcf7e39", + "name": "9422.8.132674.GTTTCG.fastq.gz" + }, + { + "name": "Gp0115663_Filtered Reads", + "description": "Filtered Reads for Gp0115663", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "id": "nmdc:7bf778baef033d36f118f8591256d6ef", + "file_size_bytes": 2571324879 + }, + { + "name": "Gp0115663_Filtered Stats", + "description": "Filtered Stats 
for Gp0115663", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", + "file_size_bytes": 290 + }, + { + "name": "Gp0115663_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "file_size_bytes": 13174 + }, + { + "name": "Gp0115663_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "file_size_bytes": 1035818 + }, + { + "name": "Gp0115663_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115663", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "file_size_bytes": 262669 + }, + { + "name": "Gp0115663_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115663", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "file_size_bytes": 2189843623 + }, + { + "name": "Gp0115663_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115663", 
+ "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "file_size_bytes": 260134 + }, + { + "name": "Gp0115663_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115663", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "file_size_bytes": 2343980 + }, + { + "name": "Gp0115663_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115663", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", + "file_size_bytes": 1785563917 + }, + { + "name": "Gp0115663_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115663", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "file_size_bytes": 699896 + }, + { + "name": "Gp0115663_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115663", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", + "file_size_bytes": 
4221977 + }, + { + "name": "Gp0115663_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115663", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", + "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", + "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "file_size_bytes": 90115831 + }, + { + "name": "Gp0115663_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115663", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", + "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "id": "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "file_size_bytes": 89604715 + }, + { + "name": "Gp0115663_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", + "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "file_size_bytes": 13412363 + }, + { + "name": "Gp0115663_Assembled AGP file", + "description": "Assembled AGP file for Gp0115663", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", + "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "id": "nmdc:f450e3800e17691d5874c89fc46c186a", + "file_size_bytes": 12542171 + }, + { + "name": "Gp0115663_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115663", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", + "md5_checksum": "31dc958d116d02122509e90b0883954f", + "id": "nmdc:31dc958d116d02122509e90b0883954f", + "file_size_bytes": 2773429299 + }, + { + "name": 
"Gp0115663_Protein FAA", + "description": "Protein FAA for Gp0115663", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", + "md5_checksum": "879988d212ecec46928b8598e2f8391f", + "id": "nmdc:879988d212ecec46928b8598e2f8391f", + "file_size_bytes": 50165060 + }, + { + "name": "Gp0115663_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115663", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", + "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", + "id": "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "file_size_bytes": 2767 + }, + { + "name": "Gp0115663_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115663", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", + "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", + "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "file_size_bytes": 55139586 + }, + { + "name": "Gp0115663_KO TSV file", + "description": "KO TSV file for Gp0115663", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", + "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", + "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "file_size_bytes": 6023696 + }, + { + "name": "Gp0115663_EC TSV file", + "description": "EC TSV file for Gp0115663", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", + "md5_checksum": "4f88c89459f36655eb7c1eceec19602a", + "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", + "file_size_bytes": 3982918 + }, + { + "name": "Gp0115663_COG 
GFF file", + "description": "COG GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", + "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", + "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "file_size_bytes": 27362917 + }, + { + "name": "Gp0115663_PFAM GFF file", + "description": "PFAM GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", + "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", + "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", + "file_size_bytes": 21572048 + }, + { + "name": "Gp0115663_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", + "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", + "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "file_size_bytes": 2900068 + }, + { + "name": "Gp0115663_SMART GFF file", + "description": "SMART GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", + "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", + "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "file_size_bytes": 6905519 + }, + { + "name": "Gp0115663_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", + "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", + "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "file_size_bytes": 38787856 + }, + { + "name": "Gp0115663_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", + "md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", + "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "file_size_bytes": 30134783 + }, + { + "name": 
"Gp0115663_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", + "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", + "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", + "file_size_bytes": 19194308 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", + "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", + "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "file_size_bytes": 70411007 + }, + { + "name": "Gp0115663_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", + "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", + "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "file_size_bytes": 15998690 + }, + { + "name": "Gp0115663_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115663", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", + "md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", + "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "file_size_bytes": 1530 + }, + { + "name": 
"Gp0115663_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115663", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", + "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", + "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "file_size_bytes": 182 + }, + { + "name": "Gp0115663_metabat2 bins", + "description": "metabat2 bins for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", + "md5_checksum": "27c07072f175571200b5931550adb8aa", + "id": "nmdc:27c07072f175571200b5931550adb8aa", + "file_size_bytes": 1114314 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5a" + }, + "description": "Assembled scaffold fasta for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly_scaffolds.fna", + "file_size_bytes": 88756490, + "type": "nmdc:DataObject", + "id": "nmdc:321a497bc1c3cf25affc8e659b746ba5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/mapping_stats.txt", + "file_size_bytes": 12563453, + "type": "nmdc:DataObject", + "id": "nmdc:ad47215b9b079c1d94a8fc56385dee36", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5f" + }, + "description": "Assembled contigs fasta for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly_contigs.fna", + "file_size_bytes": 89266921, + "type": "nmdc:DataObject", + "id": "nmdc:0a3d00715d01ad7b8f3aee59b674dfe9", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d61" + }, + 
"description": "Assembled AGP file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly.agp", + "file_size_bytes": 10842941, + "type": "nmdc:DataObject", + "id": "nmdc:bc01f0f507c9dac65d8a8e40e41a8c48", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d63" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2729039400, + "type": "nmdc:DataObject", + "id": "nmdc:668d207be5ea844f988fbfb2813564cc", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15979" + }, + "id": "nmdc:9509adb5a013006dfda9754429cfc968", + "name": "1781_86101.krona.html", + "description": "Gold:Gp0115663 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86101/ReadbasedAnalysis/centrifuge/1781_86101.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15980" + }, + "id": "nmdc:6dea4c58f402b5c3935e8f1a545bec47", + "name": "1781_86101.json", + "description": "Gold:Gp0115663 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86101/ReadbasedAnalysis/1781_86101.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16253" + }, + "id": "nmdc:0c4c875e5b10c6b742c14c22e2926751", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115663", + "file_size_bytes": 68423774, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16254" + }, + "id": "nmdc:c55d6b00aa5d4af8cd46d349e17d4127", + "name": 
"bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115663", + "file_size_bytes": 16857267, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16256" + }, + "id": "nmdc:1346fe25b6ff22180eb3a51204e0b1fc", + "name": "gold:Gp0115663.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115663", + "file_size_bytes": 224772, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16258" + }, + "id": "nmdc:818f5a47d1371295f9313909ea12eb50", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115663", + "file_size_bytes": 1141, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16259" + }, + "id": "nmdc:a755bb87aded36aefbd8022506a793c7", + "name": "gold:Gp0115663.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115663", + "file_size_bytes": 2225340, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625a" + }, + "id": "nmdc:e0b7421514f976cb7ad8c343cf3077a9", + "name": "gold:Gp0115663.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115663", + "file_size_bytes": 288873, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d91" + }, + "description": "KO TSV File for gold:Gp0115663", + "url": 
"https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_ko.tsv", + "md5_checksum": "8d250650c90956edff8bafccc56fd630", + "file_size_bytes": 3385, + "id": "nmdc:8d250650c90956edff8bafccc56fd630", + "name": "gold:Gp0115663_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d94" + }, + "description": "Functional annotation GFF file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_functional_annotation.gff", + "md5_checksum": "b7e9c8d0bffdd13ace6f862a61fa87d2", + "file_size_bytes": 3385, + "id": "nmdc:b7e9c8d0bffdd13ace6f862a61fa87d2", + "name": "gold:Gp0115663_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d95" + }, + "description": "Protein FAA for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_proteins.faa", + "md5_checksum": "754074d3bcade65aba2a6f8236619ab7", + "file_size_bytes": 3385, + "id": "nmdc:754074d3bcade65aba2a6f8236619ab7", + "name": "gold:Gp0115663_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d97" + }, + "description": "Structural annotation GFF file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_structural_annotation.gff", + "md5_checksum": "a4b4c623457aa10161d88a9ac4eef522", + "file_size_bytes": 3385, + "id": "nmdc:a4b4c623457aa10161d88a9ac4eef522", + "name": "gold:Gp0115663_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da8" + }, + "description": "EC TSV File for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_ec.tsv", + 
"md5_checksum": "27319f58c616a07159e1fac12635bd4b", + "file_size_bytes": 3385, + "id": "nmdc:27319f58c616a07159e1fac12635bd4b", + "name": "gold:Gp0115663_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34734" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:31dc958d116d02122509e90b0883954f", + "nmdc:002e4ebc728f8b91cb5f298d340ab013" + ], + "too_short_contig_num": 159810, + "part_of": [ + "nmdc:mga0h9dt75" + ], + "binned_contig_num": 684, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "nmdc:27c07072f175571200b5931550adb8aa" + ], + "was_informed_by": "gold:Gp0115663", + "input_contig_num": 169782, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", + "mags_list": [ + { + "number_of_contig": 61, + "completeness": 13.82, + "bin_name": "bins.1", + "gene_count": 294, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.62, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 485, + "completeness": 66.03, + "bin_name": "bins.2", + "gene_count": 2871, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.87, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 32 + }, + { + "number_of_contig": 56, + "completeness": 34.23, + "bin_name": 
"bins.3", + "gene_count": 337, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 63, + "completeness": 6.9, + "bin_name": "bins.4", + "gene_count": 276, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 19, + "completeness": 4.45, + "bin_name": "bins.5", + "gene_count": 463, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 9288, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d6" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:879988d212ecec46928b8598e2f8391f", + "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "nmdc:4f88c89459f36655eb7c1eceec19602a", + "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "nmdc:618b18fa8635c80cc0091371f451a6f0", + "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + 
"nmdc:75f481e0d98793cfb4f9508cb3e31622" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb9" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "ctg_logsum": 214373, + "scaf_logsum": 215363, + "gap_pct": 0.00188, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "nmdc:f450e3800e17691d5874c89fc46c186a", + "nmdc:31dc958d116d02122509e90b0883954f" + ], + "asm_score": 6.577, + "was_informed_by": "gold:Gp0115663", + "ctg_powsum": 24284, + "scaf_max": 68135, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "scaf_powsum": 24422, + "execution_resource": "NERSC-Cori", + "contigs": 169784, + "name": "Assembly Activity for nmdc:mga0h9dt75", + "ctg_max": 68135, + "gc_std": 0.11726, + "contig_bp": 83494920, + "gc_avg": 0.46001, + "started_at_time": "2021-10-11T02:28:26Z", + "scaf_bp": 83496490, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169645, + "ended_at_time": "2021-10-11T04:56:04+00:00", + "ctg_l50": 470, + "ctg_l90": 290, + "ctg_n50": 45584, + "ctg_n90": 141996, + "scaf_l50": 470, + "scaf_l90": 290, + "scaf_n50": 45550, + "scaf_n90": 141870, + "scaf_l_gt50k": 68135, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.08160224 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b33" + }, + "id": "nmdc:omprc-11-bn8jcq58", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + 
"description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qq8s6x03" + ], + "has_output": [ + "jgi:55d740280d8785342fcf7e39" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115663" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88b" + }, + "has_input": [ + "nmdc:30a06664f29cffbbbc49abad86eae6fc" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7bf778baef033d36f118f8591256d6ef", + "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" + ], + "was_informed_by": "gold:Gp0115663", + "input_read_count": 32238374, + "output_read_bases": 4608772924, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4867994474, + "name": "Read QC Activity for nmdc:mga0h9dt75", + "output_read_count": 30774080, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf4f" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + 
"nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "nmdc:7ca01ea379f0baed96f87d1435925f95", + "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2080914094, + "type": "nmdc:DataObject", + "id": "jgi:55d817fc0d8785342fcf8274", + "name": "9387.2.132031.CCGTCC.fastq.gz" + }, + { + "name": "Gp0115666_Filtered Reads", + "description": "Filtered Reads for Gp0115666", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filtered.fastq.gz", + "md5_checksum": "0b301d2dd917c2be31422dd0e986dd5e", + "id": "nmdc:0b301d2dd917c2be31422dd0e986dd5e", + "file_size_bytes": 1806510860 + }, + { + "name": "Gp0115666_Filtered Stats", + "description": "Filtered Stats for Gp0115666", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/qa/nmdc_mga0eehe16_filterStats.txt", + "md5_checksum": "0634e8261ce976d167457993d7f7a4ec", + "id": "nmdc:0634e8261ce976d167457993d7f7a4ec", + "file_size_bytes": 289 + }, + { + "name": "Gp0115666_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report.tsv", + "md5_checksum": "17454627f873cc37e80700c4751c81d6", + "id": "nmdc:17454627f873cc37e80700c4751c81d6", + "file_size_bytes": 10721 + }, + { + "name": "Gp0115666_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115666", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_report_full.tsv", + "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", + "id": "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + "file_size_bytes": 920924 + }, + { + "name": "Gp0115666_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115666", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_gottcha2_krona.html", + "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", + "id": "nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "file_size_bytes": 257441 + }, + { + "name": "Gp0115666_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115666", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_classification.tsv", + "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", + "id": "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "file_size_bytes": 1468295025 + }, + { + "name": "Gp0115666_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115666", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_report.tsv", + "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", + "id": "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + "file_size_bytes": 257081 + }, + { + "name": "Gp0115666_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115666", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_centrifuge_krona.html", + "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", + "id": "nmdc:e5227b1cfdbc266c44d23028c92150a9", + "file_size_bytes": 2331968 + }, + { + "name": 
"Gp0115666_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115666", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_classification.tsv", + "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", + "id": "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "file_size_bytes": 1204548180 + }, + { + "name": "Gp0115666_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115666", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_report.tsv", + "md5_checksum": "368cf81424348cdf46d17c13908280e7", + "id": "nmdc:368cf81424348cdf46d17c13908280e7", + "file_size_bytes": 653697 + }, + { + "name": "Gp0115666_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115666", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/ReadbasedAnalysis/nmdc_mga0eehe16_kraken2_krona.html", + "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", + "id": "nmdc:b5091cfeed4fbea8316e50fbceea89bc", + "file_size_bytes": 3983935 + }, + { + "name": "Gp0115666_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115666", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_contigs.fna", + "md5_checksum": "e557facdf4c3066ba4b5ba168995ba85", + "id": "nmdc:e557facdf4c3066ba4b5ba168995ba85", + "file_size_bytes": 63269472 + }, + { + "name": "Gp0115666_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115666", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_scaffolds.fna", + "md5_checksum": "92cc678ca9e54cb92118b9ae746fb996", + "id": 
"nmdc:92cc678ca9e54cb92118b9ae746fb996", + "file_size_bytes": 62917914 + }, + { + "name": "Gp0115666_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_covstats.txt", + "md5_checksum": "7082b41c627571a03466f94ba80c15b8", + "id": "nmdc:7082b41c627571a03466f94ba80c15b8", + "file_size_bytes": 9179769 + }, + { + "name": "Gp0115666_Assembled AGP file", + "description": "Assembled AGP file for Gp0115666", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_assembly.agp", + "md5_checksum": "c5ccd39d97d652d5ec8804202a324b0e", + "id": "nmdc:c5ccd39d97d652d5ec8804202a324b0e", + "file_size_bytes": 8550216 + }, + { + "name": "Gp0115666_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115666", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/assembly/nmdc_mga0eehe16_pairedMapped_sorted.bam", + "md5_checksum": "3ece2c377622cebdddfb9322047cb115", + "id": "nmdc:3ece2c377622cebdddfb9322047cb115", + "file_size_bytes": 1940309089 + }, + { + "name": "Gp0115666_Protein FAA", + "description": "Protein FAA for Gp0115666", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_proteins.faa", + "md5_checksum": "4d509c29cad07f0b18d3f7e0e724c493", + "id": "nmdc:4d509c29cad07f0b18d3f7e0e724c493", + "file_size_bytes": 35706777 + }, + { + "name": "Gp0115666_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115666", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_structural_annotation.gff", + "md5_checksum": "60d04bb0a2d1a1d593bd849a2a13e405", + "id": 
"nmdc:60d04bb0a2d1a1d593bd849a2a13e405", + "file_size_bytes": 2520 + }, + { + "name": "Gp0115666_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115666", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_functional_annotation.gff", + "md5_checksum": "91cd273ea95a29b2c4e326c56eafe08a", + "id": "nmdc:91cd273ea95a29b2c4e326c56eafe08a", + "file_size_bytes": 40030386 + }, + { + "name": "Gp0115666_KO TSV file", + "description": "KO TSV file for Gp0115666", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko.tsv", + "md5_checksum": "e08c6253ec5a15eb43d8cb4d69d09d4c", + "id": "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", + "file_size_bytes": 5584125 + }, + { + "name": "Gp0115666_EC TSV file", + "description": "EC TSV file for Gp0115666", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ec.tsv", + "md5_checksum": "9edfc4fee191b722148af1e2648f787f", + "id": "nmdc:9edfc4fee191b722148af1e2648f787f", + "file_size_bytes": 3575242 + }, + { + "name": "Gp0115666_COG GFF file", + "description": "COG GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cog.gff", + "md5_checksum": "886402044865256b80bfaf42ca148a61", + "id": "nmdc:886402044865256b80bfaf42ca148a61", + "file_size_bytes": 23390091 + }, + { + "name": "Gp0115666_PFAM GFF file", + "description": "PFAM GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_pfam.gff", + "md5_checksum": "1b2bc9b96a15ebdfe3ff1e30027544af", + "id": "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", + "file_size_bytes": 18444613 + }, + { + "name": "Gp0115666_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115666", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_tigrfam.gff", + "md5_checksum": "2d730834b8841b7a7ad30786bff382fa", + "id": "nmdc:2d730834b8841b7a7ad30786bff382fa", + "file_size_bytes": 2596225 + }, + { + "name": "Gp0115666_SMART GFF file", + "description": "SMART GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_smart.gff", + "md5_checksum": "46d62d69e48d7aeecb87106e02102753", + "id": "nmdc:46d62d69e48d7aeecb87106e02102753", + "file_size_bytes": 4932262 + }, + { + "name": "Gp0115666_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_supfam.gff", + "md5_checksum": "1896e41000aa9e4acc98cc7702e42304", + "id": "nmdc:1896e41000aa9e4acc98cc7702e42304", + "file_size_bytes": 28911479 + }, + { + "name": "Gp0115666_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_cath_funfam.gff", + "md5_checksum": "f40bfd77fb3f24be2529fdafc01104c7", + "id": "nmdc:f40bfd77fb3f24be2529fdafc01104c7", + "file_size_bytes": 22881869 + }, + { + "name": "Gp0115666_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/annotation/nmdc_mga0eehe16_ko_ec.gff", + "md5_checksum": "920be8f090654360619fbb16163b8513", + "id": "nmdc:920be8f090654360619fbb16163b8513", + "file_size_bytes": 17844749 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"file_size_bytes": 0 + }, + { + "name": "Gp0115666_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.tooShort.fa", + "md5_checksum": "9944a9020ce981a2423ca81424998e66", + "id": "nmdc:9944a9020ce981a2423ca81424998e66", + "file_size_bytes": 46766610 + }, + { + "name": "Gp0115666_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_bins.unbinned.fa", + "md5_checksum": "d2a24728b9006fd4fb4bf4f326138dc2", + "id": "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", + "file_size_bytes": 11382048 + }, + { + "name": "Gp0115666_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115666", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_checkm_qa.out", + "md5_checksum": "415dfed655f9c4673f2cce4f9947c2e4", + "id": "nmdc:415dfed655f9c4673f2cce4f9947c2e4", + "file_size_bytes": 1020 + }, + { + "name": "Gp0115666_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115666", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_hqmq_bin.zip", + "md5_checksum": "be6482b534716166ce5daea5a07cba06", + "id": "nmdc:be6482b534716166ce5daea5a07cba06", + "file_size_bytes": 182 + }, + { + "name": "Gp0115666_metabat2 bins", + "description": "metabat2 bins for Gp0115666", + "url": "https://data.microbiomedata.org/data/nmdc:mga0eehe16/MAGs/nmdc_mga0eehe16_metabat_bin.zip", + "md5_checksum": "df08913532a84681996a29d1a1c127b3", + "id": "nmdc:df08913532a84681996a29d1a1c127b3", + "file_size_bytes": 1559491 + }, + { + "_id": { + 
"$oid": "649b003c1ae706d7b5b14d6c" + }, + "description": "Assembled contigs fasta for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly_contigs.fna", + "file_size_bytes": 62686167, + "type": "nmdc:DataObject", + "id": "nmdc:aa60f90793266081a0ba6d125fb06e55", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/mapping_stats.txt", + "file_size_bytes": 8596464, + "type": "nmdc:DataObject", + "id": "nmdc:0157a89bc2e56ad99bae7289b9df7fb6", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6e" + }, + "description": "Assembled scaffold fasta for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly_scaffolds.fna", + "file_size_bytes": 62335089, + "type": "nmdc:DataObject", + "id": "nmdc:2dd25f896d7b6100a24987d1496e2646", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6f" + }, + "description": "Assembled AGP file for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/assembly.agp", + "file_size_bytes": 7382646, + "type": "nmdc:DataObject", + "id": "nmdc:c29f2a80b289985b57570884a5c92548", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d73" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115666", + "url": "https://data.microbiomedata.org/data/1781_86096/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1914805023, + "type": "nmdc:DataObject", + "id": "nmdc:1abeac4b3490b9baf8206f8df963a646", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": 
{ + "$oid": "649b003d1ae706d7b5b159a5" + }, + "id": "nmdc:2e76bec484e1a06b2f84b1c230cd97b4", + "name": "1781_86096.json", + "description": "Gold:Gp0115666 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86096/ReadbasedAnalysis/1781_86096.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159a9" + }, + "id": "nmdc:d5ab2504c9505d6cb96b348b71f2efc5", + "name": "1781_86096.krona.html", + "description": "Gold:Gp0115666 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86096/ReadbasedAnalysis/centrifuge/1781_86096.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16267" + }, + "id": "nmdc:5b80fde8feaaab94a1774e7d61863048", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115666", + "file_size_bytes": 45445156, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626b" + }, + "id": "nmdc:5c7b37ac5d11fd3ffcbcc63b2e15f627", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115666", + "file_size_bytes": 1190, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626d" + }, + "id": "nmdc:a4fab93f1102baf069e09b65cb13e87a", + "name": "gold:Gp0115666.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115666", + "file_size_bytes": 3971570, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626e" + 
}, + "id": "nmdc:88ef6e640707d816e9df8d751c31e71b", + "name": "gold:Gp0115666.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115666", + "file_size_bytes": 559859, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16271" + }, + "id": "nmdc:ce2c968f1093b25da9ac4399291eede6", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115666", + "file_size_bytes": 11541386, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16281" + }, + "id": "nmdc:ffdbeb92d859d6b7e828f3d6f8219e0b", + "name": "gold:Gp0115666.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115666", + "file_size_bytes": 346195, + "url": "https://data.microbiomedata.org/data/1781_86096/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7d" + }, + "description": "Functional annotation GFF file for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_functional_annotation.gff", + "md5_checksum": "a1e8795537eca0522357d60045780ab3", + "file_size_bytes": 3385, + "id": "nmdc:a1e8795537eca0522357d60045780ab3", + "name": "gold:Gp0115666_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d81" + }, + "description": "Structural annotation GFF file for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_structural_annotation.gff", + "md5_checksum": "654201c4699079bdd923dcff52881c07", + "file_size_bytes": 3385, + "id": "nmdc:654201c4699079bdd923dcff52881c07", + "name": 
"gold:Gp0115666_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d84" + }, + "description": "EC TSV File for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_ec.tsv", + "md5_checksum": "4e3f389524497182aa3e8832aa7b373b", + "file_size_bytes": 3385, + "id": "nmdc:4e3f389524497182aa3e8832aa7b373b", + "name": "gold:Gp0115666_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8a" + }, + "description": "KO TSV File for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_ko.tsv", + "md5_checksum": "ab262feeaf856be190b60ea7c0a4c030", + "file_size_bytes": 3385, + "id": "nmdc:ab262feeaf856be190b60ea7c0a4c030", + "name": "gold:Gp0115666_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8d" + }, + "description": "Protein FAA for gold:Gp0115666", + "url": "https://data.microbiomedata.org/1781_86096/img_annotation/Ga0482261_proteins.faa", + "md5_checksum": "70c8e0fc6e64b20e99a4c0f783014142", + "file_size_bytes": 3385, + "id": "nmdc:70c8e0fc6e64b20e99a4c0f783014142", + "name": "gold:Gp0115666_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34730" + }, + "has_input": [ + "nmdc:e557facdf4c3066ba4b5ba168995ba85", + "nmdc:3ece2c377622cebdddfb9322047cb115", + "nmdc:91cd273ea95a29b2c4e326c56eafe08a" + ], + "too_short_contig_num": 108937, + "part_of": [ + "nmdc:mga0eehe16" + ], + "binned_contig_num": 899, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"nmdc:9944a9020ce981a2423ca81424998e66", + "nmdc:d2a24728b9006fd4fb4bf4f326138dc2", + "nmdc:415dfed655f9c4673f2cce4f9947c2e4", + "nmdc:be6482b534716166ce5daea5a07cba06", + "nmdc:df08913532a84681996a29d1a1c127b3" + ], + "was_informed_by": "gold:Gp0115666", + "input_contig_num": 116661, + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0eehe16", + "mags_list": [ + { + "number_of_contig": 216, + "completeness": 41.57, + "bin_name": "bins.1", + "gene_count": 1176, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.93, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 683, + "completeness": 87.59, + "bin_name": "bins.2", + "gene_count": 4526, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 33.23, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 56 + } + ], + "unbinned_contig_num": 6825, + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ce" + }, + "has_input": [ + "nmdc:e557facdf4c3066ba4b5ba168995ba85" + ], + "part_of": [ + "nmdc:mga0eehe16" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:4d509c29cad07f0b18d3f7e0e724c493", + "nmdc:60d04bb0a2d1a1d593bd849a2a13e405", + "nmdc:91cd273ea95a29b2c4e326c56eafe08a", + "nmdc:e08c6253ec5a15eb43d8cb4d69d09d4c", + "nmdc:9edfc4fee191b722148af1e2648f787f", + "nmdc:886402044865256b80bfaf42ca148a61", + "nmdc:1b2bc9b96a15ebdfe3ff1e30027544af", + 
"nmdc:2d730834b8841b7a7ad30786bff382fa", + "nmdc:46d62d69e48d7aeecb87106e02102753", + "nmdc:1896e41000aa9e4acc98cc7702e42304", + "nmdc:f40bfd77fb3f24be2529fdafc01104c7", + "nmdc:920be8f090654360619fbb16163b8513" + ], + "was_informed_by": "gold:Gp0115666", + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0eehe16", + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fbb" + }, + "has_input": [ + "nmdc:0b301d2dd917c2be31422dd0e986dd5e" + ], + "part_of": [ + "nmdc:mga0eehe16" + ], + "ctg_logsum": 181484, + "scaf_logsum": 182081, + "gap_pct": 0.00163, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e557facdf4c3066ba4b5ba168995ba85", + "nmdc:92cc678ca9e54cb92118b9ae746fb996", + "nmdc:7082b41c627571a03466f94ba80c15b8", + "nmdc:c5ccd39d97d652d5ec8804202a324b0e", + "nmdc:3ece2c377622cebdddfb9322047cb115" + ], + "asm_score": 5.224, + "was_informed_by": "gold:Gp0115666", + "ctg_powsum": 20653, + "scaf_max": 25973, + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "scaf_powsum": 20721, + "execution_resource": "NERSC-Cori", + "contigs": 116661, + "name": "Assembly Activity for nmdc:mga0eehe16", + "ctg_max": 25973, + "gc_std": 0.10759, + "contig_bp": 58735100, + "gc_avg": 0.57262, + "started_at_time": "2021-10-11T02:28:09Z", + "scaf_bp": 58736060, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 116565, + "ended_at_time": "2021-10-11T04:06:19+00:00", + "ctg_l50": 493, + "ctg_l90": 286, + "ctg_n50": 27791, + "ctg_n90": 95962, + "scaf_l50": 493, + "scaf_l90": 286, + "scaf_n50": 27775, + "scaf_n90": 95875 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b34" + }, + "id": "nmdc:omprc-11-zp2ar437", + "name": "Sand microcosm microbial communities 
from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-4qsqg549" + ], + "has_output": [ + "jgi:55d817fc0d8785342fcf8274" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115666" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c887" + }, + "has_input": [ + "nmdc:76893480c05758ad2977df78a5b050e5" + ], + "part_of": [ + "nmdc:mga0eehe16" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0b301d2dd917c2be31422dd0e986dd5e", + "nmdc:0634e8261ce976d167457993d7f7a4ec" + ], + "was_informed_by": "gold:Gp0115666", + "input_read_count": 22183982, + "output_read_bases": 3025260554, + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3349781282, + "name": "Read QC Activity for nmdc:mga0eehe16", + "output_read_count": 20195754, + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:06:19+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf55" + }, + "has_input": [ + "nmdc:0b301d2dd917c2be31422dd0e986dd5e" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:17454627f873cc37e80700c4751c81d6", + "nmdc:e0479eb7fd3345aaf134640e0b9e11b0", + 
"nmdc:a8433a0b17d7380fc836e4c9f85a7a54", + "nmdc:9e061ad19d4a6a3f209d1992d02df9f9", + "nmdc:1d46eebd0f194f57dd9e92c9bc992891", + "nmdc:e5227b1cfdbc266c44d23028c92150a9", + "nmdc:05f7680c6646904cfb16fc146c0fed4a", + "nmdc:368cf81424348cdf46d17c13908280e7", + "nmdc:b5091cfeed4fbea8316e50fbceea89bc" + ], + "was_informed_by": "gold:Gp0115666", + "id": "nmdc:db181675b157d27d9b0b2f35b5cbf03e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0eehe16", + "started_at_time": "2021-10-11T02:28:09Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:06:19+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 3050291373, + "type": "nmdc:DataObject", + "id": "jgi:55d740240d8785342fcf7e37", + "name": "9422.8.132674.GTGGCC.fastq.gz" + }, + { + "name": "Gp0115668_Filtered Reads", + "description": "Filtered Reads for Gp0115668", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filtered.fastq.gz", + "md5_checksum": "121b1c25e803f2a010ae5a2206a8d1d2", + "id": "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", + "file_size_bytes": 2665008319 + }, + { + "name": "Gp0115668_Filtered Stats", + "description": "Filtered Stats for Gp0115668", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/qa/nmdc_mga0n66h21_filterStats.txt", + "md5_checksum": "63fb5949ebafd1846ba60f2ce033191c", + "id": "nmdc:63fb5949ebafd1846ba60f2ce033191c", + "file_size_bytes": 289 + }, + { + "name": "Gp0115668_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report.tsv", + "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", + "id": "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "file_size_bytes": 13875 + }, + { + "name": "Gp0115668_Gottcha2 full TSV 
report", + "description": "Gottcha2 full TSV report for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_report_full.tsv", + "md5_checksum": "2529ede10eb159148711d016ec022af3", + "id": "nmdc:2529ede10eb159148711d016ec022af3", + "file_size_bytes": 956974 + }, + { + "name": "Gp0115668_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115668", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_gottcha2_krona.html", + "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", + "id": "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "file_size_bytes": 265076 + }, + { + "name": "Gp0115668_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115668", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_classification.tsv", + "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", + "id": "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "file_size_bytes": 2377445510 + }, + { + "name": "Gp0115668_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115668", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_report.tsv", + "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", + "id": "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "file_size_bytes": 258291 + }, + { + "name": "Gp0115668_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115668", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_centrifuge_krona.html", + "md5_checksum": "890f9f52d828e1ea8277b52566763069", + "id": 
"nmdc:890f9f52d828e1ea8277b52566763069", + "file_size_bytes": 2333775 + }, + { + "name": "Gp0115668_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115668", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_classification.tsv", + "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", + "id": "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "file_size_bytes": 1966520263 + }, + { + "name": "Gp0115668_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115668", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_report.tsv", + "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", + "id": "nmdc:8677985c5e8ad92dd6d051f85950a636", + "file_size_bytes": 707661 + }, + { + "name": "Gp0115668_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115668", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/ReadbasedAnalysis/nmdc_mga0n66h21_kraken2_krona.html", + "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", + "id": "nmdc:9b2f355a4c2ff3651a3d1179212e2914", + "file_size_bytes": 4276256 + }, + { + "name": "Gp0115668_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115668", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_contigs.fna", + "md5_checksum": "b2b862aede4f333acec79aac3afc7254", + "id": "nmdc:b2b862aede4f333acec79aac3afc7254", + "file_size_bytes": 182488593 + }, + { + "name": "Gp0115668_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115668", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_scaffolds.fna", + 
"md5_checksum": "15d4494dad1e12523aa9afb56b1e7cdb", + "id": "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", + "file_size_bytes": 181514952 + }, + { + "name": "Gp0115668_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_covstats.txt", + "md5_checksum": "6ccb798d615b67dfb9c64ff32d6586c4", + "id": "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", + "file_size_bytes": 25682298 + }, + { + "name": "Gp0115668_Assembled AGP file", + "description": "Assembled AGP file for Gp0115668", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_assembly.agp", + "md5_checksum": "da27801a4e0ab450485f5a3aeb75a7d6", + "id": "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", + "file_size_bytes": 24103161 + }, + { + "name": "Gp0115668_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115668", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/assembly/nmdc_mga0n66h21_pairedMapped_sorted.bam", + "md5_checksum": "f7a4bb0be4599b544360617190b45681", + "id": "nmdc:f7a4bb0be4599b544360617190b45681", + "file_size_bytes": 2958311801 + }, + { + "name": "Gp0115668_Protein FAA", + "description": "Protein FAA for Gp0115668", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_proteins.faa", + "md5_checksum": "5cb6273cd171d1ae5a8d77c8f131517f", + "id": "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", + "file_size_bytes": 88016165 + }, + { + "name": "Gp0115668_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115668", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_structural_annotation.gff", + "md5_checksum": 
"d49149a48134c1091c001448cc91f8e2", + "id": "nmdc:d49149a48134c1091c001448cc91f8e2", + "file_size_bytes": 2527 + }, + { + "name": "Gp0115668_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115668", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_functional_annotation.gff", + "md5_checksum": "7a861805138d425525f298c1790b58ed", + "id": "nmdc:7a861805138d425525f298c1790b58ed", + "file_size_bytes": 91926507 + }, + { + "name": "Gp0115668_KO TSV file", + "description": "KO TSV file for Gp0115668", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko.tsv", + "md5_checksum": "0d0a80f2dafb68f4659709dd2ebd2f28", + "id": "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", + "file_size_bytes": 6651856 + }, + { + "name": "Gp0115668_EC TSV file", + "description": "EC TSV file for Gp0115668", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ec.tsv", + "md5_checksum": "a8b689fdef54bf7235532de634cf553e", + "id": "nmdc:a8b689fdef54bf7235532de634cf553e", + "file_size_bytes": 4156019 + }, + { + "name": "Gp0115668_COG GFF file", + "description": "COG GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cog.gff", + "md5_checksum": "017daaa53039bc1135ca8f013596eb14", + "id": "nmdc:017daaa53039bc1135ca8f013596eb14", + "file_size_bytes": 33686729 + }, + { + "name": "Gp0115668_PFAM GFF file", + "description": "PFAM GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_pfam.gff", + "md5_checksum": "e3eb963d76dc6bdc54756cfa80977611", + "id": "nmdc:e3eb963d76dc6bdc54756cfa80977611", + "file_size_bytes": 29534588 + }, + { + "name": "Gp0115668_TigrFam GFF file", + 
"description": "TigrFam GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_tigrfam.gff", + "md5_checksum": "ab1d561046fbe146ac418e4ed822e861", + "id": "nmdc:ab1d561046fbe146ac418e4ed822e861", + "file_size_bytes": 2596288 + }, + { + "name": "Gp0115668_SMART GFF file", + "description": "SMART GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_smart.gff", + "md5_checksum": "51054c4da9edc391b03418b5f9327815", + "id": "nmdc:51054c4da9edc391b03418b5f9327815", + "file_size_bytes": 18133874 + }, + { + "name": "Gp0115668_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_supfam.gff", + "md5_checksum": "335576d20d4f5c061a875529cbe9572c", + "id": "nmdc:335576d20d4f5c061a875529cbe9572c", + "file_size_bytes": 61337132 + }, + { + "name": "Gp0115668_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_cath_funfam.gff", + "md5_checksum": "6c5387ac5acb8b340a2c2a9e17e62bae", + "id": "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", + "file_size_bytes": 52005922 + }, + { + "name": "Gp0115668_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/annotation/nmdc_mga0n66h21_ko_ec.gff", + "md5_checksum": "eea36326caba5baa0536ac2f5e36d497", + "id": "nmdc:eea36326caba5baa0536ac2f5e36d497", + "file_size_bytes": 21150415 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", 
+ "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115668_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.tooShort.fa", + "md5_checksum": "8c05fc754583d51714bc1aa81396e59d", + "id": "nmdc:8c05fc754583d51714bc1aa81396e59d", + "file_size_bytes": 136315210 + }, + { + "name": "Gp0115668_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_bins.unbinned.fa", + "md5_checksum": "9ef1be5df79aee7c64f2addc4bda6afa", + "id": "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", + "file_size_bytes": 39131745 + }, + { + "name": "Gp0115668_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115668", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_checkm_qa.out", + "md5_checksum": "60db1474ee6a099c10e4fdc728420cf8", + "id": "nmdc:60db1474ee6a099c10e4fdc728420cf8", + "file_size_bytes": 1176 + }, + { + "name": "Gp0115668_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115668", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_hqmq_bin.zip", + "md5_checksum": "5a36d8ba758ee510ab2be3e01fda3e0f", + "id": "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", + "file_size_bytes": 182 + }, + { + "name": "Gp0115668_metabat2 bins", + "description": "metabat2 bins for Gp0115668", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n66h21/MAGs/nmdc_mga0n66h21_metabat_bin.zip", + "md5_checksum": "3f4c7c98bb94687eb96382799c8626fe", + "id": "nmdc:3f4c7c98bb94687eb96382799c8626fe", + 
"file_size_bytes": 2145953 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d77" + }, + "description": "Assembled contigs fasta for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly_contigs.fna", + "file_size_bytes": 180872288, + "type": "nmdc:DataObject", + "id": "nmdc:0ce94528dc5ad4d5b62293d4d95c1e9e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d79" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/mapping_stats.txt", + "file_size_bytes": 24065993, + "type": "nmdc:DataObject", + "id": "nmdc:3d9a9633303a795133a0afbbe7541354", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7b" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2912328623, + "type": "nmdc:DataObject", + "id": "nmdc:0b3a3146c8e3d01fe0cbda4de3a58ff1", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7d" + }, + "description": "Assembled scaffold fasta for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly_scaffolds.fna", + "file_size_bytes": 179900502, + "type": "nmdc:DataObject", + "id": "nmdc:aaab457bbc67e3a755340b9c94d15286", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d82" + }, + "description": "Assembled AGP file for gold:Gp0115668", + "url": "https://data.microbiomedata.org/data/1781_86100/assembly/assembly.agp", + "file_size_bytes": 20866681, + "type": "nmdc:DataObject", + "id": "nmdc:327d188b5936d3c95c61bc9f2131da76", + "name": "assembly.agp", + 
"data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159a0" + }, + "id": "nmdc:b582f88ff691a520217093bc43cf2cbf", + "name": "1781_86100.krona.html", + "description": "Gold:Gp0115668 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86100/ReadbasedAnalysis/centrifuge/1781_86100.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159ab" + }, + "id": "nmdc:34e913d729110bd83d9e44e130550f83", + "name": "1781_86100.json", + "description": "Gold:Gp0115668 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86100/ReadbasedAnalysis/1781_86100.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16272" + }, + "id": "nmdc:328968741e1f9405e81f711e4e419c60", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115668", + "file_size_bytes": 39575271, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16273" + }, + "id": "nmdc:af5ce540e803059bb726b9d73a794dc2", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115668", + "file_size_bytes": 132519280, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16275" + }, + "id": "nmdc:f61532f16df6a916b9ecc308a8d555a2", + "name": "gold:Gp0115668.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 957388, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16276" + }, + "id": "nmdc:40b27b33a28b3b16da74479fb8516aff", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115668", + "file_size_bytes": 1404, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16277" + }, + "id": "nmdc:e0d56b325b27af3c1fff5d603e5c5db1", + "name": "gold:Gp0115668.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 246415, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16279" + }, + "id": "nmdc:5dc85b63c568dfee4fabacf43b5ec75c", + "name": "gold:Gp0115668.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 1076024, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627a" + }, + "id": "nmdc:39ac1fcf35046edc399b1b64faa56ca0", + "name": "gold:Gp0115668.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 1385677, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16280" + }, + "id": "nmdc:81a69601c9bf2a04762f30b38fd796ea", + "name": "gold:Gp0115668.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115668", + "file_size_bytes": 2654069, + "url": "https://data.microbiomedata.org/data/1781_86100/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome 
Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8e" + }, + "description": "Protein FAA for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_proteins.faa", + "md5_checksum": "f97c44951275f8b68fa94ded40fda756", + "file_size_bytes": 3385, + "id": "nmdc:f97c44951275f8b68fa94ded40fda756", + "name": "gold:Gp0115668_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8f" + }, + "description": "Structural annotation GFF file for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_structural_annotation.gff", + "md5_checksum": "b4764f173896dcb134d7c94c1ee13ca3", + "file_size_bytes": 3385, + "id": "nmdc:b4764f173896dcb134d7c94c1ee13ca3", + "name": "gold:Gp0115668_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d90" + }, + "description": "Functional annotation GFF file for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_functional_annotation.gff", + "md5_checksum": "c57d28f7dd791aab5c4caee00b247ef9", + "file_size_bytes": 3385, + "id": "nmdc:c57d28f7dd791aab5c4caee00b247ef9", + "name": "gold:Gp0115668_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da6" + }, + "description": "KO TSV File for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_ko.tsv", + "md5_checksum": "dbd78725415f5f8e80f590c3588a1c60", + "file_size_bytes": 3385, + "id": "nmdc:dbd78725415f5f8e80f590c3588a1c60", + "name": "gold:Gp0115668_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16daa" + }, + 
"description": "EC TSV File for gold:Gp0115668", + "url": "https://data.microbiomedata.org/1781_86100/img_annotation/Ga0482259_ec.tsv", + "md5_checksum": "bcbae14f9733da2b512b5f5b6c8fcb98", + "file_size_bytes": 3385, + "id": "nmdc:bcbae14f9733da2b512b5f5b6c8fcb98", + "name": "gold:Gp0115668_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34736" + }, + "has_input": [ + "nmdc:b2b862aede4f333acec79aac3afc7254", + "nmdc:f7a4bb0be4599b544360617190b45681", + "nmdc:7a861805138d425525f298c1790b58ed" + ], + "too_short_contig_num": 297764, + "part_of": [ + "nmdc:mga0n66h21" + ], + "binned_contig_num": 1669, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8c05fc754583d51714bc1aa81396e59d", + "nmdc:9ef1be5df79aee7c64f2addc4bda6afa", + "nmdc:60db1474ee6a099c10e4fdc728420cf8", + "nmdc:5a36d8ba758ee510ab2be3e01fda3e0f", + "nmdc:3f4c7c98bb94687eb96382799c8626fe" + ], + "was_informed_by": "gold:Gp0115668", + "input_contig_num": 323261, + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0n66h21", + "mags_list": [ + { + "number_of_contig": 1013, + "completeness": 12.29, + "bin_name": "bins.1", + "gene_count": 4188, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.32, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 + }, + { + "number_of_contig": 599, + "completeness": 58.72, + "bin_name": "bins.2", + "gene_count": 2940, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 12.95, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + 
"num_5s": 0, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 25 + }, + { + "number_of_contig": 57, + "completeness": 4.0, + "bin_name": "bins.3", + "gene_count": 258, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + } + ], + "unbinned_contig_num": 23828, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d8" + }, + "has_input": [ + "nmdc:b2b862aede4f333acec79aac3afc7254" + ], + "part_of": [ + "nmdc:mga0n66h21" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5cb6273cd171d1ae5a8d77c8f131517f", + "nmdc:d49149a48134c1091c001448cc91f8e2", + "nmdc:7a861805138d425525f298c1790b58ed", + "nmdc:0d0a80f2dafb68f4659709dd2ebd2f28", + "nmdc:a8b689fdef54bf7235532de634cf553e", + "nmdc:017daaa53039bc1135ca8f013596eb14", + "nmdc:e3eb963d76dc6bdc54756cfa80977611", + "nmdc:ab1d561046fbe146ac418e4ed822e861", + "nmdc:51054c4da9edc391b03418b5f9327815", + "nmdc:335576d20d4f5c061a875529cbe9572c", + "nmdc:6c5387ac5acb8b340a2c2a9e17e62bae", + "nmdc:eea36326caba5baa0536ac2f5e36d497" + ], + "was_informed_by": "gold:Gp0115668", + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0n66h21", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc3" + }, + "has_input": [ + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + ], + "part_of": [ + "nmdc:mga0n66h21" + ], + "ctg_logsum": 489108, + "scaf_logsum": 
491574, + "gap_pct": 0.00308, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b2b862aede4f333acec79aac3afc7254", + "nmdc:15d4494dad1e12523aa9afb56b1e7cdb", + "nmdc:6ccb798d615b67dfb9c64ff32d6586c4", + "nmdc:da27801a4e0ab450485f5a3aeb75a7d6", + "nmdc:f7a4bb0be4599b544360617190b45681" + ], + "asm_score": 4.087, + "was_informed_by": "gold:Gp0115668", + "ctg_powsum": 53542, + "scaf_max": 53286, + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "scaf_powsum": 53839, + "execution_resource": "NERSC-Cori", + "contigs": 323269, + "name": "Assembly Activity for nmdc:mga0n66h21", + "ctg_max": 53286, + "gc_std": 0.10793, + "contig_bp": 169601906, + "gc_avg": 0.39548, + "started_at_time": "2021-10-11T02:28:43Z", + "scaf_bp": 169607136, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 322890, + "ended_at_time": "2021-10-11T05:19:17+00:00", + "ctg_l50": 525, + "ctg_l90": 299, + "ctg_n50": 83667, + "ctg_n90": 263711, + "scaf_l50": 526, + "scaf_l90": 299, + "scaf_n50": 83307, + "scaf_n90": 263381, + "scaf_l_gt50k": 53286, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.03141731 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b35" + }, + "id": "nmdc:omprc-11-wepaa271", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-srz83p34" + ], + "has_output": [ + "jgi:55d740240d8785342fcf7e37" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + 
"type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115668" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88c" + }, + "has_input": [ + "nmdc:0967bbbe5ee2737f66bc6ee7bf366bbb" + ], + "part_of": [ + "nmdc:mga0n66h21" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2", + "nmdc:63fb5949ebafd1846ba60f2ce033191c" + ], + "was_informed_by": "gold:Gp0115668", + "input_read_count": 35064492, + "output_read_bases": 5069132469, + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "input_read_bases": 5294738292, + "name": "Read QC Activity for nmdc:mga0n66h21", + "output_read_count": 33873238, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:19:17+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf53" + }, + "has_input": [ + "nmdc:121b1c25e803f2a010ae5a2206a8d1d2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8bdf8bbee24242aaaee763c1d851c05e", + "nmdc:2529ede10eb159148711d016ec022af3", + "nmdc:a0631ed87dc2e7c69355ef575dbe4e60", + "nmdc:93d26b69073bd4d6283aee3c7e5997d4", + "nmdc:d7a49bf0d9797a2b603643a2de896b5c", + "nmdc:890f9f52d828e1ea8277b52566763069", + "nmdc:371b7fabbcbc2d22c3ca84b422a88863", + "nmdc:8677985c5e8ad92dd6d051f85950a636", + "nmdc:9b2f355a4c2ff3651a3d1179212e2914" + ], + "was_informed_by": "gold:Gp0115668", + "id": "nmdc:0aec70779dcdcc1b577d7e372c0eca87", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0n66h21", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:19:17+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + 
"file_size_bytes": 6863035214, + "type": "nmdc:DataObject", + "id": "jgi:55a9cb010d87852b21508920", + "name": "9289.1.128215.GGACTCC-AGAGTAG.fastq.gz" + }, + { + "name": "Gp0115679_Filtered Reads", + "description": "Filtered Reads for Gp0115679", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filtered.fastq.gz", + "md5_checksum": "7e294ff66cb7ddf84edf9c8bed576bcd", + "id": "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", + "file_size_bytes": 5673282665 + }, + { + "name": "Gp0115679_Filtered Stats", + "description": "Filtered Stats for Gp0115679", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/qa/nmdc_mga0gg1q48_filterStats.txt", + "md5_checksum": "08e2a96f7aaaff5ff6f747cfe6f49e49", + "id": "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49", + "file_size_bytes": 276 + }, + { + "name": "Gp0115679_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report.tsv", + "md5_checksum": "e20f8c00473472fa073adde871860801", + "id": "nmdc:e20f8c00473472fa073adde871860801", + "file_size_bytes": 18551 + }, + { + "name": "Gp0115679_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_report_full.tsv", + "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", + "id": "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "file_size_bytes": 1200541 + }, + { + "name": "Gp0115679_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115679", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_gottcha2_krona.html", + "md5_checksum": "f721d9dd168b0dea080b191a4396167e", + "id": 
"nmdc:f721d9dd168b0dea080b191a4396167e", + "file_size_bytes": 278990 + }, + { + "name": "Gp0115679_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115679", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_classification.tsv", + "md5_checksum": "ab77e396ec643b58b54da92848b88a96", + "id": "nmdc:ab77e396ec643b58b54da92848b88a96", + "file_size_bytes": 4742886512 + }, + { + "name": "Gp0115679_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115679", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_report.tsv", + "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", + "id": "nmdc:f2514844e47a9e3d268671f80f152bc1", + "file_size_bytes": 266907 + }, + { + "name": "Gp0115679_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115679", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_centrifuge_krona.html", + "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", + "id": "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "file_size_bytes": 2359747 + }, + { + "name": "Gp0115679_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115679", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_classification.tsv", + "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", + "id": "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "file_size_bytes": 3859620862 + }, + { + "name": "Gp0115679_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115679", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_report.tsv", + "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", + "id": "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "file_size_bytes": 729541 + }, + { + "name": "Gp0115679_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115679", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/ReadbasedAnalysis/nmdc_mga0gg1q48_kraken2_krona.html", + "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", + "id": "nmdc:77860ee043ae9738e7702a3f665b15fa", + "file_size_bytes": 4358324 + }, + { + "name": "Gp0115679_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115679", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_contigs.fna", + "md5_checksum": "e4314c3743795e0be8beda8b7f806557", + "id": "nmdc:e4314c3743795e0be8beda8b7f806557", + "file_size_bytes": 275030840 + }, + { + "name": "Gp0115679_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115679", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_scaffolds.fna", + "md5_checksum": "2a288a5827b66c88f8abf202bbe37aab", + "id": "nmdc:2a288a5827b66c88f8abf202bbe37aab", + "file_size_bytes": 273327529 + }, + { + "name": "Gp0115679_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_covstats.txt", + "md5_checksum": "a51c7b3a70601a885594936fd6c753bc", + "id": "nmdc:a51c7b3a70601a885594936fd6c753bc", + "file_size_bytes": 42368790 + }, + { + "name": "Gp0115679_Assembled AGP file", + "description": "Assembled AGP file for Gp0115679", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_assembly.agp", + "md5_checksum": "8851d6fed8e5bbee88aeb7af77bbcfe3", + "id": "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", + "file_size_bytes": 40232148 + }, + { + "name": "Gp0115679_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115679", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/assembly/nmdc_mga0gg1q48_pairedMapped_sorted.bam", + "md5_checksum": "002ed5f389b8a13735d27a8741290f6b", + "id": "nmdc:002ed5f389b8a13735d27a8741290f6b", + "file_size_bytes": 6236105158 + }, + { + "name": "Gp0115679_Protein FAA", + "description": "Protein FAA for Gp0115679", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_proteins.faa", + "md5_checksum": "ac3faa8ad0e8e7827fcf6b882ec90706", + "id": "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", + "file_size_bytes": 151048115 + }, + { + "name": "Gp0115679_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115679", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_structural_annotation.gff", + "md5_checksum": "e3712dbbf0d0bfa14b9b340e73ebf4d0", + "id": "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", + "file_size_bytes": 2549 + }, + { + "name": "Gp0115679_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115679", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_functional_annotation.gff", + "md5_checksum": "8aed63ca1302c874040e74aceb54ff05", + "id": "nmdc:8aed63ca1302c874040e74aceb54ff05", + "file_size_bytes": 166415068 + }, + { + "name": "Gp0115679_KO TSV file", + "description": "KO TSV file for Gp0115679", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko.tsv", + "md5_checksum": "6361a06de62d93909abfb565a47fd5f0", + "id": "nmdc:6361a06de62d93909abfb565a47fd5f0", + "file_size_bytes": 18038415 + }, + { + "name": "Gp0115679_EC TSV file", + "description": "EC TSV file for Gp0115679", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ec.tsv", + "md5_checksum": "bd9d330d1d6a925066003d653a171ca5", + "id": "nmdc:bd9d330d1d6a925066003d653a171ca5", + "file_size_bytes": 11896121 + }, + { + "name": "Gp0115679_COG GFF file", + "description": "COG GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cog.gff", + "md5_checksum": "c497ffc128d6738bf3868529eb7ff899", + "id": "nmdc:c497ffc128d6738bf3868529eb7ff899", + "file_size_bytes": 81943107 + }, + { + "name": "Gp0115679_PFAM GFF file", + "description": "PFAM GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_pfam.gff", + "md5_checksum": "b67886515193abbd1eec79de067b3196", + "id": "nmdc:b67886515193abbd1eec79de067b3196", + "file_size_bytes": 65136506 + }, + { + "name": "Gp0115679_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_tigrfam.gff", + "md5_checksum": "05e7a016dddba90801c29de448c43c3c", + "id": "nmdc:05e7a016dddba90801c29de448c43c3c", + "file_size_bytes": 8536835 + }, + { + "name": "Gp0115679_SMART GFF file", + "description": "SMART GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_smart.gff", + "md5_checksum": "7effd4db11316ff95f6a8303807d530f", + "id": "nmdc:7effd4db11316ff95f6a8303807d530f", + "file_size_bytes": 19907975 + }, + { + "name": "Gp0115679_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_supfam.gff", + "md5_checksum": "503770f008dd2cf04d73821412dcf23a", + "id": "nmdc:503770f008dd2cf04d73821412dcf23a", + "file_size_bytes": 107636995 + }, + { + "name": "Gp0115679_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_cath_funfam.gff", + "md5_checksum": "c33049c64af55f8ac54d52c861b0a221", + "id": "nmdc:c33049c64af55f8ac54d52c861b0a221", + "file_size_bytes": 89046662 + }, + { + "name": "Gp0115679_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/annotation/nmdc_mga0gg1q48_ko_ec.gff", + "md5_checksum": "b162efd63f79bc34de66f61348471b74", + "id": "nmdc:b162efd63f79bc34de66f61348471b74", + "file_size_bytes": 57348606 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115679_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.tooShort.fa", + "md5_checksum": "d830e60f4fb30ecb0610f991dcc70e47", + "id": "nmdc:d830e60f4fb30ecb0610f991dcc70e47", + "file_size_bytes": 215033122 + }, + { + "name": "Gp0115679_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115679", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_bins.unbinned.fa", + "md5_checksum": "d33af65556b85b1aaf3a5c48b6e294de", + "id": "nmdc:d33af65556b85b1aaf3a5c48b6e294de", + "file_size_bytes": 44057142 + }, + { + "name": "Gp0115679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_checkm_qa.out", + "md5_checksum": "d2d655091735e6308aafca1e1633aad9", + "id": "nmdc:d2d655091735e6308aafca1e1633aad9", + "file_size_bytes": 2394 + }, + { + "name": "Gp0115679_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115679", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_hqmq_bin.zip", + "md5_checksum": "17c6259329da1bbe6da5a18274452a8d", + "id": "nmdc:17c6259329da1bbe6da5a18274452a8d", + "file_size_bytes": 3215059 + }, + { + "name": "Gp0115679_metabat2 bins", + "description": "metabat2 bins for Gp0115679", + "url": "https://data.microbiomedata.org/data/nmdc:mga0gg1q48/MAGs/nmdc_mga0gg1q48_metabat_bin.zip", + "md5_checksum": "9250ad41cb19e04a6002e62bda38bbfb", + "id": "nmdc:9250ad41cb19e04a6002e62bda38bbfb", + "file_size_bytes": 1649649 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da7" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/mapping_stats.txt", + "file_size_bytes": 39709915, + "type": "nmdc:DataObject", + "id": "nmdc:eb7b565580c8a81f8c674ce87a7c07c3", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da9" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115679", + "url": 
"https://data.microbiomedata.org/data/1781_86105/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 6165329815, + "type": "nmdc:DataObject", + "id": "nmdc:11956fa2a6c996aedac70f779222570f", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dab" + }, + "description": "Assembled scaffold fasta for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/assembly_scaffolds.fna", + "file_size_bytes": 270701949, + "type": "nmdc:DataObject", + "id": "nmdc:c8a6971a9982af6e8a054dee6d1cb78d", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db3" + }, + "description": "Assembled contigs fasta for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/assembly_contigs.fna", + "file_size_bytes": 272371965, + "type": "nmdc:DataObject", + "id": "nmdc:4fb2f3e8ebd99cea1e797e248b2e5c1d", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dbd" + }, + "description": "Assembled AGP file for gold:Gp0115679", + "url": "https://data.microbiomedata.org/data/1781_86105/assembly/assembly.agp", + "file_size_bytes": 34847488, + "type": "nmdc:DataObject", + "id": "nmdc:a85f992644d46cb23475ac9850f4e864", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a0e" + }, + "id": "nmdc:ed54f8af3521aae47d2757852695a188", + "name": "1781_86105.json", + "description": "Gold:Gp0115679 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86105/ReadbasedAnalysis/1781_86105.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a10" + }, + "id": "nmdc:7ca0d8ecd80292bc9bea1862c755a2f8", + "name": "1781_86105.krona.html", + "description": "Gold:Gp0115679 KRONA 
plot HTML file", + "url": "https://data.microbiomedata.org/1781_86105/ReadbasedAnalysis/centrifuge/1781_86105.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ed" + }, + "id": "nmdc:6ef4477214fc698cd494f3e516cdda23", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115679", + "file_size_bytes": 208557497, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ef" + }, + "id": "nmdc:f81a3c77048a277c28b110d4797f48d5", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115679", + "file_size_bytes": 3400, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f0" + }, + "id": "nmdc:e3396f34bc4bcc83c4b43bbd1f698450", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115679", + "file_size_bytes": 51045792, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f1" + }, + "id": "nmdc:84cac08cd46c06525b4001424027fd60", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115679", + "file_size_bytes": 830, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f3" + }, + "id": "nmdc:45cce58fd37ad46815381000dd21470d", + "name": "gold:Gp0115679.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + 
"file_size_bytes": 657176, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f4" + }, + "id": "nmdc:db508aa3c84853ff9e2c156d7afcbd7b", + "name": "gold:Gp0115679.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 358418, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f5" + }, + "id": "nmdc:219d9761a4dbe9e7374c659a03e8ecf0", + "name": "gold:Gp0115679.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 921488, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f6" + }, + "id": "nmdc:d0582754ac551686e46730419ec9d047", + "name": "gold:Gp0115679.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 374792, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f7" + }, + "id": "nmdc:40152478dc669bc63bbbd4bda0d0c5df", + "name": "gold:Gp0115679.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 332863, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f8" + }, + "id": "nmdc:4d9e7c6ffa3fa24c9b9fdeb0e722c57f", + "name": "gold:Gp0115679.bins.12.fa", + "description": "metabat2 binned 
contig file for gold:Gp0115679", + "file_size_bytes": 658843, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f9" + }, + "id": "nmdc:df7fd9d79f734d8e02589a0bae44a810", + "name": "gold:Gp0115679.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 325713, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fa" + }, + "id": "nmdc:abe52bd40df93d12453b1f543a782b2a", + "name": "gold:Gp0115679.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 324504, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fb" + }, + "id": "nmdc:8e65f7abf4dfe56dce5ea4d42dc83b12", + "name": "gold:Gp0115679.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 430938, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fc" + }, + "id": "nmdc:aaac58966e74cbc7e8d0ba7048a8691f", + "name": "gold:Gp0115679.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 378596, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fd" + }, + "id": "nmdc:d4646cc3223b1d9d13c012a6f0dd98e3", + "name": "gold:Gp0115679.bins.13.fa", 
+ "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 209262, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162fe" + }, + "id": "nmdc:4a6a269ffe428009827fe87d5fc82555", + "name": "gold:Gp0115679.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 238341, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ff" + }, + "id": "nmdc:5f136581b45cbac4ffcd9bd61c3b2c0c", + "name": "gold:Gp0115679.bins.16.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 645317, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16300" + }, + "id": "nmdc:c42cd75d1b3e947bb6d82fcca7d4d1a2", + "name": "gold:Gp0115679.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 2098951, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16301" + }, + "id": "nmdc:0922541390d01fa0e51bd93665be8913", + "name": "gold:Gp0115679.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 380540, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16306" + }, + "id": "nmdc:c5df3f066326803dbeafb15a36aa0d93", + 
"name": "gold:Gp0115679.bins.15.fa", + "description": "metabat2 binned contig file for gold:Gp0115679", + "file_size_bytes": 299202, + "url": "https://data.microbiomedata.org/data/1781_86105/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dad" + }, + "description": "Functional annotation GFF file for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_functional_annotation.gff", + "md5_checksum": "b8c895face8e8e77bbfc7163c7eb7850", + "file_size_bytes": 3385, + "id": "nmdc:b8c895face8e8e77bbfc7163c7eb7850", + "name": "gold:Gp0115679_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dae" + }, + "description": "KO TSV File for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_ko.tsv", + "md5_checksum": "ee75eaed19b9a259e0e70e20a53f7fba", + "file_size_bytes": 3385, + "id": "nmdc:ee75eaed19b9a259e0e70e20a53f7fba", + "name": "gold:Gp0115679_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16daf" + }, + "description": "EC TSV File for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_ec.tsv", + "md5_checksum": "aa5fa1b83592459bd3e742be4949d0b1", + "file_size_bytes": 3385, + "id": "nmdc:aa5fa1b83592459bd3e742be4949d0b1", + "name": "gold:Gp0115679_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16db1" + }, + "description": "Protein FAA for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_proteins.faa", + "md5_checksum": "21f3d777493f87403b60a4a1b3dd2f1b", + "file_size_bytes": 3385, + "id": 
"nmdc:21f3d777493f87403b60a4a1b3dd2f1b", + "name": "gold:Gp0115679_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16db6" + }, + "description": "Structural annotation GFF file for gold:Gp0115679", + "url": "https://data.microbiomedata.org/1781_86105/img_annotation/Ga0482248_structural_annotation.gff", + "md5_checksum": "b63b42c7892b4a14e5661bca5bfa2419", + "file_size_bytes": 3385, + "id": "nmdc:b63b42c7892b4a14e5661bca5bfa2419", + "name": "gold:Gp0115679_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3472e" + }, + "has_input": [ + "nmdc:e4314c3743795e0be8beda8b7f806557", + "nmdc:002ed5f389b8a13735d27a8741290f6b", + "nmdc:8aed63ca1302c874040e74aceb54ff05" + ], + "too_short_contig_num": 504368, + "part_of": [ + "nmdc:mga0gg1q48" + ], + "binned_contig_num": 1887, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:d830e60f4fb30ecb0610f991dcc70e47", + "nmdc:d33af65556b85b1aaf3a5c48b6e294de", + "nmdc:d2d655091735e6308aafca1e1633aad9", + "nmdc:17c6259329da1bbe6da5a18274452a8d", + "nmdc:9250ad41cb19e04a6002e62bda38bbfb" + ], + "was_informed_by": "gold:Gp0115679", + "input_contig_num": 531775, + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0gg1q48", + "mags_list": [ + { + "number_of_contig": 73, + "completeness": 95.65, + "bin_name": "bins.1", + "gene_count": 2974, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 1, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.22, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + 
"gtdbtk_genus": "UBA11222", + "num_t_rna": 45 + }, + { + "number_of_contig": 253, + "completeness": 39.12, + "bin_name": "bins.10", + "gene_count": 1586, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.79, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 + }, + { + "number_of_contig": 135, + "completeness": 16.83, + "bin_name": "bins.2", + "gene_count": 706, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 144, + "completeness": 22.53, + "bin_name": "bins.3", + "gene_count": 731, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.14, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 273, + "completeness": 68.97, + "bin_name": "bins.4", + "gene_count": 2023, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 33 + }, + { + "number_of_contig": 3, + "completeness": 8.33, + "bin_name": "bins.5", + "gene_count": 306, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 16.67, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", 
+ "num_t_rna": 15 + }, + { + "number_of_contig": 4, + "completeness": 77.01, + "bin_name": "bins.6", + "gene_count": 976, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA9983_A", + "num_16s": 1, + "gtdbtk_family": "UBA2163", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Paceibacteria", + "gtdbtk_phylum": "Patescibacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "1-14-0-10-47-16", + "num_t_rna": 34 + }, + { + "number_of_contig": 309, + "completeness": 74.97, + "bin_name": "bins.7", + "gene_count": 2072, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 2.13, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 28 + }, + { + "number_of_contig": 182, + "completeness": 92.61, + "bin_name": "bins.8", + "gene_count": 3044, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 5.0, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 41 + }, + { + "number_of_contig": 511, + "completeness": 48.64, + "bin_name": "bins.9", + "gene_count": 2267, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 5.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + } + ], + "unbinned_contig_num": 25520, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:30:42+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": 
"649b005bbf2caae0415ef9c9" + }, + "has_input": [ + "nmdc:e4314c3743795e0be8beda8b7f806557" + ], + "part_of": [ + "nmdc:mga0gg1q48" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac3faa8ad0e8e7827fcf6b882ec90706", + "nmdc:e3712dbbf0d0bfa14b9b340e73ebf4d0", + "nmdc:8aed63ca1302c874040e74aceb54ff05", + "nmdc:6361a06de62d93909abfb565a47fd5f0", + "nmdc:bd9d330d1d6a925066003d653a171ca5", + "nmdc:c497ffc128d6738bf3868529eb7ff899", + "nmdc:b67886515193abbd1eec79de067b3196", + "nmdc:05e7a016dddba90801c29de448c43c3c", + "nmdc:7effd4db11316ff95f6a8303807d530f", + "nmdc:503770f008dd2cf04d73821412dcf23a", + "nmdc:c33049c64af55f8ac54d52c861b0a221", + "nmdc:b162efd63f79bc34de66f61348471b74" + ], + "was_informed_by": "gold:Gp0115679", + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0gg1q48", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:30:42+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fbd" + }, + "has_input": [ + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + ], + "part_of": [ + "nmdc:mga0gg1q48" + ], + "ctg_logsum": 682158, + "scaf_logsum": 725191, + "gap_pct": 0.02692, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e4314c3743795e0be8beda8b7f806557", + "nmdc:2a288a5827b66c88f8abf202bbe37aab", + "nmdc:a51c7b3a70601a885594936fd6c753bc", + "nmdc:8851d6fed8e5bbee88aeb7af77bbcfe3", + "nmdc:002ed5f389b8a13735d27a8741290f6b" + ], + "asm_score": 12.582, + "was_informed_by": "gold:Gp0115679", + "ctg_powsum": 84136, + "scaf_max": 884972, + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "scaf_powsum": 89882, + "execution_resource": "NERSC-Cori", + "contigs": 531791, + "name": "Assembly Activity for nmdc:mga0gg1q48", + "ctg_max": 719201, + "gc_std": 0.09689, + "contig_bp": 
254202396, + "gc_avg": 0.48697, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 254270837, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 525116, + "ended_at_time": "2021-10-11T06:30:42+00:00", + "ctg_l50": 449, + "ctg_l90": 285, + "ctg_n50": 139317, + "ctg_n90": 451813, + "scaf_l50": 455, + "scaf_l90": 285, + "scaf_n50": 133535, + "scaf_n90": 445430, + "scaf_l_gt50k": 3540548, + "scaf_n_gt50k": 34, + "scaf_pct_gt50k": 1.3924317 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b36" + }, + "id": "nmdc:omprc-11-hymrq852", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-11219w54" + ], + "has_output": [ + "jgi:55a9cb010d87852b21508920" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115679" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c884" + }, + "has_input": [ + "nmdc:3bf389b767cf8a49224dc0028e55eeb7" + ], + "part_of": [ + "nmdc:mga0gg1q48" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd", + "nmdc:08e2a96f7aaaff5ff6f747cfe6f49e49" + ], + "was_informed_by": "gold:Gp0115679", + "input_read_count": 67696542, + "output_read_bases": 9825387057, + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + 
"input_read_bases": 10222177842, + "name": "Read QC Activity for nmdc:mga0gg1q48", + "output_read_count": 67147510, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:30:42+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf4b" + }, + "has_input": [ + "nmdc:7e294ff66cb7ddf84edf9c8bed576bcd" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e20f8c00473472fa073adde871860801", + "nmdc:52f8c91d04e8d179af98e7fac35a8ff1", + "nmdc:f721d9dd168b0dea080b191a4396167e", + "nmdc:ab77e396ec643b58b54da92848b88a96", + "nmdc:f2514844e47a9e3d268671f80f152bc1", + "nmdc:a3e49f39f33c54bc8d9430a947cd4b16", + "nmdc:17bc87145b0dcabbb8e3de0f393f4d4d", + "nmdc:aecb320fdfe4c4da35c0206dd34e0f40", + "nmdc:77860ee043ae9738e7702a3f665b15fa" + ], + "was_informed_by": "gold:Gp0115679", + "id": "nmdc:8bc23e2bd69c8af5538db3d6192a3b5b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0gg1q48", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:30:42+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1840708400, + "type": "nmdc:DataObject", + "id": "jgi:55d818010d8785342fcf8278", + "name": "9387.2.132031.GTCCGC.fastq.gz" + }, + { + "name": "Gp0115667_Filtered Reads", + "description": "Filtered Reads for Gp0115667", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filtered.fastq.gz", + "md5_checksum": "7d4057e3a44a05171c13fb0ed3e2294a", + "id": "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", + "file_size_bytes": 1599931347 + }, + { + "name": "Gp0115667_Filtered Stats", + "description": "Filtered Stats for Gp0115667", + "data_object_type": "QC Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/qa/nmdc_mga0n0je44_filterStats.txt", + "md5_checksum": "dae7c6e067f69ef6db39b4240cc450ba", + "id": "nmdc:dae7c6e067f69ef6db39b4240cc450ba", + "file_size_bytes": 286 + }, + { + "name": "Gp0115667_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report.tsv", + "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", + "id": "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "file_size_bytes": 10576 + }, + { + "name": "Gp0115667_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_report_full.tsv", + "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", + "id": "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "file_size_bytes": 792905 + }, + { + "name": "Gp0115667_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115667", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_gottcha2_krona.html", + "md5_checksum": "2afff209a40ca4895307f3a47080c534", + "id": "nmdc:2afff209a40ca4895307f3a47080c534", + "file_size_bytes": 254763 + }, + { + "name": "Gp0115667_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115667", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_classification.tsv", + "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", + "id": "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "file_size_bytes": 1336111813 + }, + { + "name": "Gp0115667_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115667", + "data_object_type": "Centrifuge Classification Report", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_report.tsv", + "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", + "id": "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "file_size_bytes": 254506 + }, + { + "name": "Gp0115667_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115667", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_centrifuge_krona.html", + "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", + "id": "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "file_size_bytes": 2323153 + }, + { + "name": "Gp0115667_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115667", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_classification.tsv", + "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", + "id": "nmdc:eb189cbf0543203d2521397b73d4d34b", + "file_size_bytes": 1097852664 + }, + { + "name": "Gp0115667_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115667", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_report.tsv", + "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", + "id": "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "file_size_bytes": 639213 + }, + { + "name": "Gp0115667_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115667", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/ReadbasedAnalysis/nmdc_mga0n0je44_kraken2_krona.html", + "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", + "id": "nmdc:ac90bf3384ce44d097f7897ac5ff8134", + "file_size_bytes": 3979807 + }, + { + "name": "Gp0115667_Assembled contigs 
fasta", + "description": "Assembled contigs fasta for Gp0115667", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_contigs.fna", + "md5_checksum": "b3cefc5a9599a4fb9432132baf7f5565", + "id": "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "file_size_bytes": 62926054 + }, + { + "name": "Gp0115667_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115667", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_scaffolds.fna", + "md5_checksum": "b60f674a01e3f7fff5ead95f330cef4f", + "id": "nmdc:b60f674a01e3f7fff5ead95f330cef4f", + "file_size_bytes": 62577490 + }, + { + "name": "Gp0115667_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_covstats.txt", + "md5_checksum": "2e4532cb03bb1e9201976b9d65893788", + "id": "nmdc:2e4532cb03bb1e9201976b9d65893788", + "file_size_bytes": 9189143 + }, + { + "name": "Gp0115667_Assembled AGP file", + "description": "Assembled AGP file for Gp0115667", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_assembly.agp", + "md5_checksum": "e49f8a26a9cd0420b688c967bbacb4c6", + "id": "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", + "file_size_bytes": 8508903 + }, + { + "name": "Gp0115667_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115667", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/assembly/nmdc_mga0n0je44_pairedMapped_sorted.bam", + "md5_checksum": "d9b957c7efe7f753fe67441d0be605c6", + "id": "nmdc:d9b957c7efe7f753fe67441d0be605c6", + "file_size_bytes": 1771039554 + }, + { + "name": "Gp0115667_Protein FAA", + "description": "Protein FAA for Gp0115667", + 
"data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_proteins.faa", + "md5_checksum": "45e8b887fc06ddbf2af3ecf9c91a7bf7", + "id": "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", + "file_size_bytes": 31564336 + }, + { + "name": "Gp0115667_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115667", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_structural_annotation.gff", + "md5_checksum": "26ab4381753f685c44091e1f17d8bab5", + "id": "nmdc:26ab4381753f685c44091e1f17d8bab5", + "file_size_bytes": 2760 + }, + { + "name": "Gp0115667_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115667", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_functional_annotation.gff", + "md5_checksum": "5a378f3975ab6c2cf2a36b0b007ea3f8", + "id": "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", + "file_size_bytes": 34525554 + }, + { + "name": "Gp0115667_KO TSV file", + "description": "KO TSV file for Gp0115667", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko.tsv", + "md5_checksum": "6df49253fee066c699d6a5191a0efaed", + "id": "nmdc:6df49253fee066c699d6a5191a0efaed", + "file_size_bytes": 3439857 + }, + { + "name": "Gp0115667_EC TSV file", + "description": "EC TSV file for Gp0115667", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ec.tsv", + "md5_checksum": "5e35e51a595f892968e57681ee448e5f", + "id": "nmdc:5e35e51a595f892968e57681ee448e5f", + "file_size_bytes": 2203532 + }, + { + "name": "Gp0115667_COG GFF file", + "description": "COG GFF file for Gp0115667", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cog.gff", + "md5_checksum": "ae1bc890152d28387f65c65d434b97ea", + "id": "nmdc:ae1bc890152d28387f65c65d434b97ea", + "file_size_bytes": 15384958 + }, + { + "name": "Gp0115667_PFAM GFF file", + "description": "PFAM GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_pfam.gff", + "md5_checksum": "fb736eaba77cbd99135ddbc32168db94", + "id": "nmdc:fb736eaba77cbd99135ddbc32168db94", + "file_size_bytes": 12472999 + }, + { + "name": "Gp0115667_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_tigrfam.gff", + "md5_checksum": "3b00892f95bc4dedaf4384685a75d52f", + "id": "nmdc:3b00892f95bc4dedaf4384685a75d52f", + "file_size_bytes": 1755779 + }, + { + "name": "Gp0115667_SMART GFF file", + "description": "SMART GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_smart.gff", + "md5_checksum": "b8c0d7c187169f34aafc17308aeea2ed", + "id": "nmdc:b8c0d7c187169f34aafc17308aeea2ed", + "file_size_bytes": 3937293 + }, + { + "name": "Gp0115667_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_supfam.gff", + "md5_checksum": "2a8e4bb3922ec664bbb5ce49a30cc87e", + "id": "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", + "file_size_bytes": 22725250 + }, + { + "name": "Gp0115667_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_cath_funfam.gff", + "md5_checksum": "34eddc2289f3e3b4707a6c8060f6dd99", + "id": "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", + "file_size_bytes": 17788890 + }, + { + "name": "Gp0115667_KO_EC GFF file", + "description": "KO_EC GFF file for 
Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/annotation/nmdc_mga0n0je44_ko_ec.gff", + "md5_checksum": "0a51a22e2cf94c853657381549aa8f04", + "id": "nmdc:0a51a22e2cf94c853657381549aa8f04", + "file_size_bytes": 11004264 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115667_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.tooShort.fa", + "md5_checksum": "1277a6924ab380e001a7208e7ebbb0e3", + "id": "nmdc:1277a6924ab380e001a7208e7ebbb0e3", + "file_size_bytes": 46335107 + }, + { + "name": "Gp0115667_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_bins.unbinned.fa", + "md5_checksum": "48772112891988a2ef3f0c40786c11fd", + "id": "nmdc:48772112891988a2ef3f0c40786c11fd", + "file_size_bytes": 10701981 + }, + { + "name": "Gp0115667_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115667", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_checkm_qa.out", + "md5_checksum": "527e2c19607c225a707db67b5be01b6f", + "id": "nmdc:527e2c19607c225a707db67b5be01b6f", + "file_size_bytes": 1360 + }, + { + "name": "Gp0115667_high-quality and medium-quality bins", + "description": "high-quality 
and medium-quality bins for Gp0115667", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_hqmq_bin.zip", + "md5_checksum": "027626ff998bf1e495e32d09cab4bb08", + "id": "nmdc:027626ff998bf1e495e32d09cab4bb08", + "file_size_bytes": 1462611 + }, + { + "name": "Gp0115667_metabat2 bins", + "description": "metabat2 bins for Gp0115667", + "url": "https://data.microbiomedata.org/data/nmdc:mga0n0je44/MAGs/nmdc_mga0n0je44_metabat_bin.zip", + "md5_checksum": "733e798989606c802b3bbfc952a38841", + "id": "nmdc:733e798989606c802b3bbfc952a38841", + "file_size_bytes": 334014 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d71" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/mapping_stats.txt", + "file_size_bytes": 8608508, + "type": "nmdc:DataObject", + "id": "nmdc:65454371fa1809684cc19c5c1cb49c4c", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d72" + }, + "description": "Assembled contigs fasta for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/assembly_contigs.fna", + "file_size_bytes": 62345419, + "type": "nmdc:DataObject", + "id": "nmdc:28a8512eff8b81cebce0614fe5ed18a0", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d74" + }, + "description": "Assembled AGP file for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/assembly.agp", + "file_size_bytes": 7346593, + "type": "nmdc:DataObject", + "id": "nmdc:7ce2c4d4d2cbf019fd43453b6fb54fac", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d75" + }, + "description": "Assembled scaffold fasta for gold:Gp0115667", + "url": 
"https://data.microbiomedata.org/data/1781_86098/assembly/assembly_scaffolds.fna", + "file_size_bytes": 61997325, + "type": "nmdc:DataObject", + "id": "nmdc:5c6200f0a56a1ec503ac0245b1d2cbdf", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d76" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115667", + "url": "https://data.microbiomedata.org/data/1781_86098/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1747730642, + "type": "nmdc:DataObject", + "id": "nmdc:d07858a6b7932797c1e4e8b019f82131", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15992" + }, + "id": "nmdc:257cca2e47a0917e48596800a3f9f161", + "name": "1781_86098.krona.html", + "description": "Gold:Gp0115667 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86098/ReadbasedAnalysis/centrifuge/1781_86098.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15999" + }, + "id": "nmdc:734cf235a0ede4b50b75488ee5fe893a", + "name": "1781_86098.json", + "description": "Gold:Gp0115667 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86098/ReadbasedAnalysis/1781_86098.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16270" + }, + "id": "nmdc:1e34d5f7bf6a095e74dc5b0ba743c6c4", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115667", + "file_size_bytes": 11386423, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16274" + }, + "id": "nmdc:12c891d4c33195700fbf605402639c77", + "name": "gold:Gp0115667.bins.3.fa", + "description": "metabat2 binned 
contig file for gold:Gp0115667", + "file_size_bytes": 211939, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16278" + }, + "id": "nmdc:6ac802499984a5da4fc7aa2cd17af998", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115667", + "file_size_bytes": 2040, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628a" + }, + "id": "nmdc:3fc44ed589c47ca3a915e818dc9ef957", + "name": "gold:Gp0115667.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 229517, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628c" + }, + "id": "nmdc:ecf40d29af87b508f4128c7520dbddff", + "name": "gold:Gp0115667.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 288428, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628f" + }, + "id": "nmdc:3d69f4d7f7bdcabac2d974bf0436cba0", + "name": "gold:Gp0115667.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 332716, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16293" + }, + "id": "nmdc:5977ef8709f03d6b5dd25112cf45dd6a", + "name": "gold:Gp0115667.bins.5.fa", + 
"description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 770132, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16294" + }, + "id": "nmdc:411197955ce463b3374262983e6e6c12", + "name": "gold:Gp0115667.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 1544238, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16299" + }, + "id": "nmdc:e4e449bf8b38e28b1c585494dd53f83a", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115667", + "file_size_bytes": 45010132, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629a" + }, + "id": "nmdc:9e21ea6dec1b46e65841a271b2bbe8fe", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115667", + "file_size_bytes": 846, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a0" + }, + "id": "nmdc:a2fecaa8e738191ae8a4934f235ad934", + "name": "gold:Gp0115667.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 305691, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a1" + }, + "id": "nmdc:843c9624d7bb8bdbcfe26fdde4117f0d", + "name": 
"gold:Gp0115667.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115667", + "file_size_bytes": 1448474, + "url": "https://data.microbiomedata.org/data/1781_86098/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d85" + }, + "description": "EC TSV File for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_ec.tsv", + "md5_checksum": "babc9f95621eed35bc7975dee8b417b9", + "file_size_bytes": 3385, + "id": "nmdc:babc9f95621eed35bc7975dee8b417b9", + "name": "gold:Gp0115667_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d86" + }, + "description": "KO TSV File for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_ko.tsv", + "md5_checksum": "bc5043b689463c3651c15ad4ba1aa9a4", + "file_size_bytes": 3385, + "id": "nmdc:bc5043b689463c3651c15ad4ba1aa9a4", + "name": "gold:Gp0115667_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d88" + }, + "description": "Functional annotation GFF file for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_functional_annotation.gff", + "md5_checksum": "c47020ef7958f3a4c4458e0797fc2400", + "file_size_bytes": 3385, + "id": "nmdc:c47020ef7958f3a4c4458e0797fc2400", + "name": "gold:Gp0115667_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d89" + }, + "description": "Protein FAA for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_proteins.faa", + "md5_checksum": "acdedd1c48e28e4f4e0d0679cae417f9", + "file_size_bytes": 3385, + "id": 
"nmdc:acdedd1c48e28e4f4e0d0679cae417f9", + "name": "gold:Gp0115667_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8b" + }, + "description": "Structural annotation GFF file for gold:Gp0115667", + "url": "https://data.microbiomedata.org/1781_86098/img_annotation/Ga0482260_structural_annotation.gff", + "md5_checksum": "6f236cc8b728333fcf85e4f27873a500", + "file_size_bytes": 3385, + "id": "nmdc:6f236cc8b728333fcf85e4f27873a500", + "name": "gold:Gp0115667_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3472a" + }, + "has_input": [ + "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "nmdc:d9b957c7efe7f753fe67441d0be605c6", + "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8" + ], + "too_short_contig_num": 109354, + "part_of": [ + "nmdc:mga0n0je44" + ], + "binned_contig_num": 596, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:1277a6924ab380e001a7208e7ebbb0e3", + "nmdc:48772112891988a2ef3f0c40786c11fd", + "nmdc:527e2c19607c225a707db67b5be01b6f", + "nmdc:027626ff998bf1e495e32d09cab4bb08", + "nmdc:733e798989606c802b3bbfc952a38841" + ], + "was_informed_by": "gold:Gp0115667", + "input_contig_num": 116127, + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0n0je44", + "mags_list": [ + { + "number_of_contig": 166, + "completeness": 19.51, + "bin_name": "bins.1", + "gene_count": 906, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.04, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 
70, + "completeness": 99.78, + "bin_name": "bins.2", + "gene_count": 3225, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 1, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.43, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 49 + }, + { + "number_of_contig": 67, + "completeness": 41.5, + "bin_name": "bins.3", + "gene_count": 464, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.76, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 293, + "completeness": 56.99, + "bin_name": "bins.4", + "gene_count": 1734, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 3.01, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 27 + } + ], + "unbinned_contig_num": 6177, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:24+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9cc" + }, + "has_input": [ + "nmdc:b3cefc5a9599a4fb9432132baf7f5565" + ], + "part_of": [ + "nmdc:mga0n0je44" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:45e8b887fc06ddbf2af3ecf9c91a7bf7", + "nmdc:26ab4381753f685c44091e1f17d8bab5", + "nmdc:5a378f3975ab6c2cf2a36b0b007ea3f8", + "nmdc:6df49253fee066c699d6a5191a0efaed", + "nmdc:5e35e51a595f892968e57681ee448e5f", + "nmdc:ae1bc890152d28387f65c65d434b97ea", + 
"nmdc:fb736eaba77cbd99135ddbc32168db94", + "nmdc:3b00892f95bc4dedaf4384685a75d52f", + "nmdc:b8c0d7c187169f34aafc17308aeea2ed", + "nmdc:2a8e4bb3922ec664bbb5ce49a30cc87e", + "nmdc:34eddc2289f3e3b4707a6c8060f6dd99", + "nmdc:0a51a22e2cf94c853657381549aa8f04" + ], + "was_informed_by": "gold:Gp0115667", + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0n0je44", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:24+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb7" + }, + "has_input": [ + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + ], + "part_of": [ + "nmdc:mga0n0je44" + ], + "ctg_logsum": 195440, + "scaf_logsum": 196103, + "gap_pct": 0.00293, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b3cefc5a9599a4fb9432132baf7f5565", + "nmdc:b60f674a01e3f7fff5ead95f330cef4f", + "nmdc:2e4532cb03bb1e9201976b9d65893788", + "nmdc:e49f8a26a9cd0420b688c967bbacb4c6", + "nmdc:d9b957c7efe7f753fe67441d0be605c6" + ], + "asm_score": 17.061, + "was_informed_by": "gold:Gp0115667", + "ctg_powsum": 25448, + "scaf_max": 245816, + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "scaf_powsum": 25552, + "execution_resource": "NERSC-Cori", + "contigs": 116132, + "name": "Assembly Activity for nmdc:mga0n0je44", + "ctg_max": 245816, + "gc_std": 0.12277, + "contig_bp": 58413782, + "gc_avg": 0.47644, + "started_at_time": "2021-10-11T02:28:16Z", + "scaf_bp": 58415492, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 116033, + "ended_at_time": "2021-10-11T03:58:24+00:00", + "ctg_l50": 479, + "ctg_l90": 286, + "ctg_n50": 26909, + "ctg_n90": 95138, + "scaf_l50": 479, + "scaf_l90": 286, + "scaf_n50": 26889, + "scaf_n90": 95057, + "scaf_l_gt50k": 1865703, + "scaf_n_gt50k": 17, + "scaf_pct_gt50k": 3.1938498 + } + ], + "omics_processing_set": [ + { + "_id": 
{ + "$oid": "649b009773e8249959349b37" + }, + "id": "nmdc:omprc-11-yt8css91", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-ynevd369" + ], + "has_output": [ + "jgi:55d818010d8785342fcf8278" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115667" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c882" + }, + "has_input": [ + "nmdc:cb2e0605e8f22a398d982e35aee57715" + ], + "part_of": [ + "nmdc:mga0n0je44" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a", + "nmdc:dae7c6e067f69ef6db39b4240cc450ba" + ], + "was_informed_by": "gold:Gp0115667", + "input_read_count": 19416222, + "output_read_bases": 2825090769, + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "input_read_bases": 2931849522, + "name": "Read QC Activity for nmdc:mga0n0je44", + "output_read_count": 18855352, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:24+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf4a" + }, + "has_input": [ + "nmdc:7d4057e3a44a05171c13fb0ed3e2294a" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:56edf81e5f5102edf7e416bc9430fbb6", + "nmdc:c3d0f03afb44520ef5f2ea14e6daf705", + "nmdc:2afff209a40ca4895307f3a47080c534", + "nmdc:d76c80bf15c4fd84f28c7150f24a8143", + "nmdc:b9d6d8a8297f9a604ac85a334a3412de", + "nmdc:fe4bd9f63c32f50676792e3c4adced08", + "nmdc:eb189cbf0543203d2521397b73d4d34b", + "nmdc:ce3f002a824efde4a7134e6cd2e6306b", + "nmdc:ac90bf3384ce44d097f7897ac5ff8134" + ], + "was_informed_by": "gold:Gp0115667", + "id": "nmdc:8093869c91384d3299431e56019f7de0", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0n0je44", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:24+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1792111281, + "type": "nmdc:DataObject", + "id": "jgi:55d817f20d8785342fcf826c", + "name": "9387.2.132031.TAGCTT.fastq.gz" + }, + { + "name": "Gp0115664_Filtered Reads", + "description": "Filtered Reads for Gp0115664", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filtered.fastq.gz", + "md5_checksum": "232e31505b6a0251df2303c0563d64c1", + "id": "nmdc:232e31505b6a0251df2303c0563d64c1", + "file_size_bytes": 1566732675 + }, + { + "name": "Gp0115664_Filtered Stats", + "description": "Filtered Stats for Gp0115664", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/qa/nmdc_mga0dm3v04_filterStats.txt", + "md5_checksum": "f3f4f75f19c92af6e98d2b45cccaacd5", + "id": "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5", + "file_size_bytes": 289 + }, + { + "name": "Gp0115664_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report.tsv", + 
"md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", + "id": "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "file_size_bytes": 9591 + }, + { + "name": "Gp0115664_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_report_full.tsv", + "md5_checksum": "7f93f97242aed036019f13492f5af35c", + "id": "nmdc:7f93f97242aed036019f13492f5af35c", + "file_size_bytes": 885985 + }, + { + "name": "Gp0115664_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115664", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_gottcha2_krona.html", + "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", + "id": "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "file_size_bytes": 251303 + }, + { + "name": "Gp0115664_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115664", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_classification.tsv", + "md5_checksum": "a4243f71a0288f489c566ae85d85891d", + "id": "nmdc:a4243f71a0288f489c566ae85d85891d", + "file_size_bytes": 1268144933 + }, + { + "name": "Gp0115664_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115664", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_report.tsv", + "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", + "id": "nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "file_size_bytes": 254575 + }, + { + "name": "Gp0115664_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115664", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_centrifuge_krona.html", + "md5_checksum": "a80779b32415ef001d0403f0b618b612", + "id": "nmdc:a80779b32415ef001d0403f0b618b612", + "file_size_bytes": 2327293 + }, + { + "name": "Gp0115664_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115664", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_classification.tsv", + "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", + "id": "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "file_size_bytes": 1037932028 + }, + { + "name": "Gp0115664_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115664", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_report.tsv", + "md5_checksum": "ce47d6686edb7b3472102d5883229c45", + "id": "nmdc:ce47d6686edb7b3472102d5883229c45", + "file_size_bytes": 641242 + }, + { + "name": "Gp0115664_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115664", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/ReadbasedAnalysis/nmdc_mga0dm3v04_kraken2_krona.html", + "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", + "id": "nmdc:29b75e78b0b7fd8115614d8e9d341d46", + "file_size_bytes": 3995680 + }, + { + "name": "Gp0115664_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115664", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_contigs.fna", + "md5_checksum": "3faf965a2e745048afed5d1c065a78c4", + "id": "nmdc:3faf965a2e745048afed5d1c065a78c4", + "file_size_bytes": 36012597 + }, + { + "name": "Gp0115664_Assembled scaffold fasta", + "description": "Assembled scaffold 
fasta for Gp0115664", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_scaffolds.fna", + "md5_checksum": "2d99daff632b19ebdea3f3e5784e2fbc", + "id": "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", + "file_size_bytes": 35776428 + }, + { + "name": "Gp0115664_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_covstats.txt", + "md5_checksum": "d8f255300e5f214baad3c3b4b3c0b51b", + "id": "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", + "file_size_bytes": 6143277 + }, + { + "name": "Gp0115664_Assembled AGP file", + "description": "Assembled AGP file for Gp0115664", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_assembly.agp", + "md5_checksum": "1f9a75569aedc406a3db8ff779b03c19", + "id": "nmdc:1f9a75569aedc406a3db8ff779b03c19", + "file_size_bytes": 5710214 + }, + { + "name": "Gp0115664_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115664", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/assembly/nmdc_mga0dm3v04_pairedMapped_sorted.bam", + "md5_checksum": "faeb84260d97f23162a6176b9442a5c8", + "id": "nmdc:faeb84260d97f23162a6176b9442a5c8", + "file_size_bytes": 1670248615 + }, + { + "name": "Gp0115664_Protein FAA", + "description": "Protein FAA for Gp0115664", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_proteins.faa", + "md5_checksum": "338a8f2f739dfc89557e090d604302f6", + "id": "nmdc:338a8f2f739dfc89557e090d604302f6", + "file_size_bytes": 21010319 + }, + { + "name": "Gp0115664_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115664", + "data_object_type": 
"Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_structural_annotation.gff", + "md5_checksum": "0ce03dd69826edcc8b5f6dd01ca176dc", + "id": "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", + "file_size_bytes": 2497 + }, + { + "name": "Gp0115664_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115664", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_functional_annotation.gff", + "md5_checksum": "dc720d27299f6f5c1d38c4dcf1dfc8db", + "id": "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", + "file_size_bytes": 24426623 + }, + { + "name": "Gp0115664_KO TSV file", + "description": "KO TSV file for Gp0115664", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko.tsv", + "md5_checksum": "bc7f7df6865acffd4e07f8b592573eb9", + "id": "nmdc:bc7f7df6865acffd4e07f8b592573eb9", + "file_size_bytes": 2875393 + }, + { + "name": "Gp0115664_EC TSV file", + "description": "EC TSV file for Gp0115664", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ec.tsv", + "md5_checksum": "be38bedd77ab3c072bafbb2c201c953d", + "id": "nmdc:be38bedd77ab3c072bafbb2c201c953d", + "file_size_bytes": 1882878 + }, + { + "name": "Gp0115664_COG GFF file", + "description": "COG GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cog.gff", + "md5_checksum": "d7318549a735853b679d15171f5c7ea7", + "id": "nmdc:d7318549a735853b679d15171f5c7ea7", + "file_size_bytes": 12475107 + }, + { + "name": "Gp0115664_PFAM GFF file", + "description": "PFAM GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_pfam.gff", + "md5_checksum": 
"c1617e0980c6e52149692aee39e30f8c", + "id": "nmdc:c1617e0980c6e52149692aee39e30f8c", + "file_size_bytes": 9305713 + }, + { + "name": "Gp0115664_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_tigrfam.gff", + "md5_checksum": "bd5a9b5e55605ece8873d6ac05e76e0d", + "id": "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", + "file_size_bytes": 1181236 + }, + { + "name": "Gp0115664_SMART GFF file", + "description": "SMART GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_smart.gff", + "md5_checksum": "eb1fba5cad14c3e211baa2de796bca2e", + "id": "nmdc:eb1fba5cad14c3e211baa2de796bca2e", + "file_size_bytes": 2718910 + }, + { + "name": "Gp0115664_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_supfam.gff", + "md5_checksum": "2146449222f410a286e4786bf19c9a5e", + "id": "nmdc:2146449222f410a286e4786bf19c9a5e", + "file_size_bytes": 16463047 + }, + { + "name": "Gp0115664_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_cath_funfam.gff", + "md5_checksum": "20ced78c72f67d064bddcc8d5534ebb6", + "id": "nmdc:20ced78c72f67d064bddcc8d5534ebb6", + "file_size_bytes": 12501882 + }, + { + "name": "Gp0115664_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/annotation/nmdc_mga0dm3v04_ko_ec.gff", + "md5_checksum": "7ffe90ceb10c9f40f755aa8d7aa30170", + "id": "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170", + "file_size_bytes": 9217314 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115664_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.tooShort.fa", + "md5_checksum": "767a36b1bffa42d3d25af3f81b15e11b", + "id": "nmdc:767a36b1bffa42d3d25af3f81b15e11b", + "file_size_bytes": 30368582 + }, + { + "name": "Gp0115664_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115664", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_bins.unbinned.fa", + "md5_checksum": "994fd58ab9a53c19ba1cdb830e37a132", + "id": "nmdc:994fd58ab9a53c19ba1cdb830e37a132", + "file_size_bytes": 4608000 + }, + { + "name": "Gp0115664_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115664", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_checkm_qa.out", + "md5_checksum": "db59a64c874a9e06c1f1ba58df96fe0d", + "id": "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", + "file_size_bytes": 845 + }, + { + "name": "Gp0115664_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115664", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_hqmq_bin.zip", + "md5_checksum": "0d45611a5d0c80679c00fa759c939df0", + "id": "nmdc:0d45611a5d0c80679c00fa759c939df0", + "file_size_bytes": 182 + }, + { + "name": "Gp0115664_metabat2 bins", + "description": "metabat2 bins for Gp0115664", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dm3v04/MAGs/nmdc_mga0dm3v04_metabat_bin.zip", + "md5_checksum": "bb5835f621252fca37967e00245517ac", + "id": "nmdc:bb5835f621252fca37967e00245517ac", + "file_size_bytes": 314358 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d62" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115664", + "url": "https://data.microbiomedata.org/data/1781_86089/assembly/mapping_stats.txt", + "file_size_bytes": 5751397, + "type": "nmdc:DataObject", + "id": "nmdc:115045c0b7102243d0b9f2d4ffaa20a0", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d64" + }, + "description": "Assembled scaffold fasta for gold:Gp0115664", + "url": "https://data.microbiomedata.org/data/1781_86089/assembly/assembly_scaffolds.fna", + "file_size_bytes": 35384873, + "type": "nmdc:DataObject", + "id": "nmdc:71b690c6d9ad021d8ea68b8fd9d31135", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d65" + }, + "description": "Assembled contigs fasta for gold:Gp0115664", + "url": "https://data.microbiomedata.org/data/1781_86089/assembly/assembly_contigs.fna", + "file_size_bytes": 35620717, + "type": "nmdc:DataObject", + "id": "nmdc:b78f599c21fb31b00d3f8a3c56daeb88", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d66" + }, + "description": "Assembled AGP file for gold:Gp0115664", + "url": "https://data.microbiomedata.org/data/1781_86089/assembly/assembly.agp", + "file_size_bytes": 4925804, + "type": "nmdc:DataObject", + "id": "nmdc:f592b315dbd5a060ddb075ad98bf4803", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d67" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115664", + "url": 
"https://data.microbiomedata.org/data/1781_86089/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1649453824, + "type": "nmdc:DataObject", + "id": "nmdc:662dc676b0b5a486248357f5b887c18b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15983" + }, + "id": "nmdc:bd586aef31587a585d6be2b9814a2551", + "name": "1781_86089.krona.html", + "description": "Gold:Gp0115664 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86089/ReadbasedAnalysis/centrifuge/1781_86089.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15988" + }, + "id": "nmdc:81b89289645757b1b3608d93bc563c73", + "name": "1781_86089.json", + "description": "Gold:Gp0115664 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86089/ReadbasedAnalysis/1781_86089.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16257" + }, + "id": "nmdc:e8ec230bfe68a272b34540e7f5ab5b2b", + "name": "gold:Gp0115664.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115664", + "file_size_bytes": 287705, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625b" + }, + "id": "nmdc:7a652e7e0f8ded35496989fe90b40c40", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115664", + "file_size_bytes": 4643149, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625c" + }, + "id": "nmdc:ab198c4e10213c9e85c4506b269452ee", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs 
fasta file by metaBat2 for gold:Gp0115664", + "file_size_bytes": 29395917, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625d" + }, + "id": "nmdc:c24915651cfdfc91f3e6b5bac679c3af", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115664", + "file_size_bytes": 1176, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625f" + }, + "id": "nmdc:9800add41d26829494265ba81a100c53", + "name": "gold:Gp0115664.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115664", + "file_size_bytes": 351859, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16261" + }, + "id": "nmdc:474fa29bd39452fa80f5a32e9e6be6f4", + "name": "gold:Gp0115664.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115664", + "file_size_bytes": 326275, + "url": "https://data.microbiomedata.org/data/1781_86089/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d61" + }, + "description": "Functional annotation GFF file for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_functional_annotation.gff", + "md5_checksum": "bc034c7024043ea88b44d0897bb5bece", + "file_size_bytes": 3385, + "id": "nmdc:bc034c7024043ea88b44d0897bb5bece", + "name": "gold:Gp0115664_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d62" + }, + "description": "KO TSV 
File for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_ko.tsv", + "md5_checksum": "76537a4ab5012ba3b407471da373ef1c", + "file_size_bytes": 3385, + "id": "nmdc:76537a4ab5012ba3b407471da373ef1c", + "name": "gold:Gp0115664_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d64" + }, + "description": "Structural annotation GFF file for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_structural_annotation.gff", + "md5_checksum": "10117f9500d0dd54655a5d70195f7df5", + "file_size_bytes": 3385, + "id": "nmdc:10117f9500d0dd54655a5d70195f7df5", + "name": "gold:Gp0115664_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d65" + }, + "description": "EC TSV File for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_ec.tsv", + "md5_checksum": "8a812604db9b4e2bdbad6d0b3539f6ea", + "file_size_bytes": 3385, + "id": "nmdc:8a812604db9b4e2bdbad6d0b3539f6ea", + "name": "gold:Gp0115664_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d68" + }, + "description": "Protein FAA for gold:Gp0115664", + "url": "https://data.microbiomedata.org/1781_86089/img_annotation/Ga0482263_proteins.faa", + "md5_checksum": "fc419491cce16671e828d76083252841", + "file_size_bytes": 3385, + "id": "nmdc:fc419491cce16671e828d76083252841", + "name": "gold:Gp0115664_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34723" + }, + "has_input": [ + "nmdc:3faf965a2e745048afed5d1c065a78c4", + "nmdc:faeb84260d97f23162a6176b9442a5c8", + 
"nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db" + ], + "too_short_contig_num": 75364, + "part_of": [ + "nmdc:mga0dm3v04" + ], + "binned_contig_num": 220, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:767a36b1bffa42d3d25af3f81b15e11b", + "nmdc:994fd58ab9a53c19ba1cdb830e37a132", + "nmdc:db59a64c874a9e06c1f1ba58df96fe0d", + "nmdc:0d45611a5d0c80679c00fa759c939df0", + "nmdc:bb5835f621252fca37967e00245517ac" + ], + "was_informed_by": "gold:Gp0115664", + "input_contig_num": 78376, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0dm3v04", + "mags_list": [ + { + "number_of_contig": 220, + "completeness": 45.41, + "bin_name": "bins.1", + "gene_count": 1182, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + } + ], + "unbinned_contig_num": 2792, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:34+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c5" + }, + "has_input": [ + "nmdc:3faf965a2e745048afed5d1c065a78c4" + ], + "part_of": [ + "nmdc:mga0dm3v04" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:338a8f2f739dfc89557e090d604302f6", + "nmdc:0ce03dd69826edcc8b5f6dd01ca176dc", + "nmdc:dc720d27299f6f5c1d38c4dcf1dfc8db", + "nmdc:bc7f7df6865acffd4e07f8b592573eb9", + "nmdc:be38bedd77ab3c072bafbb2c201c953d", + "nmdc:d7318549a735853b679d15171f5c7ea7", + "nmdc:c1617e0980c6e52149692aee39e30f8c", + "nmdc:bd5a9b5e55605ece8873d6ac05e76e0d", + "nmdc:eb1fba5cad14c3e211baa2de796bca2e", + 
"nmdc:2146449222f410a286e4786bf19c9a5e", + "nmdc:20ced78c72f67d064bddcc8d5534ebb6", + "nmdc:7ffe90ceb10c9f40f755aa8d7aa30170" + ], + "was_informed_by": "gold:Gp0115664", + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0dm3v04", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:33:34+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb0" + }, + "has_input": [ + "nmdc:232e31505b6a0251df2303c0563d64c1" + ], + "part_of": [ + "nmdc:mga0dm3v04" + ], + "ctg_logsum": 60365, + "scaf_logsum": 60806, + "gap_pct": 0.00196, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3faf965a2e745048afed5d1c065a78c4", + "nmdc:2d99daff632b19ebdea3f3e5784e2fbc", + "nmdc:d8f255300e5f214baad3c3b4b3c0b51b", + "nmdc:1f9a75569aedc406a3db8ff779b03c19", + "nmdc:faeb84260d97f23162a6176b9442a5c8" + ], + "asm_score": 4.21, + "was_informed_by": "gold:Gp0115664", + "ctg_powsum": 6668.288, + "scaf_max": 15348, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "scaf_powsum": 6720.964, + "execution_resource": "NERSC-Cori", + "contigs": 78376, + "name": "Assembly Activity for nmdc:mga0dm3v04", + "ctg_max": 15348, + "gc_std": 0.11459, + "contig_bp": 33088752, + "gc_avg": 0.5432, + "started_at_time": "2021-10-11T02:28:16Z", + "scaf_bp": 33089402, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 78311, + "ended_at_time": "2021-10-11T03:33:34+00:00", + "ctg_l50": 377, + "ctg_l90": 283, + "ctg_n50": 23883, + "ctg_n90": 67231, + "scaf_l50": 377, + "scaf_l90": 283, + "scaf_n50": 23850, + "scaf_n90": 67169 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b38" + }, + "id": "nmdc:omprc-11-hgehsc37", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", + 
"description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qxntpg05" + ], + "has_output": [ + "jgi:55d817f20d8785342fcf826c" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115664" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87b" + }, + "has_input": [ + "nmdc:86929bf5b2afcb965129dcf0eae2d8fc" + ], + "part_of": [ + "nmdc:mga0dm3v04" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:232e31505b6a0251df2303c0563d64c1", + "nmdc:f3f4f75f19c92af6e98d2b45cccaacd5" + ], + "was_informed_by": "gold:Gp0115664", + "input_read_count": 19058974, + "output_read_bases": 2597325375, + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "input_read_bases": 2877905074, + "name": "Read QC Activity for nmdc:mga0dm3v04", + "output_read_count": 17338778, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:34+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf3f" + }, + "has_input": [ + "nmdc:232e31505b6a0251df2303c0563d64c1" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9d61d9f0c31a98f88ad8cde86254148d", + "nmdc:7f93f97242aed036019f13492f5af35c", + "nmdc:b4d0179bcc68b5186a3544d9ee0c6941", + "nmdc:a4243f71a0288f489c566ae85d85891d", + 
"nmdc:f8b6ef830b94c6470056a3cd0a0eafc1", + "nmdc:a80779b32415ef001d0403f0b618b612", + "nmdc:01581429336a43d7dc2f85b8d49d6c6e", + "nmdc:ce47d6686edb7b3472102d5883229c45", + "nmdc:29b75e78b0b7fd8115614d8e9d341d46" + ], + "was_informed_by": "gold:Gp0115664", + "id": "nmdc:10c6e87f449fdc27c6b8bfbc9e25d6a3", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0dm3v04", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:33:34+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 4674996922, + "type": "nmdc:DataObject", + "id": "jgi:55f23d790d8785306f96497e", + "name": "9491.1.134352.AGTCAA.fastq.gz" + }, + { + "name": "Gp0115678_Filtered Reads", + "description": "Filtered Reads for Gp0115678", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filtered.fastq.gz", + "md5_checksum": "e0ce93b88419f87568ff206e0efe3a24", + "id": "nmdc:e0ce93b88419f87568ff206e0efe3a24", + "file_size_bytes": 4090026888 + }, + { + "name": "Gp0115678_Filtered Stats", + "description": "Filtered Stats for Gp0115678", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/qa/nmdc_mga026tn70_filterStats.txt", + "md5_checksum": "7bf8ff4cf0d98cccd8e1c20f77dd1690", + "id": "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690", + "file_size_bytes": 292 + }, + { + "name": "Gp0115678_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report.tsv", + "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", + "id": "nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "file_size_bytes": 19085 + }, + { + "name": "Gp0115678_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115678", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_report_full.tsv", + "md5_checksum": "12b2d6afc355bce76249d750a9fab534", + "id": "nmdc:12b2d6afc355bce76249d750a9fab534", + "file_size_bytes": 1243929 + }, + { + "name": "Gp0115678_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115678", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_gottcha2_krona.html", + "md5_checksum": "18214017d56658a48723c9c998dcba7e", + "id": "nmdc:18214017d56658a48723c9c998dcba7e", + "file_size_bytes": 281148 + }, + { + "name": "Gp0115678_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115678", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_classification.tsv", + "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", + "id": "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "file_size_bytes": 3491726958 + }, + { + "name": "Gp0115678_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115678", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_report.tsv", + "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", + "id": "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "file_size_bytes": 264123 + }, + { + "name": "Gp0115678_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115678", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_centrifuge_krona.html", + "md5_checksum": "f9c01985f057825149d35de0650095a8", + "id": "nmdc:f9c01985f057825149d35de0650095a8", + "file_size_bytes": 2352347 + }, + { + "name": 
"Gp0115678_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115678", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_classification.tsv", + "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", + "id": "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "file_size_bytes": 2880889483 + }, + { + "name": "Gp0115678_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115678", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_report.tsv", + "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", + "id": "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "file_size_bytes": 735519 + }, + { + "name": "Gp0115678_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115678", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/ReadbasedAnalysis/nmdc_mga026tn70_kraken2_krona.html", + "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", + "id": "nmdc:38d41d4299141abe28bf0405af80cdfc", + "file_size_bytes": 4410156 + }, + { + "name": "Gp0115678_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115678", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_contigs.fna", + "md5_checksum": "d305e212cce8f84f14561d3957c968b1", + "id": "nmdc:d305e212cce8f84f14561d3957c968b1", + "file_size_bytes": 205441595 + }, + { + "name": "Gp0115678_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115678", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_scaffolds.fna", + "md5_checksum": "fb12da7c2d6d1f9d9c7a1511702758bb", + "id": 
"nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", + "file_size_bytes": 204286677 + }, + { + "name": "Gp0115678_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_covstats.txt", + "md5_checksum": "444562a4e7108077b7e541a5d9064086", + "id": "nmdc:444562a4e7108077b7e541a5d9064086", + "file_size_bytes": 30470067 + }, + { + "name": "Gp0115678_Assembled AGP file", + "description": "Assembled AGP file for Gp0115678", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_assembly.agp", + "md5_checksum": "6c400425b7188b24ac49533d9ce0d43b", + "id": "nmdc:6c400425b7188b24ac49533d9ce0d43b", + "file_size_bytes": 28619270 + }, + { + "name": "Gp0115678_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115678", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/assembly/nmdc_mga026tn70_pairedMapped_sorted.bam", + "md5_checksum": "1c63639a894aa686e77e57787fcafbc6", + "id": "nmdc:1c63639a894aa686e77e57787fcafbc6", + "file_size_bytes": 4471336607 + }, + { + "name": "Gp0115678_Protein FAA", + "description": "Protein FAA for Gp0115678", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_proteins.faa", + "md5_checksum": "ecfb1a4d469d9f95a91c8a3a3d5475af", + "id": "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", + "file_size_bytes": 109377096 + }, + { + "name": "Gp0115678_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115678", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_structural_annotation.gff", + "md5_checksum": "4eeee677df10364f622a0d4789522c69", + "id": 
"nmdc:4eeee677df10364f622a0d4789522c69", + "file_size_bytes": 2533 + }, + { + "name": "Gp0115678_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115678", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_functional_annotation.gff", + "md5_checksum": "351ff91eddf2bc89acbdf04eab68aef1", + "id": "nmdc:351ff91eddf2bc89acbdf04eab68aef1", + "file_size_bytes": 118933051 + }, + { + "name": "Gp0115678_KO TSV file", + "description": "KO TSV file for Gp0115678", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko.tsv", + "md5_checksum": "64b9d934918b78de80f1cf80a013557f", + "id": "nmdc:64b9d934918b78de80f1cf80a013557f", + "file_size_bytes": 12839157 + }, + { + "name": "Gp0115678_EC TSV file", + "description": "EC TSV file for Gp0115678", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ec.tsv", + "md5_checksum": "903f2015c41660ae53e16bfc369d566a", + "id": "nmdc:903f2015c41660ae53e16bfc369d566a", + "file_size_bytes": 8227424 + }, + { + "name": "Gp0115678_COG GFF file", + "description": "COG GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cog.gff", + "md5_checksum": "bf72ad74b2375abe730ecf7dc50b1557", + "id": "nmdc:bf72ad74b2375abe730ecf7dc50b1557", + "file_size_bytes": 57084923 + }, + { + "name": "Gp0115678_PFAM GFF file", + "description": "PFAM GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_pfam.gff", + "md5_checksum": "92f4707b0b022c217463f76d229dd3cb", + "id": "nmdc:92f4707b0b022c217463f76d229dd3cb", + "file_size_bytes": 46625196 + }, + { + "name": "Gp0115678_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115678", 
+ "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_tigrfam.gff", + "md5_checksum": "4f6f494c878aeff4308f2de2b2682ea6", + "id": "nmdc:4f6f494c878aeff4308f2de2b2682ea6", + "file_size_bytes": 5472483 + }, + { + "name": "Gp0115678_SMART GFF file", + "description": "SMART GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_smart.gff", + "md5_checksum": "c44ff7df84f2b777b7fee22f7d28e205", + "id": "nmdc:c44ff7df84f2b777b7fee22f7d28e205", + "file_size_bytes": 18005129 + }, + { + "name": "Gp0115678_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_supfam.gff", + "md5_checksum": "b4fad8c887bc33c67a3316475ccc3572", + "id": "nmdc:b4fad8c887bc33c67a3316475ccc3572", + "file_size_bytes": 80713018 + }, + { + "name": "Gp0115678_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_cath_funfam.gff", + "md5_checksum": "4a3d00839e3067973b06771a31bbae93", + "id": "nmdc:4a3d00839e3067973b06771a31bbae93", + "file_size_bytes": 66327975 + }, + { + "name": "Gp0115678_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/annotation/nmdc_mga026tn70_ko_ec.gff", + "md5_checksum": "f01768e30cdd8f7650f631883d1c5d23", + "id": "nmdc:f01768e30cdd8f7650f631883d1c5d23", + "file_size_bytes": 40908900 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", 
+ "file_size_bytes": 0 + }, + { + "name": "Gp0115678_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.tooShort.fa", + "md5_checksum": "cf2d0eb0281d2822373d4e7d25c8d1e6", + "id": "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", + "file_size_bytes": 160811096 + }, + { + "name": "Gp0115678_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_bins.unbinned.fa", + "md5_checksum": "85defe7977c263b8fba3f31f89f101f9", + "id": "nmdc:85defe7977c263b8fba3f31f89f101f9", + "file_size_bytes": 31022166 + }, + { + "name": "Gp0115678_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115678", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_checkm_qa.out", + "md5_checksum": "19a6a8410cece1118a06763023cc1313", + "id": "nmdc:19a6a8410cece1118a06763023cc1313", + "file_size_bytes": 1690 + }, + { + "name": "Gp0115678_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115678", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_hqmq_bin.zip", + "md5_checksum": "54ed3f096ca7eacec9e5078ca45a6530", + "id": "nmdc:54ed3f096ca7eacec9e5078ca45a6530", + "file_size_bytes": 4026276 + }, + { + "name": "Gp0115678_metabat2 bins", + "description": "metabat2 bins for Gp0115678", + "url": "https://data.microbiomedata.org/data/nmdc:mga026tn70/MAGs/nmdc_mga026tn70_metabat_bin.zip", + "md5_checksum": "8493c05e428d90f8893e4c58755b2e95", + "id": "nmdc:8493c05e428d90f8893e4c58755b2e95", + "file_size_bytes": 72078 + }, + { + "_id": { + 
"$oid": "649b003d1ae706d7b5b14da5" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4408046431, + "type": "nmdc:DataObject", + "id": "nmdc:5d038f63644b03794d5a931f380bfd04", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db7" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/mapping_stats.txt", + "file_size_bytes": 28551512, + "type": "nmdc:DataObject", + "id": "nmdc:56f166420a42acf12a021f3a66004127", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db9" + }, + "description": "Assembled scaffold fasta for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly_scaffolds.fna", + "file_size_bytes": 202369442, + "type": "nmdc:DataObject", + "id": "nmdc:eabfcbcc20b7c6b2732fab7d2ce8b44b", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dbc" + }, + "description": "Assembled contigs fasta for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly_contigs.fna", + "file_size_bytes": 203523040, + "type": "nmdc:DataObject", + "id": "nmdc:d1dee40a000226d9f2c8f4f05e0f85f1", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dbe" + }, + "description": "Assembled AGP file for gold:Gp0115678", + "url": "https://data.microbiomedata.org/data/1781_86093/assembly/assembly.agp", + "file_size_bytes": 24779500, + "type": "nmdc:DataObject", + "id": "nmdc:ac56c44a98ebb58393634c4c2f83028d", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + 
"_id": { + "$oid": "649b003d1ae706d7b5b15a00" + }, + "id": "nmdc:acb4672087a4cbe2f4e5a65dc291f70b", + "name": "1781_86093.krona.html", + "description": "Gold:Gp0115678 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86093/ReadbasedAnalysis/centrifuge/1781_86093.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a0a" + }, + "id": "nmdc:5334ea32a928a691d0be326a7a73ffe4", + "name": "1781_86093.json", + "description": "Gold:Gp0115678 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86093/ReadbasedAnalysis/1781_86093.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162de" + }, + "id": "nmdc:8576b99e74ec8da1f25e14a8c09c6815", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115678", + "file_size_bytes": 33583798, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162df" + }, + "id": "nmdc:6afbb385aa127eb27cefb63eb516c8bc", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115678", + "file_size_bytes": 156176336, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e1" + }, + "id": "nmdc:127bab89a08a1eed165a2afbde8fedd9", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115678", + "file_size_bytes": 1861, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e2" + }, + "id": 
"nmdc:8ead45679db7866273e97e259a27773f", + "name": "gold:Gp0115678.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 2830025, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e3" + }, + "id": "nmdc:7c9a51eb968568c463672c6e4d0cbb0b", + "name": "gold:Gp0115678.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 699704, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e4" + }, + "id": "nmdc:aedec76f4ab12cc4534fc2204677aa81", + "name": "gold:Gp0115678.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 1797224, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e5" + }, + "id": "nmdc:d272087deef90faf8499f958061f818d", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115678", + "file_size_bytes": 2720, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e6" + }, + "id": "nmdc:b0b8774fb7f948606a1f4fa015e7f05e", + "name": "gold:Gp0115678.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 866150, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b162e7" + }, + "id": "nmdc:ea9e5c76a6942b053a3b50ac9d56db97", + "name": "gold:Gp0115678.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 672888, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e8" + }, + "id": "nmdc:33a8648962959be82a7140b07cb4eec5", + "name": "gold:Gp0115678.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 245890, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e9" + }, + "id": "nmdc:a72a3f4d459210af63005c1438af24ca", + "name": "gold:Gp0115678.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 232797, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ea" + }, + "id": "nmdc:1d650b2d0318de6359afd3393562f3a1", + "name": "gold:Gp0115678.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 1157673, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162eb" + }, + "id": "nmdc:3bb54fe9860b1a3c7ad831e2ba2d311e", + "name": "gold:Gp0115678.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 307851, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": 
"Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ec" + }, + "id": "nmdc:5f7a9cc615e036a5f42b35abc88dda66", + "name": "gold:Gp0115678.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 366506, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ee" + }, + "id": "nmdc:e581ce6782a654cf7528153e52a8c80f", + "name": "gold:Gp0115678.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 439839, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162f2" + }, + "id": "nmdc:de85ac00876e5ea0c61208d366b084b2", + "name": "gold:Gp0115678.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115678", + "file_size_bytes": 1192968, + "url": "https://data.microbiomedata.org/data/1781_86093/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6e" + }, + "description": "Protein FAA for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_proteins.faa", + "md5_checksum": "78a99f435ce2bdd6cd83ebb807dc0ef3", + "file_size_bytes": 3385, + "id": "nmdc:78a99f435ce2bdd6cd83ebb807dc0ef3", + "name": "gold:Gp0115678_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d75" + }, + "description": "KO TSV File for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_ko.tsv", + "md5_checksum": "cfe4a8ce52735eedacc38bacdc8785e4", + "file_size_bytes": 3385, + "id": 
"nmdc:cfe4a8ce52735eedacc38bacdc8785e4", + "name": "gold:Gp0115678_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7b" + }, + "description": "Functional annotation GFF file for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_functional_annotation.gff", + "md5_checksum": "6d1553b3e100a61f3b2b453fb7e71094", + "file_size_bytes": 3385, + "id": "nmdc:6d1553b3e100a61f3b2b453fb7e71094", + "name": "gold:Gp0115678_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7c" + }, + "description": "EC TSV File for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_ec.tsv", + "md5_checksum": "240064338b65f944556e88ebd44fbd03", + "file_size_bytes": 3385, + "id": "nmdc:240064338b65f944556e88ebd44fbd03", + "name": "gold:Gp0115678_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dc2" + }, + "description": "Structural annotation GFF file for gold:Gp0115678", + "url": "https://data.microbiomedata.org/1781_86093/img_annotation/Ga0482249_structural_annotation.gff", + "md5_checksum": "ac989404b8a9e07880788cfb061015ba", + "file_size_bytes": 3385, + "id": "nmdc:ac989404b8a9e07880788cfb061015ba", + "name": "gold:Gp0115678_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3472b" + }, + "has_input": [ + "nmdc:d305e212cce8f84f14561d3957c968b1", + "nmdc:1c63639a894aa686e77e57787fcafbc6", + "nmdc:351ff91eddf2bc89acbdf04eab68aef1" + ], + "too_short_contig_num": 362617, + "part_of": [ + "nmdc:mga026tn70" + ], + "binned_contig_num": 2089, + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:cf2d0eb0281d2822373d4e7d25c8d1e6", + "nmdc:85defe7977c263b8fba3f31f89f101f9", + "nmdc:19a6a8410cece1118a06763023cc1313", + "nmdc:54ed3f096ca7eacec9e5078ca45a6530", + "nmdc:8493c05e428d90f8893e4c58755b2e95" + ], + "was_informed_by": "gold:Gp0115678", + "input_contig_num": 383711, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga026tn70", + "mags_list": [ + { + "number_of_contig": 5, + "completeness": 0.31, + "bin_name": "bins.1", + "gene_count": 264, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 231, + "completeness": 50.86, + "bin_name": "bins.2", + "gene_count": 1187, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 0.86, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 19 + }, + { + "number_of_contig": 675, + "completeness": 74.51, + "bin_name": "bins.3", + "gene_count": 4479, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.06, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 37 + }, + { + "number_of_contig": 314, + "completeness": 64.35, + "bin_name": "bins.4", + "gene_count": 1988, + "bin_quality": "MQ", + "gtdbtk_species": 
"", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.48, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Ga0077559", + "num_t_rna": 24 + }, + { + "number_of_contig": 574, + "completeness": 73.7, + "bin_name": "bins.5", + "gene_count": 3601, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.88, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Aquabacterium", + "num_t_rna": 32 + }, + { + "number_of_contig": 290, + "completeness": 91.23, + "bin_name": "bins.6", + "gene_count": 3090, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "Moraxellaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 32 + } + ], + "unbinned_contig_num": 19005, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:18:17+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c8" + }, + "has_input": [ + "nmdc:d305e212cce8f84f14561d3957c968b1" + ], + "part_of": [ + "nmdc:mga026tn70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ecfb1a4d469d9f95a91c8a3a3d5475af", + "nmdc:4eeee677df10364f622a0d4789522c69", + "nmdc:351ff91eddf2bc89acbdf04eab68aef1", + "nmdc:64b9d934918b78de80f1cf80a013557f", + "nmdc:903f2015c41660ae53e16bfc369d566a", + "nmdc:bf72ad74b2375abe730ecf7dc50b1557", + "nmdc:92f4707b0b022c217463f76d229dd3cb", + 
"nmdc:4f6f494c878aeff4308f2de2b2682ea6", + "nmdc:c44ff7df84f2b777b7fee22f7d28e205", + "nmdc:b4fad8c887bc33c67a3316475ccc3572", + "nmdc:4a3d00839e3067973b06771a31bbae93", + "nmdc:f01768e30cdd8f7650f631883d1c5d23" + ], + "was_informed_by": "gold:Gp0115678", + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga026tn70", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:18:17+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb5" + }, + "has_input": [ + "nmdc:e0ce93b88419f87568ff206e0efe3a24" + ], + "part_of": [ + "nmdc:mga026tn70" + ], + "ctg_logsum": 494917, + "scaf_logsum": 496628, + "gap_pct": 0.00163, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d305e212cce8f84f14561d3957c968b1", + "nmdc:fb12da7c2d6d1f9d9c7a1511702758bb", + "nmdc:444562a4e7108077b7e541a5d9064086", + "nmdc:6c400425b7188b24ac49533d9ce0d43b", + "nmdc:1c63639a894aa686e77e57787fcafbc6" + ], + "asm_score": 7.785, + "was_informed_by": "gold:Gp0115678", + "ctg_powsum": 57423, + "scaf_max": 116556, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "scaf_powsum": 57689, + "execution_resource": "NERSC-Cori", + "contigs": 383712, + "name": "Assembly Activity for nmdc:mga026tn70", + "ctg_max": 116556, + "gc_std": 0.13426, + "contig_bp": 190310453, + "gc_avg": 0.48844, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 190313553, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 383447, + "ended_at_time": "2021-10-11T06:18:17+00:00", + "ctg_l50": 474, + "ctg_l90": 290, + "ctg_n50": 102228, + "ctg_n90": 321321, + "scaf_l50": 474, + "scaf_l90": 290, + "scaf_n50": 102177, + "scaf_n90": 321076, + "scaf_l_gt50k": 453691, + "scaf_n_gt50k": 6, + "scaf_pct_gt50k": 0.23839132 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b39" 
+ }, + "id": "nmdc:omprc-11-7vsv7h78", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-j0wbx741" + ], + "has_output": [ + "jgi:55f23d790d8785306f96497e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115678" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c881" + }, + "has_input": [ + "nmdc:0e6219b7901669483a0a0386cfc01f93" + ], + "part_of": [ + "nmdc:mga026tn70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e0ce93b88419f87568ff206e0efe3a24", + "nmdc:7bf8ff4cf0d98cccd8e1c20f77dd1690" + ], + "was_informed_by": "gold:Gp0115678", + "input_read_count": 51286688, + "output_read_bases": 7231449575, + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "input_read_bases": 7744289888, + "name": "Read QC Activity for nmdc:mga026tn70", + "output_read_count": 48276864, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:18:17+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf49" + }, + "has_input": [ + "nmdc:e0ce93b88419f87568ff206e0efe3a24" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:05bab80e2ff02d160b8e808f056ee2b5", + "nmdc:12b2d6afc355bce76249d750a9fab534", + "nmdc:18214017d56658a48723c9c998dcba7e", + "nmdc:99ef009c73c128e561a4b9dcb70d7ff2", + "nmdc:78dab6988b57c654462ef3dbeb64d8d6", + "nmdc:f9c01985f057825149d35de0650095a8", + "nmdc:bcea8bbe63625ad0f3142abe69a4a11d", + "nmdc:054c3097c9682bc9a6e07f88fdecc0ee", + "nmdc:38d41d4299141abe28bf0405af80cdfc" + ], + "was_informed_by": "gold:Gp0115678", + "id": "nmdc:88c0fc9b5b0bb0cd814448bbfba0da6a", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga026tn70", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:18:17+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2106076506, + "type": "nmdc:DataObject", + "id": "jgi:574fde547ded5e3df1ee13fa", + "name": "10533.1.165310.GAGCTCA-TTGAGCT.fastq.gz" + }, + { + "name": "Gp0127623_Filtered Reads", + "description": "Filtered Reads for Gp0127623", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filtered.fastq.gz", + "md5_checksum": "6a8409b21c45ba9feba873ec269c8ff7", + "id": "nmdc:6a8409b21c45ba9feba873ec269c8ff7", + "file_size_bytes": 1917552858 + }, + { + "name": "Gp0127623_Filtered Stats", + "description": "Filtered Stats for Gp0127623", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/qa/nmdc_mga03eyz63_filterStats.txt", + "md5_checksum": "61fb06de10fe3a0c49c5afe14ab7fb32", + "id": "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32", + "file_size_bytes": 283 + }, + { + "name": "Gp0127623_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report.tsv", + "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", + "id": 
"nmdc:ac39e916e17e08a845bb40d97519d8be", + "file_size_bytes": 1553 + }, + { + "name": "Gp0127623_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_report_full.tsv", + "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", + "id": "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "file_size_bytes": 836575 + }, + { + "name": "Gp0127623_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127623", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_gottcha2_krona.html", + "md5_checksum": "eda0c04d692ecf137585676c15924626", + "id": "nmdc:eda0c04d692ecf137585676c15924626", + "file_size_bytes": 231097 + }, + { + "name": "Gp0127623_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127623", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_classification.tsv", + "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", + "id": "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "file_size_bytes": 1669254765 + }, + { + "name": "Gp0127623_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127623", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_report.tsv", + "md5_checksum": "e1f164c534830cd628d67c564ace863b", + "id": "nmdc:e1f164c534830cd628d67c564ace863b", + "file_size_bytes": 255784 + }, + { + "name": "Gp0127623_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127623", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_centrifuge_krona.html", + "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", + "id": "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "file_size_bytes": 2333760 + }, + { + "name": "Gp0127623_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127623", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_classification.tsv", + "md5_checksum": "040e6ca695283a12711c16344acd1e76", + "id": "nmdc:040e6ca695283a12711c16344acd1e76", + "file_size_bytes": 1335651191 + }, + { + "name": "Gp0127623_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127623", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_report.tsv", + "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", + "id": "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "file_size_bytes": 647609 + }, + { + "name": "Gp0127623_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127623", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/ReadbasedAnalysis/nmdc_mga03eyz63_kraken2_krona.html", + "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", + "id": "nmdc:f2eed9669268f69dbc31f0c4f839fccf", + "file_size_bytes": 3949449 + }, + { + "name": "Gp0127623_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127623", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_contigs.fna", + "md5_checksum": "3373ef564b5b97fa472dc8f2c2277dbc", + "id": "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "file_size_bytes": 55220158 + }, + { + "name": "Gp0127623_Assembled scaffold fasta", + "description": "Assembled scaffold 
fasta for Gp0127623", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_scaffolds.fna", + "md5_checksum": "a0377bb7d752e66b754753fcefb5005a", + "id": "nmdc:a0377bb7d752e66b754753fcefb5005a", + "file_size_bytes": 54864386 + }, + { + "name": "Gp0127623_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_covstats.txt", + "md5_checksum": "081017d0d9e68a999c245618eb907c08", + "id": "nmdc:081017d0d9e68a999c245618eb907c08", + "file_size_bytes": 9321875 + }, + { + "name": "Gp0127623_Assembled AGP file", + "description": "Assembled AGP file for Gp0127623", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_assembly.agp", + "md5_checksum": "4a6ed00a6c2156c142d7bbec6baa36b5", + "id": "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", + "file_size_bytes": 8670291 + }, + { + "name": "Gp0127623_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127623", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/assembly/nmdc_mga03eyz63_pairedMapped_sorted.bam", + "md5_checksum": "21fb280328baf81e8135733eaf440b66", + "id": "nmdc:21fb280328baf81e8135733eaf440b66", + "file_size_bytes": 2062412797 + }, + { + "name": "Gp0127623_Protein FAA", + "description": "Protein FAA for Gp0127623", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_proteins.faa", + "md5_checksum": "8ac52d00bad1f9349da2acde572006b6", + "id": "nmdc:8ac52d00bad1f9349da2acde572006b6", + "file_size_bytes": 32224726 + }, + { + "name": "Gp0127623_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127623", + "data_object_type": 
"Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_structural_annotation.gff", + "md5_checksum": "9dd5eb06fe24f63d5012e34e364a580c", + "id": "nmdc:9dd5eb06fe24f63d5012e34e364a580c", + "file_size_bytes": 2512 + }, + { + "name": "Gp0127623_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127623", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_functional_annotation.gff", + "md5_checksum": "05107e0217e199d7b0cd571db88f7d09", + "id": "nmdc:05107e0217e199d7b0cd571db88f7d09", + "file_size_bytes": 37779373 + }, + { + "name": "Gp0127623_KO TSV file", + "description": "KO TSV file for Gp0127623", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko.tsv", + "md5_checksum": "02ffcaeeb9a73edea47ba3671396026a", + "id": "nmdc:02ffcaeeb9a73edea47ba3671396026a", + "file_size_bytes": 4343179 + }, + { + "name": "Gp0127623_EC TSV file", + "description": "EC TSV file for Gp0127623", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ec.tsv", + "md5_checksum": "b9b4ccafc50787f86ef03680eb23848d", + "id": "nmdc:b9b4ccafc50787f86ef03680eb23848d", + "file_size_bytes": 2966454 + }, + { + "name": "Gp0127623_COG GFF file", + "description": "COG GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cog.gff", + "md5_checksum": "fbd178d9c302b841e3fde3ab9acd8160", + "id": "nmdc:fbd178d9c302b841e3fde3ab9acd8160", + "file_size_bytes": 22023330 + }, + { + "name": "Gp0127623_PFAM GFF file", + "description": "PFAM GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_pfam.gff", + "md5_checksum": 
"1bcc35e753e7dad78ef8ae4989eb901a", + "id": "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", + "file_size_bytes": 15956001 + }, + { + "name": "Gp0127623_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_tigrfam.gff", + "md5_checksum": "f6d6d2ea3c539560ad30bbd6df8bc71a", + "id": "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", + "file_size_bytes": 1656727 + }, + { + "name": "Gp0127623_SMART GFF file", + "description": "SMART GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_smart.gff", + "md5_checksum": "45536a48cef31f2c3870c7bacb3d785a", + "id": "nmdc:45536a48cef31f2c3870c7bacb3d785a", + "file_size_bytes": 4731416 + }, + { + "name": "Gp0127623_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_supfam.gff", + "md5_checksum": "a52d057d005504857f82bcf661dd7676", + "id": "nmdc:a52d057d005504857f82bcf661dd7676", + "file_size_bytes": 27616681 + }, + { + "name": "Gp0127623_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_cath_funfam.gff", + "md5_checksum": "b92cb96900a31a3c70ccf9cfe45f02c3", + "id": "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", + "file_size_bytes": 20817140 + }, + { + "name": "Gp0127623_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/annotation/nmdc_mga03eyz63_ko_ec.gff", + "md5_checksum": "32eca4cab8525b09cf1b0ed2353f9278", + "id": "nmdc:32eca4cab8525b09cf1b0ed2353f9278", + "file_size_bytes": 13827629 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127623_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.tooShort.fa", + "md5_checksum": "e63c76f92bc0ae95dfc238c099296e91", + "id": "nmdc:e63c76f92bc0ae95dfc238c099296e91", + "file_size_bytes": 48421824 + }, + { + "name": "Gp0127623_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127623", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_bins.unbinned.fa", + "md5_checksum": "3dfba77d38712870f8c415203f991496", + "id": "nmdc:3dfba77d38712870f8c415203f991496", + "file_size_bytes": 6028115 + }, + { + "name": "Gp0127623_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127623", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_checkm_qa.out", + "md5_checksum": "5e98d27533164fdf67c07cc224090547", + "id": "nmdc:5e98d27533164fdf67c07cc224090547", + "file_size_bytes": 765 + }, + { + "name": "Gp0127623_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127623", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_hqmq_bin.zip", + "md5_checksum": "bfbe3e3a21e8a089c4c7a0d945c79b7b", + "id": "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", + "file_size_bytes": 182 + }, + { + "name": "Gp0127623_metabat2 bins", + "description": "metabat2 bins for Gp0127623", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga03eyz63/MAGs/nmdc_mga03eyz63_metabat_bin.zip", + "md5_checksum": "c70853ef1a6ab162b85df5215a76666b", + "id": "nmdc:c70853ef1a6ab162b85df5215a76666b", + "file_size_bytes": 236177 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e44" + }, + "description": "Assembled AGP file for gold:Gp0127623", + "url": "https://data.microbiomedata.org/data/1781_100325/assembly/assembly.agp", + "file_size_bytes": 7722651, + "type": "nmdc:DataObject", + "id": "nmdc:157fde8313174776bf9fd98b41c53aae", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e45" + }, + "description": "Assembled scaffold fasta for gold:Gp0127623", + "url": "https://data.microbiomedata.org/data/1781_100325/assembly/assembly_scaffolds.fna", + "file_size_bytes": 54390822, + "type": "nmdc:DataObject", + "id": "nmdc:c654cffcafc3b8bed2acfdf8e2dc2f3b", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e46" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127623", + "url": "https://data.microbiomedata.org/data/1781_100325/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2033548061, + "type": "nmdc:DataObject", + "id": "nmdc:d41f2a097f2cb6f9d6c8378f203cc565", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e47" + }, + "description": "Assembled contigs fasta for gold:Gp0127623", + "url": "https://data.microbiomedata.org/data/1781_100325/assembly/assembly_contigs.fna", + "file_size_bytes": 54746466, + "type": "nmdc:DataObject", + "id": "nmdc:9339ba4d7b731220024b995f87ddc5e1", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e48" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127623", + "url": 
"https://data.microbiomedata.org/data/1781_100325/assembly/mapping_stats.txt", + "file_size_bytes": 8848183, + "type": "nmdc:DataObject", + "id": "nmdc:f283aad4ed4b528d3ca14bb8fbd8abcd", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ae0" + }, + "id": "nmdc:6dbd96624464bcccba0269cd46f59c1f", + "name": "1781_100325.krona.html", + "description": "Gold:Gp0127623 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100325/ReadbasedAnalysis/centrifuge/1781_100325.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15aea" + }, + "id": "nmdc:91dc3ab40d04608ca5f5a30baa2d48b5", + "name": "1781_100325.json", + "description": "Gold:Gp0127623 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100325/ReadbasedAnalysis/1781_100325.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b9" + }, + "id": "nmdc:295741c2b87623f465d21c6eaacec974", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127623", + "file_size_bytes": 6753212, + "url": "https://data.microbiomedata.org/data/1781_100325/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ba" + }, + "id": "nmdc:dc2d3cbd8386e59252f48f52900f76a4", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127623", + "file_size_bytes": 47038634, + "url": "https://data.microbiomedata.org/data/1781_100325/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cbd" + }, + "description": "EC TSV File for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_ec.tsv", + "md5_checksum": 
"d6afa54f891852b3a5befc294ce84489", + "file_size_bytes": 3385, + "id": "nmdc:d6afa54f891852b3a5befc294ce84489", + "name": "gold:Gp0127623_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cbf" + }, + "description": "KO TSV File for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_ko.tsv", + "md5_checksum": "e15c4db1e4e26208b302ecb9bc2c094c", + "file_size_bytes": 3385, + "id": "nmdc:e15c4db1e4e26208b302ecb9bc2c094c", + "name": "gold:Gp0127623_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc0" + }, + "description": "Functional annotation GFF file for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_functional_annotation.gff", + "md5_checksum": "3acc269b9e2b5e97ffcc3c1a0d85381c", + "file_size_bytes": 3385, + "id": "nmdc:3acc269b9e2b5e97ffcc3c1a0d85381c", + "name": "gold:Gp0127623_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc1" + }, + "description": "Structural annotation GFF file for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_structural_annotation.gff", + "md5_checksum": "feb21db71dc44afceeb88bb725315b42", + "file_size_bytes": 3385, + "id": "nmdc:feb21db71dc44afceeb88bb725315b42", + "name": "gold:Gp0127623_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc4" + }, + "description": "Protein FAA for gold:Gp0127623", + "url": "https://data.microbiomedata.org/1781_100325/img_annotation/Ga0482247_proteins.faa", + "md5_checksum": "be62e3b68916c8077955d0b3d3aaf5aa", + "file_size_bytes": 3385, + "id": 
"nmdc:be62e3b68916c8077955d0b3d3aaf5aa", + "name": "gold:Gp0127623_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346fd" + }, + "has_input": [ + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "nmdc:21fb280328baf81e8135733eaf440b66", + "nmdc:05107e0217e199d7b0cd571db88f7d09" + ], + "too_short_contig_num": 114220, + "part_of": [ + "nmdc:mga03eyz63" + ], + "binned_contig_num": 171, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:e63c76f92bc0ae95dfc238c099296e91", + "nmdc:3dfba77d38712870f8c415203f991496", + "nmdc:5e98d27533164fdf67c07cc224090547", + "nmdc:bfbe3e3a21e8a089c4c7a0d945c79b7b", + "nmdc:c70853ef1a6ab162b85df5215a76666b" + ], + "was_informed_by": "gold:Gp0127623", + "input_contig_num": 118423, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga03eyz63", + "mags_list": [ + { + "number_of_contig": 171, + "completeness": 30.1, + "bin_name": "bins.1", + "gene_count": 991, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + } + ], + "unbinned_contig_num": 4032, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef99a" + }, + "has_input": [ + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc" + ], + "part_of": [ + "nmdc:mga03eyz63" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8ac52d00bad1f9349da2acde572006b6", + 
"nmdc:9dd5eb06fe24f63d5012e34e364a580c", + "nmdc:05107e0217e199d7b0cd571db88f7d09", + "nmdc:02ffcaeeb9a73edea47ba3671396026a", + "nmdc:b9b4ccafc50787f86ef03680eb23848d", + "nmdc:fbd178d9c302b841e3fde3ab9acd8160", + "nmdc:1bcc35e753e7dad78ef8ae4989eb901a", + "nmdc:f6d6d2ea3c539560ad30bbd6df8bc71a", + "nmdc:45536a48cef31f2c3870c7bacb3d785a", + "nmdc:a52d057d005504857f82bcf661dd7676", + "nmdc:b92cb96900a31a3c70ccf9cfe45f02c3", + "nmdc:32eca4cab8525b09cf1b0ed2353f9278" + ], + "was_informed_by": "gold:Gp0127623", + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga03eyz63", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9f" + }, + "has_input": [ + "nmdc:6a8409b21c45ba9feba873ec269c8ff7" + ], + "part_of": [ + "nmdc:mga03eyz63" + ], + "ctg_logsum": 70596, + "scaf_logsum": 70885, + "gap_pct": 0.00063, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3373ef564b5b97fa472dc8f2c2277dbc", + "nmdc:a0377bb7d752e66b754753fcefb5005a", + "nmdc:081017d0d9e68a999c245618eb907c08", + "nmdc:4a6ed00a6c2156c142d7bbec6baa36b5", + "nmdc:21fb280328baf81e8135733eaf440b66" + ], + "asm_score": 3.626, + "was_informed_by": "gold:Gp0127623", + "ctg_powsum": 7584.611, + "scaf_max": 12785, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "scaf_powsum": 7618.086, + "execution_resource": "NERSC-Cori", + "contigs": 118423, + "name": "Assembly Activity for nmdc:mga03eyz63", + "ctg_max": 11834, + "gc_std": 0.12108, + "contig_bp": 50762396, + "gc_avg": 0.59992, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 50762716, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 118391, + "ended_at_time": "2021-10-11T02:42:25+00:00", + "ctg_l50": 402, + "ctg_l90": 285, + "ctg_n50": 37682, + "ctg_n90": 
100987, + "scaf_l50": 402, + "scaf_l90": 285, + "scaf_n50": 37659, + "scaf_n90": 100956 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b3a" + }, + "id": "nmdc:omprc-11-5r54nt37", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-r7ggfc16" + ], + "has_output": [ + "jgi:574fde547ded5e3df1ee13fa" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127623" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85a" + }, + "has_input": [ + "nmdc:14766bc431808b2a29c03beecb66bbac" + ], + "part_of": [ + "nmdc:mga03eyz63" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6a8409b21c45ba9feba873ec269c8ff7", + "nmdc:61fb06de10fe3a0c49c5afe14ab7fb32" + ], + "was_informed_by": "gold:Gp0127623", + "input_read_count": 23705118, + "output_read_bases": 3409425046, + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3579472818, + "name": "Read QC Activity for nmdc:mga03eyz63", + "output_read_count": 22801896, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ], + 
"read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf17" + }, + "has_input": [ + "nmdc:6a8409b21c45ba9feba873ec269c8ff7" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac39e916e17e08a845bb40d97519d8be", + "nmdc:c6fd5c573ef8605d9b43ff9c698af423", + "nmdc:eda0c04d692ecf137585676c15924626", + "nmdc:d9ea063be9ab8ea102c1e2ec2fa9f177", + "nmdc:e1f164c534830cd628d67c564ace863b", + "nmdc:a1062576d998b7b82e39b8d8520fa37e", + "nmdc:040e6ca695283a12711c16344acd1e76", + "nmdc:ed4ced0ccbe3f6b34c35bd842e882cad", + "nmdc:f2eed9669268f69dbc31f0c4f839fccf" + ], + "was_informed_by": "gold:Gp0127623", + "id": "nmdc:e05db57d44a39f083df9a1803551b79b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga03eyz63", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:42:25+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2351763069, + "type": "nmdc:DataObject", + "id": "jgi:574fde787ded5e3df1ee1416", + "name": "10533.2.165322.CGGTTGT-AACAACC.fastq.gz" + }, + { + "name": "Gp0127625_Filtered Reads", + "description": "Filtered Reads for Gp0127625", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filtered.fastq.gz", + "md5_checksum": "2d13b3a30339b9c5b4fba099f9d4b10f", + "id": "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", + "file_size_bytes": 2037866145 + }, + { + "name": "Gp0127625_Filtered Stats", + "description": "Filtered Stats for Gp0127625", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/qa/nmdc_mga0bfpq58_filterStats.txt", + "md5_checksum": "42be49edad69619e550ddd69d150490f", + "id": "nmdc:42be49edad69619e550ddd69d150490f", + "file_size_bytes": 284 + }, + { + "name": "Gp0127625_Gottcha2 TSV 
report", + "description": "Gottcha2 TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report.tsv", + "md5_checksum": "550b631e1de3e01392154e54493d47ef", + "id": "nmdc:550b631e1de3e01392154e54493d47ef", + "file_size_bytes": 754 + }, + { + "name": "Gp0127625_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_report_full.tsv", + "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", + "id": "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "file_size_bytes": 641658 + }, + { + "name": "Gp0127625_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127625", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_gottcha2_krona.html", + "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", + "id": "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "file_size_bytes": 228494 + }, + { + "name": "Gp0127625_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127625", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_classification.tsv", + "md5_checksum": "b09795fc768257d881e8ce547be0ce68", + "id": "nmdc:b09795fc768257d881e8ce547be0ce68", + "file_size_bytes": 1849982678 + }, + { + "name": "Gp0127625_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127625", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_report.tsv", + "md5_checksum": "064ba18473eb80ff0b484311565d2894", + "id": "nmdc:064ba18473eb80ff0b484311565d2894", + "file_size_bytes": 253852 + }, + { + "name": 
"Gp0127625_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127625", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_centrifuge_krona.html", + "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", + "id": "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "file_size_bytes": 2331556 + }, + { + "name": "Gp0127625_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127625", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_classification.tsv", + "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", + "id": "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "file_size_bytes": 1471976767 + }, + { + "name": "Gp0127625_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127625", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_report.tsv", + "md5_checksum": "bc8acb862c8942616ef07302667c334f", + "id": "nmdc:bc8acb862c8942616ef07302667c334f", + "file_size_bytes": 627498 + }, + { + "name": "Gp0127625_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127625", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/ReadbasedAnalysis/nmdc_mga0bfpq58_kraken2_krona.html", + "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", + "id": "nmdc:b797ed6cb135c993b582cac368b2a93c", + "file_size_bytes": 3921941 + }, + { + "name": "Gp0127625_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127625", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_contigs.fna", + "md5_checksum": "5b6e7cbece9167002b12c3415afa9bb8", + "id": 
"nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "file_size_bytes": 171703232 + }, + { + "name": "Gp0127625_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127625", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_scaffolds.fna", + "md5_checksum": "f2ef7ceaaedf4d6bf377ce82687b06e3", + "id": "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", + "file_size_bytes": 170799869 + }, + { + "name": "Gp0127625_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_covstats.txt", + "md5_checksum": "d231edb2040700184064615a28e65ee5", + "id": "nmdc:d231edb2040700184064615a28e65ee5", + "file_size_bytes": 23875845 + }, + { + "name": "Gp0127625_Assembled AGP file", + "description": "Assembled AGP file for Gp0127625", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_assembly.agp", + "md5_checksum": "9e3e55fe2f337ee0192604f8aa13da8e", + "id": "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", + "file_size_bytes": 22351137 + }, + { + "name": "Gp0127625_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127625", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/assembly/nmdc_mga0bfpq58_pairedMapped_sorted.bam", + "md5_checksum": "ff612445b348b65f906cd8858c4ec54e", + "id": "nmdc:ff612445b348b65f906cd8858c4ec54e", + "file_size_bytes": 2304803186 + }, + { + "name": "Gp0127625_Protein FAA", + "description": "Protein FAA for Gp0127625", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_proteins.faa", + "md5_checksum": "b1cae75f11c5efc7b37ea38c8d690e09", + "id": "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", + 
"file_size_bytes": 96076876 + }, + { + "name": "Gp0127625_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127625", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_structural_annotation.gff", + "md5_checksum": "035d81e38b01174de882d15a859390a0", + "id": "nmdc:035d81e38b01174de882d15a859390a0", + "file_size_bytes": 2526 + }, + { + "name": "Gp0127625_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127625", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_functional_annotation.gff", + "md5_checksum": "da95ab39eb229378ef9c1c7317f58c36", + "id": "nmdc:da95ab39eb229378ef9c1c7317f58c36", + "file_size_bytes": 106301187 + }, + { + "name": "Gp0127625_KO TSV file", + "description": "KO TSV file for Gp0127625", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko.tsv", + "md5_checksum": "7ba2f365814fc2ae2896849d4dbb619d", + "id": "nmdc:7ba2f365814fc2ae2896849d4dbb619d", + "file_size_bytes": 12012992 + }, + { + "name": "Gp0127625_EC TSV file", + "description": "EC TSV file for Gp0127625", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ec.tsv", + "md5_checksum": "91ade9a89599592c1e699b8990a11fba", + "id": "nmdc:91ade9a89599592c1e699b8990a11fba", + "file_size_bytes": 7987608 + }, + { + "name": "Gp0127625_COG GFF file", + "description": "COG GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cog.gff", + "md5_checksum": "a1c78cb8202825bd692c572b1537b549", + "id": "nmdc:a1c78cb8202825bd692c572b1537b549", + "file_size_bytes": 63761051 + }, + { + "name": "Gp0127625_PFAM GFF 
file", + "description": "PFAM GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_pfam.gff", + "md5_checksum": "b83f7bca7166e0bbeb5d260af5920d00", + "id": "nmdc:b83f7bca7166e0bbeb5d260af5920d00", + "file_size_bytes": 49051515 + }, + { + "name": "Gp0127625_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_tigrfam.gff", + "md5_checksum": "2f8d30335b71e6d7f29458795d20daf4", + "id": "nmdc:2f8d30335b71e6d7f29458795d20daf4", + "file_size_bytes": 5446717 + }, + { + "name": "Gp0127625_SMART GFF file", + "description": "SMART GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_smart.gff", + "md5_checksum": "77a22d4fe5949259acc0f12eafe264a2", + "id": "nmdc:77a22d4fe5949259acc0f12eafe264a2", + "file_size_bytes": 14046377 + }, + { + "name": "Gp0127625_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_supfam.gff", + "md5_checksum": "f21f3b5ed41e8945b4eebdbb044f832a", + "id": "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", + "file_size_bytes": 79091420 + }, + { + "name": "Gp0127625_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_cath_funfam.gff", + "md5_checksum": "4a365e4bb51f09bb4f21470a753eac42", + "id": "nmdc:4a365e4bb51f09bb4f21470a753eac42", + "file_size_bytes": 60777542 + }, + { + "name": "Gp0127625_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/annotation/nmdc_mga0bfpq58_ko_ec.gff", + "md5_checksum": "5a230cb34060373c2e9a0af8b8040f46", + "id": "nmdc:5a230cb34060373c2e9a0af8b8040f46", + "file_size_bytes": 38117675 + }, + { + "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127625_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.tooShort.fa", + "md5_checksum": "27f14f2f1af3ad7d17505a6ddc52d860", + "id": "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", + "file_size_bytes": 128750891 + }, + { + "name": "Gp0127625_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_bins.unbinned.fa", + "md5_checksum": "b66d8fd47536ed5299c280aa873e2130", + "id": "nmdc:b66d8fd47536ed5299c280aa873e2130", + "file_size_bytes": 37223163 + }, + { + "name": "Gp0127625_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127625", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_checkm_qa.out", + "md5_checksum": "dac476e3a7a8cdb2f3be5946ae437906", + "id": "nmdc:dac476e3a7a8cdb2f3be5946ae437906", + "file_size_bytes": 1413 + }, + { + "name": "Gp0127625_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127625", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_hqmq_bin.zip", + "md5_checksum": "1ce4d3dcf2c9cbe245b437ca14a2772f", + "id": 
"nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", + "file_size_bytes": 182 + }, + { + "name": "Gp0127625_metabat2 bins", + "description": "metabat2 bins for Gp0127625", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bfpq58/MAGs/nmdc_mga0bfpq58_metabat_bin.zip", + "md5_checksum": "d312dfb56973b50497bab8faf7409db8", + "id": "nmdc:d312dfb56973b50497bab8faf7409db8", + "file_size_bytes": 1729165 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4b" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/mapping_stats.txt", + "file_size_bytes": 22675445, + "type": "nmdc:DataObject", + "id": "nmdc:d5f4718482fe0012f1e39efbd22c50b4", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4e" + }, + "description": "Assembled scaffold fasta for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly_scaffolds.fna", + "file_size_bytes": 169600309, + "type": "nmdc:DataObject", + "id": "nmdc:f1b48b2f19ff83ba6fd51df86ec966ec", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e50" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2276431089, + "type": "nmdc:DataObject", + "id": "nmdc:8ff0fbe4939b764e12158e783f049f23", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e52" + }, + "description": "Assembled contigs fasta for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly_contigs.fna", + "file_size_bytes": 170502832, + "type": "nmdc:DataObject", + "id": "nmdc:593237bf7f38f66d40eca1dbb23c7aef", + "name": "assembly_contigs.fna", + 
"data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e55" + }, + "description": "Assembled AGP file for gold:Gp0127625", + "url": "https://data.microbiomedata.org/data/1781_100327/assembly/assembly.agp", + "file_size_bytes": 19948625, + "type": "nmdc:DataObject", + "id": "nmdc:6c6032861ed3d9b16040e414aac58731", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15af9" + }, + "id": "nmdc:99433bf558d171ea575501775dabdb36", + "name": "1781_100327.krona.html", + "description": "Gold:Gp0127625 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100327/ReadbasedAnalysis/centrifuge/1781_100327.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b3a" + }, + "id": "nmdc:17804fc0900e6fddc51600638be0e04c", + "name": "1781_100327.json", + "description": "Gold:Gp0127625 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100327/ReadbasedAnalysis/1781_100327.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c3" + }, + "id": "nmdc:f48e06ed804bffa9e3fbafe2548c0d23", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127625", + "file_size_bytes": 125246089, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c4" + }, + "id": "nmdc:6355ef8f8d9c6f954797ae92ca908c41", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127625", + "file_size_bytes": 39081008, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c7" + }, + "id": 
"nmdc:9c77e38f33eedf5f9c2eb4e672ce3951", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127625", + "file_size_bytes": 1099, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c8" + }, + "id": "nmdc:d22c092793a0476bfc8bba9c5e1a6d22", + "name": "gold:Gp0127625.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127625", + "file_size_bytes": 1411952, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ca" + }, + "id": "nmdc:80a9f95cbc30cf2ccd164b5b85b866b6", + "name": "gold:Gp0127625.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127625", + "file_size_bytes": 1533567, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165cb" + }, + "id": "nmdc:31079c1f43cb55dd4d8d603e8413cc56", + "name": "gold:Gp0127625.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127625", + "file_size_bytes": 640735, + "url": "https://data.microbiomedata.org/data/1781_100327/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc8" + }, + "description": "EC TSV File for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_ec.tsv", + "md5_checksum": "16eb9d7ffc8dbf8872cbdb9b7f0a1c82", + "file_size_bytes": 3385, + "id": "nmdc:16eb9d7ffc8dbf8872cbdb9b7f0a1c82", + "name": "gold:Gp0127625_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": 
"nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc9" + }, + "description": "Structural annotation GFF file for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_structural_annotation.gff", + "md5_checksum": "bf23db2dda841d77cf51b7c9120ba503", + "file_size_bytes": 3385, + "id": "nmdc:bf23db2dda841d77cf51b7c9120ba503", + "name": "gold:Gp0127625_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cca" + }, + "description": "KO TSV File for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_ko.tsv", + "md5_checksum": "18dd16caf7af261c4d647da91a6f526a", + "file_size_bytes": 3385, + "id": "nmdc:18dd16caf7af261c4d647da91a6f526a", + "name": "gold:Gp0127625_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ccd" + }, + "description": "Protein FAA for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_proteins.faa", + "md5_checksum": "c6fb34fc2da63a5cc46522279e768db9", + "file_size_bytes": 3385, + "id": "nmdc:c6fb34fc2da63a5cc46522279e768db9", + "name": "gold:Gp0127625_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cce" + }, + "description": "Functional annotation GFF file for gold:Gp0127625", + "url": "https://data.microbiomedata.org/1781_100327/img_annotation/Ga0482245_functional_annotation.gff", + "md5_checksum": "3b039b9d5a75b97a67edf5d50b34d9f0", + "file_size_bytes": 3385, + "id": "nmdc:3b039b9d5a75b97a67edf5d50b34d9f0", + "name": "gold:Gp0127625_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": 
"649b0052ec087f6bbab346fc" + }, + "has_input": [ + "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "nmdc:ff612445b348b65f906cd8858c4ec54e", + "nmdc:da95ab39eb229378ef9c1c7317f58c36" + ], + "too_short_contig_num": 275414, + "part_of": [ + "nmdc:mga0bfpq58" + ], + "binned_contig_num": 1195, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:27f14f2f1af3ad7d17505a6ddc52d860", + "nmdc:b66d8fd47536ed5299c280aa873e2130", + "nmdc:dac476e3a7a8cdb2f3be5946ae437906", + "nmdc:1ce4d3dcf2c9cbe245b437ca14a2772f", + "nmdc:d312dfb56973b50497bab8faf7409db8" + ], + "was_informed_by": "gold:Gp0127625", + "input_contig_num": 300100, + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0bfpq58", + "mags_list": [ + { + "number_of_contig": 382, + "completeness": 47.74, + "bin_name": "bins.1", + "gene_count": 2054, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.69, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 197, + "completeness": 22.93, + "bin_name": "bins.2", + "gene_count": 1005, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 95, + "completeness": 7.24, + "bin_name": "bins.3", + "gene_count": 447, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, 
+ { + "number_of_contig": 193, + "completeness": 17.79, + "bin_name": "bins.4", + "gene_count": 1386, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + }, + { + "number_of_contig": 328, + "completeness": 37.37, + "bin_name": "bins.5", + "gene_count": 1978, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.56, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 35 + } + ], + "unbinned_contig_num": 23491, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:29:50+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef999" + }, + "has_input": [ + "nmdc:5b6e7cbece9167002b12c3415afa9bb8" + ], + "part_of": [ + "nmdc:mga0bfpq58" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b1cae75f11c5efc7b37ea38c8d690e09", + "nmdc:035d81e38b01174de882d15a859390a0", + "nmdc:da95ab39eb229378ef9c1c7317f58c36", + "nmdc:7ba2f365814fc2ae2896849d4dbb619d", + "nmdc:91ade9a89599592c1e699b8990a11fba", + "nmdc:a1c78cb8202825bd692c572b1537b549", + "nmdc:b83f7bca7166e0bbeb5d260af5920d00", + "nmdc:2f8d30335b71e6d7f29458795d20daf4", + "nmdc:77a22d4fe5949259acc0f12eafe264a2", + "nmdc:f21f3b5ed41e8945b4eebdbb044f832a", + "nmdc:4a365e4bb51f09bb4f21470a753eac42", + "nmdc:5a230cb34060373c2e9a0af8b8040f46" + ], + "was_informed_by": "gold:Gp0127625", + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0bfpq58", + "started_at_time": "2021-10-11T02:23:30Z", + "type": 
"nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:29:50+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f89" + }, + "has_input": [ + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" + ], + "part_of": [ + "nmdc:mga0bfpq58" + ], + "ctg_logsum": 452076, + "scaf_logsum": 453436, + "gap_pct": 0.00138, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5b6e7cbece9167002b12c3415afa9bb8", + "nmdc:f2ef7ceaaedf4d6bf377ce82687b06e3", + "nmdc:d231edb2040700184064615a28e65ee5", + "nmdc:9e3e55fe2f337ee0192604f8aa13da8e", + "nmdc:ff612445b348b65f906cd8858c4ec54e" + ], + "asm_score": 3.923, + "was_informed_by": "gold:Gp0127625", + "ctg_powsum": 49204, + "scaf_max": 29400, + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "scaf_powsum": 49370, + "execution_resource": "NERSC-Cori", + "contigs": 300102, + "name": "Assembly Activity for nmdc:mga0bfpq58", + "ctg_max": 29400, + "gc_std": 0.0955, + "contig_bp": 159709614, + "gc_avg": 0.6367, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 159711824, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 299890, + "ended_at_time": "2021-10-11T03:29:50+00:00", + "ctg_l50": 546, + "ctg_l90": 301, + "ctg_n50": 78532, + "ctg_n90": 244428, + "scaf_l50": 546, + "scaf_l90": 301, + "scaf_n50": 78517, + "scaf_n90": 244244 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b3b" + }, + "id": "nmdc:omprc-11-76ebsj44", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-k3t2wk45" + ], + "has_output": [ + "jgi:574fde787ded5e3df1ee1416" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": 
"2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127625" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c858" + }, + "has_input": [ + "nmdc:93c62425e46296c35415039d7fd9cb56" + ], + "part_of": [ + "nmdc:mga0bfpq58" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f", + "nmdc:42be49edad69619e550ddd69d150490f" + ], + "was_informed_by": "gold:Gp0127625", + "input_read_count": 26227312, + "output_read_bases": 3764845015, + "id": "nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3960324112, + "name": "Read QC Activity for nmdc:mga0bfpq58", + "output_read_count": 25182244, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:29:50+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf15" + }, + "has_input": [ + "nmdc:2d13b3a30339b9c5b4fba099f9d4b10f" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:550b631e1de3e01392154e54493d47ef", + "nmdc:3f14ff51550d9d78dae3a7ec08514907", + "nmdc:1a7b8f8968f451b5d5ccb97a10a56d89", + "nmdc:b09795fc768257d881e8ce547be0ce68", + "nmdc:064ba18473eb80ff0b484311565d2894", + "nmdc:a7b6cc370371668be2e3bb90f5ca0fd1", + "nmdc:60c663a34b79db2ee71edf1afe4c14e3", + "nmdc:bc8acb862c8942616ef07302667c334f", + "nmdc:b797ed6cb135c993b582cac368b2a93c" + ], + "was_informed_by": "gold:Gp0127625", + "id": 
"nmdc:aa214e53dc8472f9ff99b02245d8f943", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bfpq58", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:29:50+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2167583658, + "type": "nmdc:DataObject", + "id": "jgi:574fde7b7ded5e3df1ee1418", + "name": "10533.2.165322.TACCAAC-GGTTGGT.fastq.gz" + }, + { + "name": "Gp0127626_Filtered Reads", + "description": "Filtered Reads for Gp0127626", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filtered.fastq.gz", + "md5_checksum": "07499ad2f2b80f42bd7109732b1eef90", + "id": "nmdc:07499ad2f2b80f42bd7109732b1eef90", + "file_size_bytes": 1944721961 + }, + { + "name": "Gp0127626_Filtered Stats", + "description": "Filtered Stats for Gp0127626", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/qa/nmdc_mga04xnj45_filterStats.txt", + "md5_checksum": "9089d07fdee5ed03e901c1656206af02", + "id": "nmdc:9089d07fdee5ed03e901c1656206af02", + "file_size_bytes": 287 + }, + { + "name": "Gp0127626_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report.tsv", + "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", + "id": "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "file_size_bytes": 2399 + }, + { + "name": "Gp0127626_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_report_full.tsv", + "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", + "id": "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "file_size_bytes": 743066 + }, + { + 
"name": "Gp0127626_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127626", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_gottcha2_krona.html", + "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", + "id": "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "file_size_bytes": 233970 + }, + { + "name": "Gp0127626_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127626", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_classification.tsv", + "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", + "id": "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "file_size_bytes": 1673697764 + }, + { + "name": "Gp0127626_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127626", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_report.tsv", + "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", + "id": "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127626_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127626", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_centrifuge_krona.html", + "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", + "id": "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "file_size_bytes": 2327521 + }, + { + "name": "Gp0127626_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127626", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_classification.tsv", + "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", + "id": "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "file_size_bytes": 1343921825 + }, + { + "name": "Gp0127626_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127626", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_report.tsv", + "md5_checksum": "806b27f1fa5a423100b113bb56edc708", + "id": "nmdc:806b27f1fa5a423100b113bb56edc708", + "file_size_bytes": 638478 + }, + { + "name": "Gp0127626_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127626", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/ReadbasedAnalysis/nmdc_mga04xnj45_kraken2_krona.html", + "md5_checksum": "bb3e6793c4f036b9756f075d41846964", + "id": "nmdc:bb3e6793c4f036b9756f075d41846964", + "file_size_bytes": 3987411 + }, + { + "name": "Gp0127626_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127626", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_contigs.fna", + "md5_checksum": "6d72d9fb6a282f8872cd3d5b8ce1a29d", + "id": "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "file_size_bytes": 47315336 + }, + { + "name": "Gp0127626_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127626", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_scaffolds.fna", + "md5_checksum": "2d89ade1cc6267bb77b48daa176442f2", + "id": "nmdc:2d89ade1cc6267bb77b48daa176442f2", + "file_size_bytes": 46998743 + }, + { + "name": "Gp0127626_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127626", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_covstats.txt", + "md5_checksum": "79588f527e08eace069ddc63171f004c", + "id": "nmdc:79588f527e08eace069ddc63171f004c", + "file_size_bytes": 8270233 + }, + { + "name": "Gp0127626_Assembled AGP file", + "description": "Assembled AGP file for Gp0127626", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_assembly.agp", + "md5_checksum": "cc855d3c15387d078c6919d1b19f8c05", + "id": "nmdc:cc855d3c15387d078c6919d1b19f8c05", + "file_size_bytes": 7690333 + }, + { + "name": "Gp0127626_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127626", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/assembly/nmdc_mga04xnj45_pairedMapped_sorted.bam", + "md5_checksum": "ef722a8ecd2b85d9202560df41eca7ed", + "id": "nmdc:ef722a8ecd2b85d9202560df41eca7ed", + "file_size_bytes": 2083099081 + }, + { + "name": "Gp0127626_Protein FAA", + "description": "Protein FAA for Gp0127626", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_proteins.faa", + "md5_checksum": "26360324fcaed21fd48b54972cce09cb", + "id": "nmdc:26360324fcaed21fd48b54972cce09cb", + "file_size_bytes": 28150597 + }, + { + "name": "Gp0127626_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127626", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_structural_annotation.gff", + "md5_checksum": "d2be135e631726360cf6ac23a3d56629", + "id": "nmdc:d2be135e631726360cf6ac23a3d56629", + "file_size_bytes": 2511 + }, + { + "name": "Gp0127626_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127626", + "data_object_type": 
"Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_functional_annotation.gff", + "md5_checksum": "b2fdf525bc1ddadb30427cba91c63483", + "id": "nmdc:b2fdf525bc1ddadb30427cba91c63483", + "file_size_bytes": 33351979 + }, + { + "name": "Gp0127626_KO TSV file", + "description": "KO TSV file for Gp0127626", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko.tsv", + "md5_checksum": "75ff61c1b51ace76d6e01930ae41c38c", + "id": "nmdc:75ff61c1b51ace76d6e01930ae41c38c", + "file_size_bytes": 3842650 + }, + { + "name": "Gp0127626_EC TSV file", + "description": "EC TSV file for Gp0127626", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ec.tsv", + "md5_checksum": "4210daa7b1b0b84a6e5b6591e4e93c55", + "id": "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", + "file_size_bytes": 2561980 + }, + { + "name": "Gp0127626_COG GFF file", + "description": "COG GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cog.gff", + "md5_checksum": "cfd7a714b2e18f136d6dc48b9162e1c0", + "id": "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", + "file_size_bytes": 19108716 + }, + { + "name": "Gp0127626_PFAM GFF file", + "description": "PFAM GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_pfam.gff", + "md5_checksum": "80a1ed51631f5fbc43032aa4afbfbf1d", + "id": "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", + "file_size_bytes": 13800768 + }, + { + "name": "Gp0127626_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_tigrfam.gff", + "md5_checksum": "52e64eec8c715affde1612b871e2490e", + "id": "nmdc:52e64eec8c715affde1612b871e2490e", + 
"file_size_bytes": 1446190 + }, + { + "name": "Gp0127626_SMART GFF file", + "description": "SMART GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_smart.gff", + "md5_checksum": "4687e89ae41c98bc49ca81ded0b4c622", + "id": "nmdc:4687e89ae41c98bc49ca81ded0b4c622", + "file_size_bytes": 4252918 + }, + { + "name": "Gp0127626_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_supfam.gff", + "md5_checksum": "8cc7e6c8e232891c3ac7d952302905b6", + "id": "nmdc:8cc7e6c8e232891c3ac7d952302905b6", + "file_size_bytes": 24007157 + }, + { + "name": "Gp0127626_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_cath_funfam.gff", + "md5_checksum": "445ce659140104b37475c5c2e3fb7761", + "id": "nmdc:445ce659140104b37475c5c2e3fb7761", + "file_size_bytes": 17990080 + }, + { + "name": "Gp0127626_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127626", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/annotation/nmdc_mga04xnj45_ko_ec.gff", + "md5_checksum": "32e15eb7eab763990dbb0ce947321718", + "id": "nmdc:32e15eb7eab763990dbb0ce947321718", + "file_size_bytes": 12235401 + }, + { + "name": "Gp0127626_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127626", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_checkm_qa.out", + "md5_checksum": "66dea8d60f61c7a150ae4cbc3ce88757", + "id": "nmdc:66dea8d60f61c7a150ae4cbc3ce88757", + "file_size_bytes": 765 + }, + { + "name": "Gp0127626_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127626", + "data_object_type": "Metagenome Bins", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga04xnj45/MAGs/nmdc_mga04xnj45_hqmq_bin.zip", + "md5_checksum": "1b0f148dc6a3a6a007482d1b03fe7e6a", + "id": "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a", + "file_size_bytes": 520239 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e51" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127626", + "url": "https://data.microbiomedata.org/data/1781_100328/assembly/mapping_stats.txt", + "file_size_bytes": 7848645, + "type": "nmdc:DataObject", + "id": "nmdc:f12072d88720efdfb5cecb913d4a595f", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e53" + }, + "description": "Assembled contigs fasta for gold:Gp0127626", + "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly_contigs.fna", + "file_size_bytes": 46893748, + "type": "nmdc:DataObject", + "id": "nmdc:59e99f35194f3f98fa07d401dddd4959", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e54" + }, + "description": "Assembled scaffold fasta for gold:Gp0127626", + "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly_scaffolds.fna", + "file_size_bytes": 46577279, + "type": "nmdc:DataObject", + "id": "nmdc:8856365e5fa1681e630bca38b7376fd1", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e56" + }, + "description": "Assembled AGP file for gold:Gp0127626", + "url": "https://data.microbiomedata.org/data/1781_100328/assembly/assembly.agp", + "file_size_bytes": 6846909, + "type": "nmdc:DataObject", + "id": "nmdc:f3ab16f91b806aff91f36167bc832f4a", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e57" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127626", + "url": 
"https://data.microbiomedata.org/data/1781_100328/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2056447451, + "type": "nmdc:DataObject", + "id": "nmdc:353e83f4603072d1fe5d15f4c193397f", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b00" + }, + "id": "nmdc:3441b097a56424b593c10323e71636f7", + "name": "1781_100328.json", + "description": "Gold:Gp0127626 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100328/ReadbasedAnalysis/1781_100328.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b08" + }, + "id": "nmdc:04f3b5daa5e47ce69c5c95dce5507f61", + "name": "1781_100328.krona.html", + "description": "Gold:Gp0127626 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100328/ReadbasedAnalysis/centrifuge/1781_100328.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c6" + }, + "id": "nmdc:4f708453d292f67572466be1e73f5e63", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127626", + "file_size_bytes": 4192133, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c9" + }, + "id": "nmdc:6ac7d20ce76667dbe7737db5074574c5", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127626", + "file_size_bytes": 40372419, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165cc" + }, + "id": "nmdc:a38cd7c53173358f551112e22bffa7b3", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for 
gold:Gp0127626", + "file_size_bytes": 1224, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165cd" + }, + "id": "nmdc:02a1905f0e4d6c3106d2d43932ad44d7", + "name": "gold:Gp0127626.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127626", + "file_size_bytes": 232694, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ce" + }, + "id": "nmdc:4473bba41dba6e11ed96dff91fd1b9e4", + "name": "gold:Gp0127626.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0127626", + "file_size_bytes": 305463, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165cf" + }, + "id": "nmdc:d0941c29cff73ecfa187a4e38108efab", + "name": "gold:Gp0127626.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127626", + "file_size_bytes": 437942, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d1" + }, + "id": "nmdc:98c71d88192e9665597c98b72266ae0f", + "name": "gold:Gp0127626.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127626", + "file_size_bytes": 515703, + "url": "https://data.microbiomedata.org/data/1781_100328/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ccb" + }, + "description": "EC TSV File for gold:Gp0127626", + "url": 
"https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_ec.tsv", + "md5_checksum": "dde9a2b70a0552a8d6f7cda7f4862aa9", + "file_size_bytes": 3385, + "id": "nmdc:dde9a2b70a0552a8d6f7cda7f4862aa9", + "name": "gold:Gp0127626_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ccc" + }, + "description": "KO TSV File for gold:Gp0127626", + "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_ko.tsv", + "md5_checksum": "1f45d481e2882a15e7d060e47cbbfda3", + "file_size_bytes": 3385, + "id": "nmdc:1f45d481e2882a15e7d060e47cbbfda3", + "name": "gold:Gp0127626_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ccf" + }, + "description": "Functional annotation GFF file for gold:Gp0127626", + "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_functional_annotation.gff", + "md5_checksum": "8e19f17a8fd0747410b68d804b87139d", + "file_size_bytes": 3385, + "id": "nmdc:8e19f17a8fd0747410b68d804b87139d", + "name": "gold:Gp0127626_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd0" + }, + "description": "Structural annotation GFF file for gold:Gp0127626", + "url": "https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_structural_annotation.gff", + "md5_checksum": "f1b6a4b001b67ec72eb5b5411e1321c9", + "file_size_bytes": 3385, + "id": "nmdc:f1b6a4b001b67ec72eb5b5411e1321c9", + "name": "gold:Gp0127626_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd2" + }, + "description": "Protein FAA for gold:Gp0127626", + "url": 
"https://data.microbiomedata.org/1781_100328/img_annotation/Ga0482244_proteins.faa", + "md5_checksum": "11741b35b589852f2b652d1f73afb663", + "file_size_bytes": 3385, + "id": "nmdc:11741b35b589852f2b652d1f73afb663", + "name": "gold:Gp0127626_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34712" + }, + "has_input": [ + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "nmdc:ef722a8ecd2b85d9202560df41eca7ed", + "nmdc:b2fdf525bc1ddadb30427cba91c63483" + ], + "too_short_contig_num": 102702, + "part_of": [ + "nmdc:mga04xnj45" + ], + "binned_contig_num": 230, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:66dea8d60f61c7a150ae4cbc3ce88757", + "nmdc:1b0f148dc6a3a6a007482d1b03fe7e6a" + ], + "was_informed_by": "gold:Gp0127626", + "input_contig_num": 105397, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga04xnj45", + "mags_list": [ + { + "number_of_contig": 230, + "completeness": 81.4, + "bin_name": "bins.1", + "gene_count": 2055, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 2.43, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 37 + } + ], + "unbinned_contig_num": 2465, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T20:54:56+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b1" + }, + "has_input": [ + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d" + ], + "part_of": [ + "nmdc:mga04xnj45" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + 
"has_output": [ + "nmdc:26360324fcaed21fd48b54972cce09cb", + "nmdc:d2be135e631726360cf6ac23a3d56629", + "nmdc:b2fdf525bc1ddadb30427cba91c63483", + "nmdc:75ff61c1b51ace76d6e01930ae41c38c", + "nmdc:4210daa7b1b0b84a6e5b6591e4e93c55", + "nmdc:cfd7a714b2e18f136d6dc48b9162e1c0", + "nmdc:80a1ed51631f5fbc43032aa4afbfbf1d", + "nmdc:52e64eec8c715affde1612b871e2490e", + "nmdc:4687e89ae41c98bc49ca81ded0b4c622", + "nmdc:8cc7e6c8e232891c3ac7d952302905b6", + "nmdc:445ce659140104b37475c5c2e3fb7761", + "nmdc:32e15eb7eab763990dbb0ce947321718" + ], + "was_informed_by": "gold:Gp0127626", + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga04xnj45", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T20:54:56+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9e" + }, + "has_input": [ + "nmdc:07499ad2f2b80f42bd7109732b1eef90" + ], + "part_of": [ + "nmdc:mga04xnj45" + ], + "ctg_logsum": 63429, + "scaf_logsum": 63657, + "gap_pct": 0.00092, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6d72d9fb6a282f8872cd3d5b8ce1a29d", + "nmdc:2d89ade1cc6267bb77b48daa176442f2", + "nmdc:79588f527e08eace069ddc63171f004c", + "nmdc:cc855d3c15387d078c6919d1b19f8c05", + "nmdc:ef722a8ecd2b85d9202560df41eca7ed" + ], + "asm_score": 7.629, + "was_informed_by": "gold:Gp0127626", + "ctg_powsum": 7359.443, + "scaf_max": 30685, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "scaf_powsum": 7386.413, + "execution_resource": "NERSC-Cori", + "contigs": 105397, + "name": "Assembly Activity for nmdc:mga04xnj45", + "ctg_max": 30685, + "gc_std": 0.09232, + "gc_avg": 0.60819, + "contig_bp": 43390261, + "started_at_time": "2021-12-01T21:31:29Z", + "scaf_bp": 43390661, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 105366, + "ended_at_time": "2021-12-02T20:54:56+00:00", + 
"ctg_l50": 368, + "ctg_l90": 284, + "ctg_n50": 34766, + "ctg_n90": 91597, + "scaf_l50": 368, + "scaf_l90": 284, + "scaf_n50": 34749, + "scaf_n90": 91567 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b3c" + }, + "id": "nmdc:omprc-11-s6wqag22", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-mxdygh62" + ], + "has_output": [ + "jgi:574fde7b7ded5e3df1ee1418" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127626" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86c" + }, + "has_input": [ + "nmdc:8bee270fc5b3a39f7e7609b60e191766" + ], + "part_of": [ + "nmdc:mga04xnj45" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:07499ad2f2b80f42bd7109732b1eef90", + "nmdc:9089d07fdee5ed03e901c1656206af02" + ], + "was_informed_by": "gold:Gp0127626", + "input_read_count": 24223170, + "output_read_bases": 3405205631, + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3657698670, + "name": "Read QC Activity for nmdc:mga04xnj45", + "output_read_count": 22768968, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": 
"2021-12-02T20:54:56+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf30" + }, + "has_input": [ + "nmdc:07499ad2f2b80f42bd7109732b1eef90" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a91f8dccb2baa53550216f5bdfbf1473", + "nmdc:a81ddf4e3bc044e8601554117cd887aa", + "nmdc:a012dc3a7b44774019c313fd8ee88efc", + "nmdc:dd4023a1488bdfc73b12c422b62b274a", + "nmdc:2f9b1c55d52cc61affbe99f5163b48c8", + "nmdc:ccf7f447a25ebf354ce44b3f1f90f223", + "nmdc:2c8efdb77cbcd1276c4fb386fd37bd6d", + "nmdc:806b27f1fa5a423100b113bb56edc708", + "nmdc:bb3e6793c4f036b9756f075d41846964" + ], + "was_informed_by": "gold:Gp0127626", + "id": "nmdc:0ef6e04f4cefbcd9e2d3d2a7717baa3e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga04xnj45", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:54:56+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2150489977, + "type": "nmdc:DataObject", + "id": "jgi:574fde577ded5e3df1ee13fc", + "name": "10533.1.165310.ATAGCGG-ACCGCTA.fastq.gz" + }, + { + "name": "Gp0127624_Filtered Reads", + "description": "Filtered Reads for Gp0127624", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filtered.fastq.gz", + "md5_checksum": "8585f6896702bddf64b02191be5921f4", + "id": "nmdc:8585f6896702bddf64b02191be5921f4", + "file_size_bytes": 1795382596 + }, + { + "name": "Gp0127624_Filtered Stats", + "description": "Filtered Stats for Gp0127624", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/qa/nmdc_mga0e8jh10_filterStats.txt", + "md5_checksum": "b9b6464ecc746a4cc39b549696c5fe9c", + "id": "nmdc:b9b6464ecc746a4cc39b549696c5fe9c", + "file_size_bytes": 289 + }, + 
{ + "name": "Gp0127624_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report.tsv", + "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", + "id": "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "file_size_bytes": 1500 + }, + { + "name": "Gp0127624_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_report_full.tsv", + "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", + "id": "nmdc:6c7fec765f2a225f168ebb1f69961013", + "file_size_bytes": 692993 + }, + { + "name": "Gp0127624_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127624", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_gottcha2_krona.html", + "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", + "id": "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "file_size_bytes": 230779 + }, + { + "name": "Gp0127624_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127624", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_classification.tsv", + "md5_checksum": "77db34862804280185d3b1ce961e5338", + "id": "nmdc:77db34862804280185d3b1ce961e5338", + "file_size_bytes": 1645928829 + }, + { + "name": "Gp0127624_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127624", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_report.tsv", + "md5_checksum": "84e3efb84d961d189ece310911ccf475", + "id": "nmdc:84e3efb84d961d189ece310911ccf475", + 
"file_size_bytes": 254646 + }, + { + "name": "Gp0127624_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127624", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_centrifuge_krona.html", + "md5_checksum": "b8fd31679921f8b68c80917e14caa260", + "id": "nmdc:b8fd31679921f8b68c80917e14caa260", + "file_size_bytes": 2332082 + }, + { + "name": "Gp0127624_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127624", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_classification.tsv", + "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", + "id": "nmdc:715c66c69b621478da7d48481f3cbd1d", + "file_size_bytes": 1316771556 + }, + { + "name": "Gp0127624_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127624", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_report.tsv", + "md5_checksum": "0781e8042688219035efafe7d75858d0", + "id": "nmdc:0781e8042688219035efafe7d75858d0", + "file_size_bytes": 626940 + }, + { + "name": "Gp0127624_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127624", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/ReadbasedAnalysis/nmdc_mga0e8jh10_kraken2_krona.html", + "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", + "id": "nmdc:85547ab860ef9d6877ba7abc8881740a", + "file_size_bytes": 3921891 + }, + { + "name": "Gp0127624_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127624", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_contigs.fna", + "md5_checksum": 
"464a9db7a94e7e0646b1ff8b501d82f3", + "id": "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "file_size_bytes": 95468011 + }, + { + "name": "Gp0127624_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127624", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_scaffolds.fna", + "md5_checksum": "0a50f88775f36e9238152f3319252853", + "id": "nmdc:0a50f88775f36e9238152f3319252853", + "file_size_bytes": 94893921 + }, + { + "name": "Gp0127624_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_covstats.txt", + "md5_checksum": "f0dc2f598fa06efbe99843bddaf54f60", + "id": "nmdc:f0dc2f598fa06efbe99843bddaf54f60", + "file_size_bytes": 15112642 + }, + { + "name": "Gp0127624_Assembled AGP file", + "description": "Assembled AGP file for Gp0127624", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_assembly.agp", + "md5_checksum": "a4405d49e8efe2ee124d25e2414de56c", + "id": "nmdc:a4405d49e8efe2ee124d25e2414de56c", + "file_size_bytes": 14126849 + }, + { + "name": "Gp0127624_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127624", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/assembly/nmdc_mga0e8jh10_pairedMapped_sorted.bam", + "md5_checksum": "8c37ab0b3594cc975348041e4841f6ac", + "id": "nmdc:8c37ab0b3594cc975348041e4841f6ac", + "file_size_bytes": 1976821836 + }, + { + "name": "Gp0127624_Protein FAA", + "description": "Protein FAA for Gp0127624", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_proteins.faa", + "md5_checksum": "40d15cb24063dbb6097fd1626f62db95", + "id": 
"nmdc:40d15cb24063dbb6097fd1626f62db95", + "file_size_bytes": 55458746 + }, + { + "name": "Gp0127624_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127624", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_structural_annotation.gff", + "md5_checksum": "f70325438abce4c6f56e6c82619dd44a", + "id": "nmdc:f70325438abce4c6f56e6c82619dd44a", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127624_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127624", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_functional_annotation.gff", + "md5_checksum": "c5cf33c1f2f68a7c63fef6dd623a97c0", + "id": "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", + "file_size_bytes": 63778960 + }, + { + "name": "Gp0127624_KO TSV file", + "description": "KO TSV file for Gp0127624", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko.tsv", + "md5_checksum": "4aca66fe81c8c056fa5617c7aa77bc7d", + "id": "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", + "file_size_bytes": 7252005 + }, + { + "name": "Gp0127624_EC TSV file", + "description": "EC TSV file for Gp0127624", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ec.tsv", + "md5_checksum": "303a5e88a0eae8942082e9e13f9f6eba", + "id": "nmdc:303a5e88a0eae8942082e9e13f9f6eba", + "file_size_bytes": 4835920 + }, + { + "name": "Gp0127624_COG GFF file", + "description": "COG GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cog.gff", + "md5_checksum": "d919f65e54a8351324e332a5daa6a831", + "id": "nmdc:d919f65e54a8351324e332a5daa6a831", + "file_size_bytes": 37494199 
+ }, + { + "name": "Gp0127624_PFAM GFF file", + "description": "PFAM GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_pfam.gff", + "md5_checksum": "764c7c2b5554fc6b860b036cab22e0ef", + "id": "nmdc:764c7c2b5554fc6b860b036cab22e0ef", + "file_size_bytes": 27739105 + }, + { + "name": "Gp0127624_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_tigrfam.gff", + "md5_checksum": "d0a86560767836f901bdd2625bea46e3", + "id": "nmdc:d0a86560767836f901bdd2625bea46e3", + "file_size_bytes": 3077428 + }, + { + "name": "Gp0127624_SMART GFF file", + "description": "SMART GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_smart.gff", + "md5_checksum": "2f64111072a2b19a726aed9c9f54bba7", + "id": "nmdc:2f64111072a2b19a726aed9c9f54bba7", + "file_size_bytes": 8547849 + }, + { + "name": "Gp0127624_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_supfam.gff", + "md5_checksum": "51a011777869ff58b977991f5c90fc47", + "id": "nmdc:51a011777869ff58b977991f5c90fc47", + "file_size_bytes": 46844460 + }, + { + "name": "Gp0127624_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_cath_funfam.gff", + "md5_checksum": "53f57253df5119d338b9813aa81c7c9b", + "id": "nmdc:53f57253df5119d338b9813aa81c7c9b", + "file_size_bytes": 35558659 + }, + { + "name": "Gp0127624_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/annotation/nmdc_mga0e8jh10_ko_ec.gff", + "md5_checksum": "c4aa03608fa7442a05cd23fdcc29bc21", + "id": "nmdc:c4aa03608fa7442a05cd23fdcc29bc21", + 
"file_size_bytes": 23055213 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127624_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.tooShort.fa", + "md5_checksum": "73aca2cc587d8a632a730dcc6ff53d3b", + "id": "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", + "file_size_bytes": 79198373 + }, + { + "name": "Gp0127624_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_bins.unbinned.fa", + "md5_checksum": "822be4fbeadb0c8c24f4a680d646b62f", + "id": "nmdc:822be4fbeadb0c8c24f4a680d646b62f", + "file_size_bytes": 13854717 + }, + { + "name": "Gp0127624_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127624", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_checkm_qa.out", + "md5_checksum": "6b39bdb404c651428634ad28f8f15e2a", + "id": "nmdc:6b39bdb404c651428634ad28f8f15e2a", + "file_size_bytes": 1106 + }, + { + "name": "Gp0127624_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127624", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_hqmq_bin.zip", + "md5_checksum": 
"0bd9d9e5f15087ccd35c38956bb3a210", + "id": "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", + "file_size_bytes": 507790 + }, + { + "name": "Gp0127624_metabat2 bins", + "description": "metabat2 bins for Gp0127624", + "url": "https://data.microbiomedata.org/data/nmdc:mga0e8jh10/MAGs/nmdc_mga0e8jh10_metabat_bin.zip", + "md5_checksum": "2d174febedeca0ce515939dd53d6ccb9", + "id": "nmdc:2d174febedeca0ce515939dd53d6ccb9", + "file_size_bytes": 230699 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e49" + }, + "description": "Assembled contigs fasta for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly_contigs.fna", + "file_size_bytes": 94703971, + "type": "nmdc:DataObject", + "id": "nmdc:70c6cfaac2821e95aad6732da590276e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4a" + }, + "description": "Assembled scaffold fasta for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly_scaffolds.fna", + "file_size_bytes": 94130161, + "type": "nmdc:DataObject", + "id": "nmdc:3b26db32c98a95990057fb0a38d243ca", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4c" + }, + "description": "Assembled AGP file for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/assembly.agp", + "file_size_bytes": 12598209, + "type": "nmdc:DataObject", + "id": "nmdc:8f50a4da5f7f50b0271523331b484e18", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/mapping_stats.txt", + "file_size_bytes": 14348602, + "type": "nmdc:DataObject", + "id": "nmdc:b7e396b2ead7ab3abf0b39139af1ba09", + "name": "mapping_stats.txt", + 
"data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e4f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127624", + "url": "https://data.microbiomedata.org/data/1781_100326/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1949775740, + "type": "nmdc:DataObject", + "id": "nmdc:d27b41f3c6392653daeb9b6bbc0277be", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15af3" + }, + "id": "nmdc:d84b79e2655d147759dfe8b579b7e4b9", + "name": "1781_100326.json", + "description": "Gold:Gp0127624 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100326/ReadbasedAnalysis/1781_100326.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15af4" + }, + "id": "nmdc:9670430bbf29ecc709d0e98a383ce37e", + "name": "1781_100326.krona.html", + "description": "Gold:Gp0127624 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100326/ReadbasedAnalysis/centrifuge/1781_100326.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b8" + }, + "id": "nmdc:d84b1a0fea4e91826aa72971c4580662", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127624", + "file_size_bytes": 76923851, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165bb" + }, + "id": "nmdc:e47f7f56d18d80c1d06b96dac4fb1090", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0127624", + "file_size_bytes": 1004, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + 
"type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165bc" + }, + "id": "nmdc:72ae589425ade6d237d1fb6bb7f88dd8", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127624", + "file_size_bytes": 1256, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165bd" + }, + "id": "nmdc:d95dfd1f76e7e53f84b231e534b3aba7", + "name": "gold:Gp0127624.bins.3.fa", + "description": "hqmq binned contig file for gold:Gp0127624", + "file_size_bytes": 958082, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/hqmq-metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165be" + }, + "id": "nmdc:8eec32d3611abd677f9849ca12dba02e", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127624", + "file_size_bytes": 14174146, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165bf" + }, + "id": "nmdc:79950b448257c67890bd38c137557aa0", + "name": "gold:Gp0127624.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127624", + "file_size_bytes": 263657, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c0" + }, + "id": "nmdc:105f97b2c45693bd730d438123566b41", + "name": "gold:Gp0127624.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0127624", + "file_size_bytes": 517581, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": 
"Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c1" + }, + "id": "nmdc:0907e3098e3adb4ce9d8bc4ad240191a", + "name": "gold:Gp0127624.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127624", + "file_size_bytes": 978794, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c2" + }, + "id": "nmdc:44c236036e8d7c34d7066b96066415b5", + "name": "gold:Gp0127624.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127624", + "file_size_bytes": 238087, + "url": "https://data.microbiomedata.org/data/1781_100326/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc2" + }, + "description": "EC TSV File for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_ec.tsv", + "md5_checksum": "73ebb84a8744552c890ad2508e313972", + "file_size_bytes": 3385, + "id": "nmdc:73ebb84a8744552c890ad2508e313972", + "name": "gold:Gp0127624_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc3" + }, + "description": "KO TSV File for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_ko.tsv", + "md5_checksum": "89a4bb36ef225146a2ba0daaaea512fd", + "file_size_bytes": 3385, + "id": "nmdc:89a4bb36ef225146a2ba0daaaea512fd", + "name": "gold:Gp0127624_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc5" + }, + "description": "Protein FAA for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_proteins.faa", + "md5_checksum": "075262a23b12fd4da073a973a5b6cf15", + 
"file_size_bytes": 3385, + "id": "nmdc:075262a23b12fd4da073a973a5b6cf15", + "name": "gold:Gp0127624_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc6" + }, + "description": "Structural annotation GFF file for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_structural_annotation.gff", + "md5_checksum": "3fb3966095303ea8aa7f27bff3e9db50", + "file_size_bytes": 3385, + "id": "nmdc:3fb3966095303ea8aa7f27bff3e9db50", + "name": "gold:Gp0127624_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cc7" + }, + "description": "Functional annotation GFF file for gold:Gp0127624", + "url": "https://data.microbiomedata.org/1781_100326/img_annotation/Ga0482246_functional_annotation.gff", + "md5_checksum": "6bcdfc58ee6b4eb5ae022c71636a88b4", + "file_size_bytes": 3385, + "id": "nmdc:6bcdfc58ee6b4eb5ae022c71636a88b4", + "name": "gold:Gp0127624_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34701" + }, + "has_input": [ + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "nmdc:8c37ab0b3594cc975348041e4841f6ac", + "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0" + ], + "too_short_contig_num": 182057, + "part_of": [ + "nmdc:mga0e8jh10" + ], + "binned_contig_num": 364, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:73aca2cc587d8a632a730dcc6ff53d3b", + "nmdc:822be4fbeadb0c8c24f4a680d646b62f", + "nmdc:6b39bdb404c651428634ad28f8f15e2a", + "nmdc:0bd9d9e5f15087ccd35c38956bb3a210", + "nmdc:2d174febedeca0ce515939dd53d6ccb9" + ], + "was_informed_by": "gold:Gp0127624", + "input_contig_num": 191010, + "id": 
"nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0e8jh10", + "mags_list": [ + { + "number_of_contig": 69, + "completeness": 11.21, + "bin_name": "bins.1", + "gene_count": 328, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 194, + "completeness": 75.24, + "bin_name": "bins.2", + "gene_count": 2023, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.78, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 35 + }, + { + "number_of_contig": 101, + "completeness": 19.54, + "bin_name": "bins.3", + "gene_count": 585, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + } + ], + "unbinned_contig_num": 8589, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a2" + }, + "has_input": [ + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3" + ], + "part_of": [ + "nmdc:mga0e8jh10" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:40d15cb24063dbb6097fd1626f62db95", + "nmdc:f70325438abce4c6f56e6c82619dd44a", + "nmdc:c5cf33c1f2f68a7c63fef6dd623a97c0", + "nmdc:4aca66fe81c8c056fa5617c7aa77bc7d", + 
"nmdc:303a5e88a0eae8942082e9e13f9f6eba", + "nmdc:d919f65e54a8351324e332a5daa6a831", + "nmdc:764c7c2b5554fc6b860b036cab22e0ef", + "nmdc:d0a86560767836f901bdd2625bea46e3", + "nmdc:2f64111072a2b19a726aed9c9f54bba7", + "nmdc:51a011777869ff58b977991f5c90fc47", + "nmdc:53f57253df5119d338b9813aa81c7c9b", + "nmdc:c4aa03608fa7442a05cd23fdcc29bc21" + ], + "was_informed_by": "gold:Gp0127624", + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0e8jh10", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f8d" + }, + "has_input": [ + "nmdc:8585f6896702bddf64b02191be5921f4" + ], + "part_of": [ + "nmdc:mga0e8jh10" + ], + "ctg_logsum": 174168, + "scaf_logsum": 174680, + "gap_pct": 0.0009, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:464a9db7a94e7e0646b1ff8b501d82f3", + "nmdc:0a50f88775f36e9238152f3319252853", + "nmdc:f0dc2f598fa06efbe99843bddaf54f60", + "nmdc:a4405d49e8efe2ee124d25e2414de56c", + "nmdc:8c37ab0b3594cc975348041e4841f6ac" + ], + "asm_score": 5.95, + "was_informed_by": "gold:Gp0127624", + "ctg_powsum": 19404, + "scaf_max": 33408, + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "scaf_powsum": 19462, + "execution_resource": "NERSC-Cori", + "contigs": 191010, + "name": "Assembly Activity for nmdc:mga0e8jh10", + "ctg_max": 33408, + "gc_std": 0.09154, + "contig_bp": 88102698, + "gc_avg": 0.62452, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 88103488, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 190940, + "ended_at_time": "2021-10-11T03:30:59+00:00", + "ctg_l50": 434, + "ctg_l90": 288, + "ctg_n50": 56361, + "ctg_n90": 162547, + "scaf_l50": 434, + "scaf_l90": 288, + "scaf_n50": 56334, + "scaf_n90": 162481 + } + ], + "omics_processing_set": [ + { + 
"_id": { + "$oid": "649b009773e8249959349b3d" + }, + "id": "nmdc:omprc-11-x0es2p18", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-msqbhe76" + ], + "has_output": [ + "jgi:574fde577ded5e3df1ee13fc" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127624" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85b" + }, + "has_input": [ + "nmdc:e24b00c4de7a24629f5933940070e06c" + ], + "part_of": [ + "nmdc:mga0e8jh10" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8585f6896702bddf64b02191be5921f4", + "nmdc:b9b6464ecc746a4cc39b549696c5fe9c" + ], + "was_informed_by": "gold:Gp0127624", + "input_read_count": 25674112, + "output_read_bases": 3361311014, + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3876790912, + "name": "Read QC Activity for nmdc:mga0e8jh10", + "output_read_count": 22503352, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf1c" + }, + "has_input": [ + 
"nmdc:8585f6896702bddf64b02191be5921f4" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fef871a81032dd1f3e57dc1c7d5aa3db", + "nmdc:6c7fec765f2a225f168ebb1f69961013", + "nmdc:6e660d5a062f9c3ad7b49d8d438453d7", + "nmdc:77db34862804280185d3b1ce961e5338", + "nmdc:84e3efb84d961d189ece310911ccf475", + "nmdc:b8fd31679921f8b68c80917e14caa260", + "nmdc:715c66c69b621478da7d48481f3cbd1d", + "nmdc:0781e8042688219035efafe7d75858d0", + "nmdc:85547ab860ef9d6877ba7abc8881740a" + ], + "was_informed_by": "gold:Gp0127624", + "id": "nmdc:20efb77f7b08a72b0c887c059686b7cf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0e8jh10", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:30:59+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2057112594, + "type": "nmdc:DataObject", + "id": "jgi:574fde587ded5e3df1ee13fd", + "name": "10533.1.165310.CCAGTGT-AACACTG.fastq.gz" + }, + { + "name": "Gp0127629_Filtered Reads", + "description": "Filtered Reads for Gp0127629", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filtered.fastq.gz", + "md5_checksum": "0db98173ae3395106e24d250b2655f06", + "id": "nmdc:0db98173ae3395106e24d250b2655f06", + "file_size_bytes": 1807840952 + }, + { + "name": "Gp0127629_Filtered Stats", + "description": "Filtered Stats for Gp0127629", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/qa/nmdc_mga071r920_filterStats.txt", + "md5_checksum": "bc0874c01bbd31c644cd598e2fdad3c4", + "id": "nmdc:bc0874c01bbd31c644cd598e2fdad3c4", + "file_size_bytes": 284 + }, + { + "name": "Gp0127629_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127629", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report.tsv", + "md5_checksum": "f4f810491708ff25956cddd005cc9944", + "id": "nmdc:f4f810491708ff25956cddd005cc9944", + "file_size_bytes": 1206 + }, + { + "name": "Gp0127629_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_report_full.tsv", + "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", + "id": "nmdc:67e3c200d3765733af33d1db1f4bf968", + "file_size_bytes": 662074 + }, + { + "name": "Gp0127629_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127629", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_gottcha2_krona.html", + "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", + "id": "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "file_size_bytes": 229307 + }, + { + "name": "Gp0127629_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127629", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_classification.tsv", + "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", + "id": "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "file_size_bytes": 1667543500 + }, + { + "name": "Gp0127629_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127629", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_report.tsv", + "md5_checksum": "6a216ec913587e26ddc036b703126d76", + "id": "nmdc:6a216ec913587e26ddc036b703126d76", + "file_size_bytes": 253079 + }, + { + "name": "Gp0127629_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127629", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_centrifuge_krona.html", + "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", + "id": "nmdc:ebed7286f886596764a66a0d1dac3e43", + "file_size_bytes": 2326900 + }, + { + "name": "Gp0127629_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127629", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_classification.tsv", + "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", + "id": "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "file_size_bytes": 1328025421 + }, + { + "name": "Gp0127629_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127629", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_report.tsv", + "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", + "id": "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "file_size_bytes": 628969 + }, + { + "name": "Gp0127629_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127629", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/ReadbasedAnalysis/nmdc_mga071r920_kraken2_krona.html", + "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", + "id": "nmdc:81108175d5ef2ca158f516bfc75d3cd9", + "file_size_bytes": 3933712 + }, + { + "name": "Gp0127629_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127629", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_contigs.fna", + "md5_checksum": "7badcefc26b24213b514cd4c3c9a87d7", + "id": "nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "file_size_bytes": 109144090 + 
}, + { + "name": "Gp0127629_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127629", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_scaffolds.fna", + "md5_checksum": "89dd3c10791083ae5a5b30c2154deabd", + "id": "nmdc:89dd3c10791083ae5a5b30c2154deabd", + "file_size_bytes": 108517023 + }, + { + "name": "Gp0127629_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_covstats.txt", + "md5_checksum": "5e503e3abe6eb9e94c34a55da5bbafdc", + "id": "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", + "file_size_bytes": 16536925 + }, + { + "name": "Gp0127629_Assembled AGP file", + "description": "Assembled AGP file for Gp0127629", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_assembly.agp", + "md5_checksum": "0a1f96cd74ec9f1a6668924745689014", + "id": "nmdc:0a1f96cd74ec9f1a6668924745689014", + "file_size_bytes": 15454045 + }, + { + "name": "Gp0127629_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127629", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/assembly/nmdc_mga071r920_pairedMapped_sorted.bam", + "md5_checksum": "1608f12840c36ac1d882cc6ef4f4627f", + "id": "nmdc:1608f12840c36ac1d882cc6ef4f4627f", + "file_size_bytes": 2001264626 + }, + { + "name": "Gp0127629_Protein FAA", + "description": "Protein FAA for Gp0127629", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_proteins.faa", + "md5_checksum": "ba15f54043fad473edec771b60f5b040", + "id": "nmdc:ba15f54043fad473edec771b60f5b040", + "file_size_bytes": 62222526 + }, + { + "name": "Gp0127629_Structural annotation 
GFF file", + "description": "Structural annotation GFF file for Gp0127629", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_structural_annotation.gff", + "md5_checksum": "f6d684abab1c60b2b95ade84644e6a38", + "id": "nmdc:f6d684abab1c60b2b95ade84644e6a38", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127629_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127629", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_functional_annotation.gff", + "md5_checksum": "496e0fa5ac1c04849338c972189ee3f6", + "id": "nmdc:496e0fa5ac1c04849338c972189ee3f6", + "file_size_bytes": 70803412 + }, + { + "name": "Gp0127629_KO TSV file", + "description": "KO TSV file for Gp0127629", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko.tsv", + "md5_checksum": "311ffbbfc80f28908615a1f18492ae5e", + "id": "nmdc:311ffbbfc80f28908615a1f18492ae5e", + "file_size_bytes": 8203743 + }, + { + "name": "Gp0127629_EC TSV file", + "description": "EC TSV file for Gp0127629", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ec.tsv", + "md5_checksum": "7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "id": "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "file_size_bytes": 5508974 + }, + { + "name": "Gp0127629_COG GFF file", + "description": "COG GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cog.gff", + "md5_checksum": "1116328ed7ba951246f0eec1d3f065b4", + "id": "nmdc:1116328ed7ba951246f0eec1d3f065b4", + "file_size_bytes": 42250648 + }, + { + "name": "Gp0127629_PFAM GFF file", + "description": "PFAM GFF file for Gp0127629", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_pfam.gff", + "md5_checksum": "325e47bc009aeba79fc767e3b6daeee2", + "id": "nmdc:325e47bc009aeba79fc767e3b6daeee2", + "file_size_bytes": 31677996 + }, + { + "name": "Gp0127629_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_tigrfam.gff", + "md5_checksum": "f820db8ce6a1ae7c3e8af40729f5b62b", + "id": "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", + "file_size_bytes": 3472661 + }, + { + "name": "Gp0127629_SMART GFF file", + "description": "SMART GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_smart.gff", + "md5_checksum": "96ab6fa258a08490082b4f99269f3e8d", + "id": "nmdc:96ab6fa258a08490082b4f99269f3e8d", + "file_size_bytes": 9149681 + }, + { + "name": "Gp0127629_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_supfam.gff", + "md5_checksum": "a2b630c408bd557d693b147f95627fdc", + "id": "nmdc:a2b630c408bd557d693b147f95627fdc", + "file_size_bytes": 52308332 + }, + { + "name": "Gp0127629_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_cath_funfam.gff", + "md5_checksum": "ba87cd24242288e0b6d8f32a2bcbbb80", + "id": "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", + "file_size_bytes": 39926818 + }, + { + "name": "Gp0127629_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/annotation/nmdc_mga071r920_ko_ec.gff", + "md5_checksum": "9c97bd7a5e4978e31ed1e5386c3619f3", + "id": "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3", + "file_size_bytes": 26101397 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + 
"description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127629_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.tooShort.fa", + "md5_checksum": "2bbd475ff6a15058b38244e71456024a", + "id": "nmdc:2bbd475ff6a15058b38244e71456024a", + "file_size_bytes": 88674437 + }, + { + "name": "Gp0127629_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_bins.unbinned.fa", + "md5_checksum": "70901a70c06fdcfc71efa2d004e210fd", + "id": "nmdc:70901a70c06fdcfc71efa2d004e210fd", + "file_size_bytes": 19226945 + }, + { + "name": "Gp0127629_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127629", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_checkm_qa.out", + "md5_checksum": "d52b4ae6b61161082fee7d42ecf5ee87", + "id": "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", + "file_size_bytes": 978 + }, + { + "name": "Gp0127629_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127629", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_hqmq_bin.zip", + "md5_checksum": "58d9cd30ca53424cd0f1ce27d0a8a885", + "id": "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", + "file_size_bytes": 182 + }, + { + "name": 
"Gp0127629_metabat2 bins", + "description": "metabat2 bins for Gp0127629", + "url": "https://data.microbiomedata.org/data/nmdc:mga071r920/MAGs/nmdc_mga071r920_metabat_bin.zip", + "md5_checksum": "8f4f5294de942734837fba3d68ffc6b4", + "id": "nmdc:8f4f5294de942734837fba3d68ffc6b4", + "file_size_bytes": 377953 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e61" + }, + "description": "Assembled scaffold fasta for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly_scaffolds.fna", + "file_size_bytes": 107683315, + "type": "nmdc:DataObject", + "id": "nmdc:eb0c17effb4ea272e31318eecbe890da", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e62" + }, + "description": "Assembled contigs fasta for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly_contigs.fna", + "file_size_bytes": 108309886, + "type": "nmdc:DataObject", + "id": "nmdc:9eed2da9f67c58f243329daf2289f40e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e63" + }, + "description": "Assembled AGP file for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/assembly.agp", + "file_size_bytes": 13784613, + "type": "nmdc:DataObject", + "id": "nmdc:0680d9887963e661ca943b8b2779d954", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e64" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/mapping_stats.txt", + "file_size_bytes": 15702721, + "type": "nmdc:DataObject", + "id": "nmdc:047d86c83005c22cf581cb6c092a6362", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e67" + }, + "description": "Metagenome 
Alignment BAM file for gold:Gp0127629", + "url": "https://data.microbiomedata.org/data/1781_100331/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1973835303, + "type": "nmdc:DataObject", + "id": "nmdc:42c05d533d1a4ecaaa7367180a1b9b36", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b18" + }, + "id": "nmdc:7d974a6b38d5572992a870fb6dbcfb24", + "name": "1781_100331.json", + "description": "Gold:Gp0127629 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100331/ReadbasedAnalysis/1781_100331.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b1a" + }, + "id": "nmdc:728f96c592e66b7def86b6b6a227fd51", + "name": "1781_100331.krona.html", + "description": "Gold:Gp0127629 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100331/ReadbasedAnalysis/centrifuge/1781_100331.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e2" + }, + "id": "nmdc:26b0b714933a5d25157f88d0e491104b", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127629", + "file_size_bytes": 20329609, + "url": "https://data.microbiomedata.org/data/1781_100331/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fa" + }, + "id": "nmdc:3508c879a1c420c55e7325f8be819542", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127629", + "file_size_bytes": 86214629, + "url": "https://data.microbiomedata.org/data/1781_100331/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cda" + }, + "description": "KO TSV File for gold:Gp0127629", + "url": 
"https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_ko.tsv", + "md5_checksum": "37fb326b25c1ae3caebddf668feadd76", + "file_size_bytes": 3385, + "id": "nmdc:37fb326b25c1ae3caebddf668feadd76", + "name": "gold:Gp0127629_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cdb" + }, + "description": "Functional annotation GFF file for gold:Gp0127629", + "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_functional_annotation.gff", + "md5_checksum": "75e43708767f06de878e1c2115714e0b", + "file_size_bytes": 3385, + "id": "nmdc:75e43708767f06de878e1c2115714e0b", + "name": "gold:Gp0127629_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cdc" + }, + "description": "EC TSV File for gold:Gp0127629", + "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_ec.tsv", + "md5_checksum": "f9211f36dc6992c2dfecd160987434c7", + "file_size_bytes": 3385, + "id": "nmdc:f9211f36dc6992c2dfecd160987434c7", + "name": "gold:Gp0127629_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce1" + }, + "description": "Structural annotation GFF file for gold:Gp0127629", + "url": "https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_structural_annotation.gff", + "md5_checksum": "9b3fb3e409e3d3128a8a43cc58d32a95", + "file_size_bytes": 3385, + "id": "nmdc:9b3fb3e409e3d3128a8a43cc58d32a95", + "name": "gold:Gp0127629_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce2" + }, + "description": "Protein FAA for gold:Gp0127629", + "url": 
"https://data.microbiomedata.org/1781_100331/img_annotation/Ga0482241_proteins.faa", + "md5_checksum": "9559ebd9a8921ff8ae9f89c2ffcef6f7", + "file_size_bytes": 3385, + "id": "nmdc:9559ebd9a8921ff8ae9f89c2ffcef6f7", + "name": "gold:Gp0127629_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3470c" + }, + "has_input": [ + "nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "nmdc:1608f12840c36ac1d882cc6ef4f4627f", + "nmdc:496e0fa5ac1c04849338c972189ee3f6" + ], + "too_short_contig_num": 195955, + "part_of": [ + "nmdc:mga071r920" + ], + "binned_contig_num": 271, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2bbd475ff6a15058b38244e71456024a", + "nmdc:70901a70c06fdcfc71efa2d004e210fd", + "nmdc:d52b4ae6b61161082fee7d42ecf5ee87", + "nmdc:58d9cd30ca53424cd0f1ce27d0a8a885", + "nmdc:8f4f5294de942734837fba3d68ffc6b4" + ], + "was_informed_by": "gold:Gp0127629", + "input_contig_num": 208551, + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga071r920", + "mags_list": [ + { + "number_of_contig": 177, + "completeness": 9.71, + "bin_name": "bins.1", + "gene_count": 1122, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 94, + "completeness": 16.81, + "bin_name": "bins.2", + "gene_count": 465, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.34, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + 
"num_t_rna": 5 + } + ], + "unbinned_contig_num": 12325, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:33+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9af" + }, + "has_input": [ + "nmdc:7badcefc26b24213b514cd4c3c9a87d7" + ], + "part_of": [ + "nmdc:mga071r920" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ba15f54043fad473edec771b60f5b040", + "nmdc:f6d684abab1c60b2b95ade84644e6a38", + "nmdc:496e0fa5ac1c04849338c972189ee3f6", + "nmdc:311ffbbfc80f28908615a1f18492ae5e", + "nmdc:7d90dec4cdc8c8e8a4bc8f7bddf2e0c2", + "nmdc:1116328ed7ba951246f0eec1d3f065b4", + "nmdc:325e47bc009aeba79fc767e3b6daeee2", + "nmdc:f820db8ce6a1ae7c3e8af40729f5b62b", + "nmdc:96ab6fa258a08490082b4f99269f3e8d", + "nmdc:a2b630c408bd557d693b147f95627fdc", + "nmdc:ba87cd24242288e0b6d8f32a2bcbbb80", + "nmdc:9c97bd7a5e4978e31ed1e5386c3619f3" + ], + "was_informed_by": "gold:Gp0127629", + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga071r920", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:33:33+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9a" + }, + "has_input": [ + "nmdc:0db98173ae3395106e24d250b2655f06" + ], + "part_of": [ + "nmdc:mga071r920" + ], + "ctg_logsum": 212258, + "scaf_logsum": 212917, + "gap_pct": 0.00151, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7badcefc26b24213b514cd4c3c9a87d7", + "nmdc:89dd3c10791083ae5a5b30c2154deabd", + "nmdc:5e503e3abe6eb9e94c34a55da5bbafdc", + "nmdc:0a1f96cd74ec9f1a6668924745689014", + "nmdc:1608f12840c36ac1d882cc6ef4f4627f" + ], + "asm_score": 3.305, + "was_informed_by": "gold:Gp0127629", + "ctg_powsum": 22751, + 
"scaf_max": 23996, + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "scaf_powsum": 22826, + "execution_resource": "NERSC-Cori", + "contigs": 208553, + "name": "Assembly Activity for nmdc:mga071r920", + "ctg_max": 23996, + "gc_std": 0.1053, + "contig_bp": 101011771, + "gc_avg": 0.62056, + "started_at_time": "2021-10-11T02:23:35Z", + "scaf_bp": 101013301, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 208427, + "ended_at_time": "2021-10-11T03:33:33+00:00", + "ctg_l50": 478, + "ctg_l90": 290, + "ctg_n50": 59884, + "ctg_n90": 174522, + "scaf_l50": 478, + "scaf_l90": 290, + "scaf_n50": 59864, + "scaf_n90": 174416 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b3e" + }, + "id": "nmdc:omprc-11-1nvcer55", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-3sfanv57" + ], + "has_output": [ + "jgi:574fde587ded5e3df1ee13fd" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127629" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c869" + }, + "has_input": [ + "nmdc:22f8150866c51b35726066d2ec13c5ca" + ], + "part_of": [ + "nmdc:mga071r920" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:0db98173ae3395106e24d250b2655f06", + "nmdc:bc0874c01bbd31c644cd598e2fdad3c4" + ], + "was_informed_by": "gold:Gp0127629", + "input_read_count": 23886420, + "output_read_bases": 3395256515, + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3606849420, + "name": "Read QC Activity for nmdc:mga071r920", + "output_read_count": 22738452, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:33:33+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf2b" + }, + "has_input": [ + "nmdc:0db98173ae3395106e24d250b2655f06" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f4f810491708ff25956cddd005cc9944", + "nmdc:67e3c200d3765733af33d1db1f4bf968", + "nmdc:26cd6390e8362da2ee1d7691360d2dfb", + "nmdc:80fe705d97ef4a0701b1320e9ba19a82", + "nmdc:6a216ec913587e26ddc036b703126d76", + "nmdc:ebed7286f886596764a66a0d1dac3e43", + "nmdc:80dd3584d257e8f84b59118ffd0d5e21", + "nmdc:61b5fe5664ca99f6354c7a5a0222678c", + "nmdc:81108175d5ef2ca158f516bfc75d3cd9" + ], + "was_informed_by": "gold:Gp0127629", + "id": "nmdc:b82754c2c692809f9e59ff9824278c32", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga071r920", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:33:33+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2825784199, + "type": "nmdc:DataObject", + "id": "jgi:574fe09a7ded5e3df1ee1485", + "name": "10533.3.165334.CTGACAC-TGTGTCA.fastq.gz" + }, + { + "name": "Gp0127628_Filtered Reads", + "description": "Filtered Reads for Gp0127628", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filtered.fastq.gz", + 
"md5_checksum": "f6f1760721d73fc57919b2115a1d47ec", + "id": "nmdc:f6f1760721d73fc57919b2115a1d47ec", + "file_size_bytes": 2548975208 + }, + { + "name": "Gp0127628_Filtered Stats", + "description": "Filtered Stats for Gp0127628", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/qa/nmdc_mga0x5c381_filterStats.txt", + "md5_checksum": "2225f9d41343590d818186fa2d66852d", + "id": "nmdc:2225f9d41343590d818186fa2d66852d", + "file_size_bytes": 291 + }, + { + "name": "Gp0127628_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report.tsv", + "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", + "id": "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "file_size_bytes": 3472 + }, + { + "name": "Gp0127628_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_report_full.tsv", + "md5_checksum": "335dbf6f1055de0950988a002f432c0b", + "id": "nmdc:335dbf6f1055de0950988a002f432c0b", + "file_size_bytes": 863867 + }, + { + "name": "Gp0127628_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127628", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_gottcha2_krona.html", + "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", + "id": "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "file_size_bytes": 234974 + }, + { + "name": "Gp0127628_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127628", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_classification.tsv", + "md5_checksum": 
"224085164a389c6f207967ed03b3e6af", + "id": "nmdc:224085164a389c6f207967ed03b3e6af", + "file_size_bytes": 2220789142 + }, + { + "name": "Gp0127628_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127628", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_report.tsv", + "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", + "id": "nmdc:39ba17263c144761a8bdcc1645c034f5", + "file_size_bytes": 257030 + }, + { + "name": "Gp0127628_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127628", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_centrifuge_krona.html", + "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", + "id": "nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "file_size_bytes": 2337568 + }, + { + "name": "Gp0127628_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127628", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_classification.tsv", + "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", + "id": "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "file_size_bytes": 1776487262 + }, + { + "name": "Gp0127628_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127628", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_report.tsv", + "md5_checksum": "aae9e961d8ed716457616c8a8841037b", + "id": "nmdc:aae9e961d8ed716457616c8a8841037b", + "file_size_bytes": 664011 + }, + { + "name": "Gp0127628_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127628", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/ReadbasedAnalysis/nmdc_mga0x5c381_kraken2_krona.html", + "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", + "id": "nmdc:ba83d6ab837403f4bcbc9400a0460457", + "file_size_bytes": 4035375 + }, + { + "name": "Gp0127628_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127628", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_contigs.fna", + "md5_checksum": "9e550afb3bcd8d66807f861ecfed815b", + "id": "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "file_size_bytes": 74277737 + }, + { + "name": "Gp0127628_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127628", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_scaffolds.fna", + "md5_checksum": "5e79fce62ffa8c4479be5159143797e0", + "id": "nmdc:5e79fce62ffa8c4479be5159143797e0", + "file_size_bytes": 73802989 + }, + { + "name": "Gp0127628_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_covstats.txt", + "md5_checksum": "682fd042d6adcd93f75c3eae2cf32241", + "id": "nmdc:682fd042d6adcd93f75c3eae2cf32241", + "file_size_bytes": 12462125 + }, + { + "name": "Gp0127628_Assembled AGP file", + "description": "Assembled AGP file for Gp0127628", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_assembly.agp", + "md5_checksum": "9d607ebd92ad5bcbaaa405884d4a83a3", + "id": "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", + "file_size_bytes": 11636352 + }, + { + "name": "Gp0127628_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127628", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0x5c381/assembly/nmdc_mga0x5c381_pairedMapped_sorted.bam", + "md5_checksum": "9163caaba1f60d1af9a551559069ca08", + "id": "nmdc:9163caaba1f60d1af9a551559069ca08", + "file_size_bytes": 2743529039 + }, + { + "name": "Gp0127628_Protein FAA", + "description": "Protein FAA for Gp0127628", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_proteins.faa", + "md5_checksum": "9c21fbee23b4098d69ac618d32fe44c3", + "id": "nmdc:9c21fbee23b4098d69ac618d32fe44c3", + "file_size_bytes": 43551850 + }, + { + "name": "Gp0127628_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127628", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_structural_annotation.gff", + "md5_checksum": "c668eaf35e0ebbb7a304271a03dfd3cd", + "id": "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127628_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127628", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_functional_annotation.gff", + "md5_checksum": "cf08b19ebb993d895845588d073c02fe", + "id": "nmdc:cf08b19ebb993d895845588d073c02fe", + "file_size_bytes": 50830515 + }, + { + "name": "Gp0127628_KO TSV file", + "description": "KO TSV file for Gp0127628", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko.tsv", + "md5_checksum": "e110cecd0dcfbefbde06b88e89047c94", + "id": "nmdc:e110cecd0dcfbefbde06b88e89047c94", + "file_size_bytes": 5904167 + }, + { + "name": "Gp0127628_EC TSV file", + "description": "EC TSV file for Gp0127628", + "data_object_type": "Annotation Enzyme Commission", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ec.tsv", + "md5_checksum": "5f393bad4aacf75d348d7e7d5fe00a06", + "id": "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", + "file_size_bytes": 3917008 + }, + { + "name": "Gp0127628_COG GFF file", + "description": "COG GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cog.gff", + "md5_checksum": "c8834a004633752f76b91883416c34b8", + "id": "nmdc:c8834a004633752f76b91883416c34b8", + "file_size_bytes": 29634134 + }, + { + "name": "Gp0127628_PFAM GFF file", + "description": "PFAM GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_pfam.gff", + "md5_checksum": "adc813c11b8b32e205aa65ab971d4159", + "id": "nmdc:adc813c11b8b32e205aa65ab971d4159", + "file_size_bytes": 21661208 + }, + { + "name": "Gp0127628_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_tigrfam.gff", + "md5_checksum": "eecb4098ed258acb0820c17e9e308a9d", + "id": "nmdc:eecb4098ed258acb0820c17e9e308a9d", + "file_size_bytes": 2198767 + }, + { + "name": "Gp0127628_SMART GFF file", + "description": "SMART GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_smart.gff", + "md5_checksum": "cd2cbf38f357d4c7ec5080072e994861", + "id": "nmdc:cd2cbf38f357d4c7ec5080072e994861", + "file_size_bytes": 6281175 + }, + { + "name": "Gp0127628_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_supfam.gff", + "md5_checksum": "1e7aefe1539f0dbe510f805a8d0a6930", + "id": "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", + "file_size_bytes": 36891824 + }, + { + "name": "Gp0127628_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127628", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_cath_funfam.gff", + "md5_checksum": "29e9378a37cc56837c1343de85993789", + "id": "nmdc:29e9378a37cc56837c1343de85993789", + "file_size_bytes": 27671574 + }, + { + "name": "Gp0127628_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/annotation/nmdc_mga0x5c381_ko_ec.gff", + "md5_checksum": "5faeccd78a03acd094263a777faa5fe2", + "id": "nmdc:5faeccd78a03acd094263a777faa5fe2", + "file_size_bytes": 18790529 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127628_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.tooShort.fa", + "md5_checksum": "13137fa415f537d2874808d8c75c1b3d", + "id": "nmdc:13137fa415f537d2874808d8c75c1b3d", + "file_size_bytes": 63661919 + }, + { + "name": "Gp0127628_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_bins.unbinned.fa", + "md5_checksum": "196d2699f8fdab4e38c8a638f92093b1", + "id": "nmdc:196d2699f8fdab4e38c8a638f92093b1", + "file_size_bytes": 9649261 + }, + { + "name": "Gp0127628_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127628", + "data_object_type": "CheckM Statistics", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_checkm_qa.out", + "md5_checksum": "b67b26f8f76faa347575352000021faf", + "id": "nmdc:b67b26f8f76faa347575352000021faf", + "file_size_bytes": 785 + }, + { + "name": "Gp0127628_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127628", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_hqmq_bin.zip", + "md5_checksum": "166c8a0ad2f4d57e9b16cdc699d56c09", + "id": "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", + "file_size_bytes": 182 + }, + { + "name": "Gp0127628_metabat2 bins", + "description": "metabat2 bins for Gp0127628", + "url": "https://data.microbiomedata.org/data/nmdc:mga0x5c381/MAGs/nmdc_mga0x5c381_metabat_bin.zip", + "md5_checksum": "5ef5ad24cfe3990c0256d420f51f9010", + "id": "nmdc:5ef5ad24cfe3990c0256d420f51f9010", + "file_size_bytes": 279359 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/mapping_stats.txt", + "file_size_bytes": 11830693, + "type": "nmdc:DataObject", + "id": "nmdc:29cc178c2efed5702e8d984729345761", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5d" + }, + "description": "Assembled contigs fasta for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly_contigs.fna", + "file_size_bytes": 73646305, + "type": "nmdc:DataObject", + "id": "nmdc:e54d5475a6bf7148d2312d0fcc349cdb", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5e" + }, + "description": "Assembled scaffold fasta for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly_scaffolds.fna", + 
"file_size_bytes": 73171893, + "type": "nmdc:DataObject", + "id": "nmdc:140f23c819c51594790d1209780f8f60", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e60" + }, + "description": "Assembled AGP file for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/assembly.agp", + "file_size_bytes": 10372800, + "type": "nmdc:DataObject", + "id": "nmdc:64d6bc3e0883eb23926cd05e43c42d2c", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e65" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127628", + "url": "https://data.microbiomedata.org/data/1781_100330/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2706803208, + "type": "nmdc:DataObject", + "id": "nmdc:72951466c19bec33fea0232a2becf637", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b09" + }, + "id": "nmdc:c65969e7112a41f10ba56435077e1833", + "name": "1781_100330.krona.html", + "description": "Gold:Gp0127628 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100330/ReadbasedAnalysis/centrifuge/1781_100330.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b10" + }, + "id": "nmdc:94230c82a668902f15d13898305b06d0", + "name": "1781_100330.json", + "description": "Gold:Gp0127628 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100330/ReadbasedAnalysis/1781_100330.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d3" + }, + "id": "nmdc:58091a36e7ab3d84f65b6d4e08f1a528", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127628", + 
"file_size_bytes": 61790019, + "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d4" + }, + "id": "nmdc:756c3abe18401097b557a9d3f2788fa1", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127628", + "file_size_bytes": 10332738, + "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d5" + }, + "id": "nmdc:bd1a55c5a24a4ec234f5c22ce66ba8e2", + "name": "gold:Gp0127628.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127628", + "file_size_bytes": 214091, + "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d9" + }, + "id": "nmdc:4d6fbc978933ca7a2fb204c0230252d1", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127628", + "file_size_bytes": 770, + "url": "https://data.microbiomedata.org/data/1781_100330/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd4" + }, + "description": "EC TSV File for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_ec.tsv", + "md5_checksum": "760a1e1bc5aac21dd0b96098c72133ff", + "file_size_bytes": 3385, + "id": "nmdc:760a1e1bc5aac21dd0b96098c72133ff", + "name": "gold:Gp0127628_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd7" + }, + "description": "Protein FAA for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_proteins.faa", + "md5_checksum": 
"ec55b61e1204cde7fe61841179b88b53", + "file_size_bytes": 3385, + "id": "nmdc:ec55b61e1204cde7fe61841179b88b53", + "name": "gold:Gp0127628_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd8" + }, + "description": "Functional annotation GFF file for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_functional_annotation.gff", + "md5_checksum": "b73bf45facd909d89bfab76dee85a2cc", + "file_size_bytes": 3385, + "id": "nmdc:b73bf45facd909d89bfab76dee85a2cc", + "name": "gold:Gp0127628_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd9" + }, + "description": "KO TSV File for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_ko.tsv", + "md5_checksum": "69da278e8966a688cafb7bb2c8f2e4d1", + "file_size_bytes": 3385, + "id": "nmdc:69da278e8966a688cafb7bb2c8f2e4d1", + "name": "gold:Gp0127628_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce0" + }, + "description": "Structural annotation GFF file for gold:Gp0127628", + "url": "https://data.microbiomedata.org/1781_100330/img_annotation/Ga0482242_structural_annotation.gff", + "md5_checksum": "bd55dfd59ed0aa7ea685734c5b7ecbab", + "file_size_bytes": 3385, + "id": "nmdc:bd55dfd59ed0aa7ea685734c5b7ecbab", + "name": "gold:Gp0127628_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34705" + }, + "has_input": [ + "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "nmdc:9163caaba1f60d1af9a551559069ca08", + "nmdc:cf08b19ebb993d895845588d073c02fe" + ], + "too_short_contig_num": 151485, + "part_of": [ + 
"nmdc:mga0x5c381" + ], + "binned_contig_num": 238, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:13137fa415f537d2874808d8c75c1b3d", + "nmdc:196d2699f8fdab4e38c8a638f92093b1", + "nmdc:b67b26f8f76faa347575352000021faf", + "nmdc:166c8a0ad2f4d57e9b16cdc699d56c09", + "nmdc:5ef5ad24cfe3990c0256d420f51f9010" + ], + "was_informed_by": "gold:Gp0127628", + "input_contig_num": 157858, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0x5c381", + "mags_list": [ + { + "number_of_contig": 238, + "completeness": 30.86, + "bin_name": "bins.1", + "gene_count": 1126, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 6135, + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9aa" + }, + "has_input": [ + "nmdc:9e550afb3bcd8d66807f861ecfed815b" + ], + "part_of": [ + "nmdc:mga0x5c381" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9c21fbee23b4098d69ac618d32fe44c3", + "nmdc:c668eaf35e0ebbb7a304271a03dfd3cd", + "nmdc:cf08b19ebb993d895845588d073c02fe", + "nmdc:e110cecd0dcfbefbde06b88e89047c94", + "nmdc:5f393bad4aacf75d348d7e7d5fe00a06", + "nmdc:c8834a004633752f76b91883416c34b8", + "nmdc:adc813c11b8b32e205aa65ab971d4159", + "nmdc:eecb4098ed258acb0820c17e9e308a9d", + "nmdc:cd2cbf38f357d4c7ec5080072e994861", + "nmdc:1e7aefe1539f0dbe510f805a8d0a6930", + "nmdc:29e9378a37cc56837c1343de85993789", + "nmdc:5faeccd78a03acd094263a777faa5fe2" + ], 
+ "was_informed_by": "gold:Gp0127628", + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0x5c381", + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fae" + }, + "has_input": [ + "nmdc:f6f1760721d73fc57919b2115a1d47ec" + ], + "part_of": [ + "nmdc:mga0x5c381" + ], + "ctg_logsum": 110768, + "scaf_logsum": 111226, + "gap_pct": 0.00124, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9e550afb3bcd8d66807f861ecfed815b", + "nmdc:5e79fce62ffa8c4479be5159143797e0", + "nmdc:682fd042d6adcd93f75c3eae2cf32241", + "nmdc:9d607ebd92ad5bcbaaa405884d4a83a3", + "nmdc:9163caaba1f60d1af9a551559069ca08" + ], + "asm_score": 4.319, + "was_informed_by": "gold:Gp0127628", + "ctg_powsum": 11962, + "scaf_max": 45540, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "scaf_powsum": 12026, + "execution_resource": "NERSC-Cori", + "contigs": 157859, + "name": "Assembly Activity for nmdc:mga0x5c381", + "ctg_max": 40273, + "gc_std": 0.10673, + "contig_bp": 68288279, + "gc_avg": 0.61453, + "started_at_time": "2021-10-11T02:25:13Z", + "scaf_bp": 68289129, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 157774, + "ended_at_time": "2021-10-11T04:45:59+00:00", + "ctg_l50": 400, + "ctg_l90": 285, + "ctg_n50": 49248, + "ctg_n90": 135173, + "scaf_l50": 400, + "scaf_l90": 285, + "scaf_n50": 49230, + "scaf_n90": 135095 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b3f" + }, + "id": "nmdc:omprc-11-b051xn44", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic 
zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jdsasr43" + ], + "has_output": [ + "jgi:574fe09a7ded5e3df1ee1485" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127628" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c863" + }, + "has_input": [ + "nmdc:efca984ecf94cc8de2aeabf94e0b87cc" + ], + "part_of": [ + "nmdc:mga0x5c381" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f6f1760721d73fc57919b2115a1d47ec", + "nmdc:2225f9d41343590d818186fa2d66852d" + ], + "was_informed_by": "gold:Gp0127628", + "input_read_count": 31715882, + "output_read_bases": 4516265181, + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4789098182, + "name": "Read QC Activity for nmdc:mga0x5c381", + "output_read_count": 30212248, + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf24" + }, + "has_input": [ + "nmdc:f6f1760721d73fc57919b2115a1d47ec" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a6ed9af48a9ad473ab66721829a5c226", + "nmdc:335dbf6f1055de0950988a002f432c0b", + "nmdc:35da19bc0e50db1f9a02fe1550d1df0e", + "nmdc:224085164a389c6f207967ed03b3e6af", + "nmdc:39ba17263c144761a8bdcc1645c034f5", + 
"nmdc:84debc9bd1c09328d60f073d7fc2db4f", + "nmdc:8f75800abbcf5a94043ad677d7cb975c", + "nmdc:aae9e961d8ed716457616c8a8841037b", + "nmdc:ba83d6ab837403f4bcbc9400a0460457" + ], + "was_informed_by": "gold:Gp0127628", + "id": "nmdc:a634b0691cf34899d9b09bc4573d4c36", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0x5c381", + "started_at_time": "2021-10-11T02:25:13Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:45:59+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2291612962, + "type": "nmdc:DataObject", + "id": "jgi:574fde5b7ded5e3df1ee13ff", + "name": "10533.1.165310.TCGCTGT-AACAGCG.fastq.gz" + }, + { + "name": "Gp0127631_Filtered Reads", + "description": "Filtered Reads for Gp0127631", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filtered.fastq.gz", + "md5_checksum": "6969fd7f4b1a5a34fb30d31b92cd6bf8", + "id": "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", + "file_size_bytes": 2030538721 + }, + { + "name": "Gp0127631_Filtered Stats", + "description": "Filtered Stats for Gp0127631", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/qa/nmdc_mga0jx8k09_filterStats.txt", + "md5_checksum": "b280141d234edf10cde8794539700654", + "id": "nmdc:b280141d234edf10cde8794539700654", + "file_size_bytes": 284 + }, + { + "name": "Gp0127631_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report.tsv", + "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", + "id": "nmdc:b78e8246144185beb95c0caf65ef1f1a", + "file_size_bytes": 1227 + }, + { + "name": "Gp0127631_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127631", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_report_full.tsv", + "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", + "id": "nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "file_size_bytes": 647196 + }, + { + "name": "Gp0127631_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127631", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_gottcha2_krona.html", + "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", + "id": "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "file_size_bytes": 229312 + }, + { + "name": "Gp0127631_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127631", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_classification.tsv", + "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", + "id": "nmdc:0d1729a83798b752f33eeb8d97afe972", + "file_size_bytes": 1861431092 + }, + { + "name": "Gp0127631_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127631", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_report.tsv", + "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", + "id": "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "file_size_bytes": 254665 + }, + { + "name": "Gp0127631_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127631", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_centrifuge_krona.html", + "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", + "id": "nmdc:ea27c005b1788434c2198ad60939d4bc", + "file_size_bytes": 2334578 + }, + { + "name": 
"Gp0127631_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127631", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_classification.tsv", + "md5_checksum": "6a46583da876b9d6287302308df0b9fd", + "id": "nmdc:6a46583da876b9d6287302308df0b9fd", + "file_size_bytes": 1483354621 + }, + { + "name": "Gp0127631_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127631", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_report.tsv", + "md5_checksum": "af619dc5a0423509a4beaca26aa61000", + "id": "nmdc:af619dc5a0423509a4beaca26aa61000", + "file_size_bytes": 640329 + }, + { + "name": "Gp0127631_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127631", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/ReadbasedAnalysis/nmdc_mga0jx8k09_kraken2_krona.html", + "md5_checksum": "50093825ec73dcabe66aa353de766beb", + "id": "nmdc:50093825ec73dcabe66aa353de766beb", + "file_size_bytes": 3993246 + }, + { + "name": "Gp0127631_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127631", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_contigs.fna", + "md5_checksum": "1eb44ff780f2aad1053ca336b53d7b98", + "id": "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "file_size_bytes": 128714098 + }, + { + "name": "Gp0127631_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127631", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_scaffolds.fna", + "md5_checksum": "992fb303b5ced60489fea0ce6dae71f9", + "id": 
"nmdc:992fb303b5ced60489fea0ce6dae71f9", + "file_size_bytes": 127998496 + }, + { + "name": "Gp0127631_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_covstats.txt", + "md5_checksum": "a0f466071ed249babf1a5653e1c20a02", + "id": "nmdc:a0f466071ed249babf1a5653e1c20a02", + "file_size_bytes": 18831462 + }, + { + "name": "Gp0127631_Assembled AGP file", + "description": "Assembled AGP file for Gp0127631", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_assembly.agp", + "md5_checksum": "5eddebfbfabfd9c0e71c2699bee73870", + "id": "nmdc:5eddebfbfabfd9c0e71c2699bee73870", + "file_size_bytes": 17634272 + }, + { + "name": "Gp0127631_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127631", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/assembly/nmdc_mga0jx8k09_pairedMapped_sorted.bam", + "md5_checksum": "0ecd5e99ec93ba17c7b02483560bafdf", + "id": "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", + "file_size_bytes": 2245356551 + }, + { + "name": "Gp0127631_Protein FAA", + "description": "Protein FAA for Gp0127631", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_proteins.faa", + "md5_checksum": "8c26f97b6a3196ed09dc4f54857d4972", + "id": "nmdc:8c26f97b6a3196ed09dc4f54857d4972", + "file_size_bytes": 72966123 + }, + { + "name": "Gp0127631_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127631", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_structural_annotation.gff", + "md5_checksum": "c7112633e322d7bc609bd479f7ddddb9", + "id": 
"nmdc:c7112633e322d7bc609bd479f7ddddb9", + "file_size_bytes": 2524 + }, + { + "name": "Gp0127631_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127631", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_functional_annotation.gff", + "md5_checksum": "2f6baf7176d2d904c02ae71875a8d326", + "id": "nmdc:2f6baf7176d2d904c02ae71875a8d326", + "file_size_bytes": 81929295 + }, + { + "name": "Gp0127631_KO TSV file", + "description": "KO TSV file for Gp0127631", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko.tsv", + "md5_checksum": "1abb9d211201bef0cb545e70a65de8cf", + "id": "nmdc:1abb9d211201bef0cb545e70a65de8cf", + "file_size_bytes": 8979915 + }, + { + "name": "Gp0127631_EC TSV file", + "description": "EC TSV file for Gp0127631", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ec.tsv", + "md5_checksum": "985a23612611fb258d2dbaee1e4458f5", + "id": "nmdc:985a23612611fb258d2dbaee1e4458f5", + "file_size_bytes": 5914861 + }, + { + "name": "Gp0127631_COG GFF file", + "description": "COG GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cog.gff", + "md5_checksum": "44c3fa82e71af5647b7619b0dd8a0728", + "id": "nmdc:44c3fa82e71af5647b7619b0dd8a0728", + "file_size_bytes": 47190255 + }, + { + "name": "Gp0127631_PFAM GFF file", + "description": "PFAM GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_pfam.gff", + "md5_checksum": "fb70c00e07d0b93b12cacbded87dcea6", + "id": "nmdc:fb70c00e07d0b93b12cacbded87dcea6", + "file_size_bytes": 35794646 + }, + { + "name": "Gp0127631_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127631", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_tigrfam.gff", + "md5_checksum": "98e1311ba5e96a176baccdb9a95439f9", + "id": "nmdc:98e1311ba5e96a176baccdb9a95439f9", + "file_size_bytes": 3856365 + }, + { + "name": "Gp0127631_SMART GFF file", + "description": "SMART GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_smart.gff", + "md5_checksum": "0685da4455dde2dec9f221b9356f008c", + "id": "nmdc:0685da4455dde2dec9f221b9356f008c", + "file_size_bytes": 10561278 + }, + { + "name": "Gp0127631_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_supfam.gff", + "md5_checksum": "14b7f064a3a2fad830fad893ff3257bc", + "id": "nmdc:14b7f064a3a2fad830fad893ff3257bc", + "file_size_bytes": 59641133 + }, + { + "name": "Gp0127631_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_cath_funfam.gff", + "md5_checksum": "1b8b64c254f88dd9a8e3cd42bde7b7ba", + "id": "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", + "file_size_bytes": 45160077 + }, + { + "name": "Gp0127631_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/annotation/nmdc_mga0jx8k09_ko_ec.gff", + "md5_checksum": "01769b6920ba82884f19ac3f88428db1", + "id": "nmdc:01769b6920ba82884f19ac3f88428db1", + "file_size_bytes": 28510384 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"file_size_bytes": 0 + }, + { + "name": "Gp0127631_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.tooShort.fa", + "md5_checksum": "53faea62cf1183292bc6fca374f75ed1", + "id": "nmdc:53faea62cf1183292bc6fca374f75ed1", + "file_size_bytes": 99316833 + }, + { + "name": "Gp0127631_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_bins.unbinned.fa", + "md5_checksum": "7a6616d3262630c2aea2923e3c2683d0", + "id": "nmdc:7a6616d3262630c2aea2923e3c2683d0", + "file_size_bytes": 27381739 + }, + { + "name": "Gp0127631_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127631", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_checkm_qa.out", + "md5_checksum": "e16dde65e7229d69949c9e2dee7e2413", + "id": "nmdc:e16dde65e7229d69949c9e2dee7e2413", + "file_size_bytes": 1085 + }, + { + "name": "Gp0127631_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127631", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_hqmq_bin.zip", + "md5_checksum": "58acda197bd8136a80d5047342008cdf", + "id": "nmdc:58acda197bd8136a80d5047342008cdf", + "file_size_bytes": 182 + }, + { + "name": "Gp0127631_metabat2 bins", + "description": "metabat2 bins for Gp0127631", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jx8k09/MAGs/nmdc_mga0jx8k09_metabat_bin.zip", + "md5_checksum": "8d5e2b8a8dede83c2f74182f506f9176", + "id": "nmdc:8d5e2b8a8dede83c2f74182f506f9176", + "file_size_bytes": 596616 + }, + { + "_id": { + 
"$oid": "649b003d1ae706d7b5b14ea8" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/mapping_stats.txt", + "file_size_bytes": 17881866, + "type": "nmdc:DataObject", + "id": "nmdc:b8891d5a1c93a83756f25450b1fe5e6e", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eaa" + }, + "description": "Assembled AGP file for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly.agp", + "file_size_bytes": 15733352, + "type": "nmdc:DataObject", + "id": "nmdc:00fd0de6e8c1ba5bdd6308a282f543cc", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec6" + }, + "description": "Assembled contigs fasta for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly_contigs.fna", + "file_size_bytes": 127764502, + "type": "nmdc:DataObject", + "id": "nmdc:ff68d07b09e5a9cdd866208394d66bd6", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ecc" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2213957632, + "type": "nmdc:DataObject", + "id": "nmdc:39703cdbfb1e7fbb52a08061a05d8f4d", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed5" + }, + "description": "Assembled scaffold fasta for gold:Gp0127631", + "url": "https://data.microbiomedata.org/data/1781_100333/assembly/assembly_scaffolds.fna", + "file_size_bytes": 127049764, + "type": "nmdc:DataObject", + "id": "nmdc:7beaf6e386659d8d728720bb1ab2f2b9", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { 
+ "_id": { + "$oid": "649b003d1ae706d7b5b15b26" + }, + "id": "nmdc:22bf0b09cd9b46acaa8436ac81aec2f3", + "name": "1781_100333.json", + "description": "Gold:Gp0127631 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100333/ReadbasedAnalysis/1781_100333.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b27" + }, + "id": "nmdc:d376ada84b52516325fc31f0f95fc1c4", + "name": "1781_100333.krona.html", + "description": "Gold:Gp0127631 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100333/ReadbasedAnalysis/centrifuge/1781_100333.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f1" + }, + "id": "nmdc:3d29d0956f968142b75f0ca9a03e3abb", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127631", + "file_size_bytes": 96541361, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f3" + }, + "id": "nmdc:0ee0d3a741d960268c288071b826ccb2", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127631", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f4" + }, + "id": "nmdc:80e41aba95e325a6687edf730910288b", + "name": "gold:Gp0127631.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127631", + "file_size_bytes": 299114, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b165ff" + }, + "id": "nmdc:be8af74d4f97b2443a2dac045fa8af6d", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127631", + "file_size_bytes": 28526080, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16629" + }, + "id": "nmdc:16ad3e594738a8fb4ee46a931ce0444c", + "name": "gold:Gp0127631.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127631", + "file_size_bytes": 372823, + "url": "https://data.microbiomedata.org/data/1781_100333/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce5" + }, + "description": "Functional annotation GFF file for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_functional_annotation.gff", + "md5_checksum": "5723e7023b0e3994e92c7c5e72aa34ec", + "file_size_bytes": 3385, + "id": "nmdc:5723e7023b0e3994e92c7c5e72aa34ec", + "name": "gold:Gp0127631_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce7" + }, + "description": "EC TSV File for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_ec.tsv", + "md5_checksum": "ee276fe3eb490475ad3d7280a8c67464", + "file_size_bytes": 3385, + "id": "nmdc:ee276fe3eb490475ad3d7280a8c67464", + "name": "gold:Gp0127631_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce8" + }, + "description": "Structural annotation GFF file for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_structural_annotation.gff", + "md5_checksum": "d57f28027b2d6f82b96f5413bf8c9a59", + 
"file_size_bytes": 3385, + "id": "nmdc:d57f28027b2d6f82b96f5413bf8c9a59", + "name": "gold:Gp0127631_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cea" + }, + "description": "Protein FAA for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_proteins.faa", + "md5_checksum": "04c97ac7af06bf37da8f1ffe827e454d", + "file_size_bytes": 3385, + "id": "nmdc:04c97ac7af06bf37da8f1ffe827e454d", + "name": "gold:Gp0127631_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cec" + }, + "description": "KO TSV File for gold:Gp0127631", + "url": "https://data.microbiomedata.org/1781_100333/img_annotation/Ga0482239_ko.tsv", + "md5_checksum": "e2ef79ef2b6669d93af5e90ba2c58fcf", + "file_size_bytes": 3385, + "id": "nmdc:e2ef79ef2b6669d93af5e90ba2c58fcf", + "name": "gold:Gp0127631_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34706" + }, + "has_input": [ + "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "nmdc:0ecd5e99ec93ba17c7b02483560bafdf", + "nmdc:2f6baf7176d2d904c02ae71875a8d326" + ], + "too_short_contig_num": 219869, + "part_of": [ + "nmdc:mga0jx8k09" + ], + "binned_contig_num": 506, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:53faea62cf1183292bc6fca374f75ed1", + "nmdc:7a6616d3262630c2aea2923e3c2683d0", + "nmdc:e16dde65e7229d69949c9e2dee7e2413", + "nmdc:58acda197bd8136a80d5047342008cdf", + "nmdc:8d5e2b8a8dede83c2f74182f506f9176" + ], + "was_informed_by": "gold:Gp0127631", + "input_contig_num": 237399, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis 
Activity for nmdc:mga0jx8k09", + "mags_list": [ + { + "number_of_contig": 151, + "completeness": 11.4, + "bin_name": "bins.1", + "gene_count": 748, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 268, + "completeness": 7.47, + "bin_name": "bins.2", + "gene_count": 1304, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 87, + "completeness": 13.32, + "bin_name": "bins.3", + "gene_count": 412, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 17024, + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a8" + }, + "has_input": [ + "nmdc:1eb44ff780f2aad1053ca336b53d7b98" + ], + "part_of": [ + "nmdc:mga0jx8k09" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8c26f97b6a3196ed09dc4f54857d4972", + "nmdc:c7112633e322d7bc609bd479f7ddddb9", + "nmdc:2f6baf7176d2d904c02ae71875a8d326", + "nmdc:1abb9d211201bef0cb545e70a65de8cf", + "nmdc:985a23612611fb258d2dbaee1e4458f5", + "nmdc:44c3fa82e71af5647b7619b0dd8a0728", + "nmdc:fb70c00e07d0b93b12cacbded87dcea6", + "nmdc:98e1311ba5e96a176baccdb9a95439f9", + 
"nmdc:0685da4455dde2dec9f221b9356f008c", + "nmdc:14b7f064a3a2fad830fad893ff3257bc", + "nmdc:1b8b64c254f88dd9a8e3cd42bde7b7ba", + "nmdc:01769b6920ba82884f19ac3f88428db1" + ], + "was_informed_by": "gold:Gp0127631", + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0jx8k09", + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f94" + }, + "has_input": [ + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" + ], + "part_of": [ + "nmdc:mga0jx8k09" + ], + "ctg_logsum": 306128, + "scaf_logsum": 307525, + "gap_pct": 0.00196, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:1eb44ff780f2aad1053ca336b53d7b98", + "nmdc:992fb303b5ced60489fea0ce6dae71f9", + "nmdc:a0f466071ed249babf1a5653e1c20a02", + "nmdc:5eddebfbfabfd9c0e71c2699bee73870", + "nmdc:0ecd5e99ec93ba17c7b02483560bafdf" + ], + "asm_score": 3.117, + "was_informed_by": "gold:Gp0127631", + "ctg_powsum": 32898, + "scaf_max": 14244, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "scaf_powsum": 33057, + "execution_resource": "NERSC-Cori", + "contigs": 237399, + "name": "Assembly Activity for nmdc:mga0jx8k09", + "ctg_max": 14244, + "gc_std": 0.09594, + "contig_bp": 119367623, + "gc_avg": 0.62364, + "started_at_time": "2021-10-11T02:26:22Z", + "scaf_bp": 119369963, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 237183, + "ended_at_time": "2021-10-11T04:40:31+00:00", + "ctg_l50": 499, + "ctg_l90": 292, + "ctg_n50": 64310, + "ctg_n90": 195626, + "scaf_l50": 500, + "scaf_l90": 292, + "scaf_n50": 64017, + "scaf_n90": 195424 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b40" + }, + "id": "nmdc:omprc-11-k8kt2j31", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in 
Columbia River, Washington, USA - GW-RW S1_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-4vqhvw07" + ], + "has_output": [ + "jgi:574fde5b7ded5e3df1ee13ff" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127631" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c862" + }, + "has_input": [ + "nmdc:9c97e4b734b9cac731fe30fb07a32bb7" + ], + "part_of": [ + "nmdc:mga0jx8k09" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8", + "nmdc:b280141d234edf10cde8794539700654" + ], + "was_informed_by": "gold:Gp0127631", + "input_read_count": 26419652, + "output_read_bases": 3798930297, + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3989367452, + "name": "Read QC Activity for nmdc:mga0jx8k09", + "output_read_count": 25434840, + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:40:31+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf22" + }, + "has_input": [ + "nmdc:6969fd7f4b1a5a34fb30d31b92cd6bf8" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b78e8246144185beb95c0caf65ef1f1a", + 
"nmdc:8875c6ce19e13ed9a88447f2f78bb049", + "nmdc:3b0aee019c772a695bf4cc8f4a390f4e", + "nmdc:0d1729a83798b752f33eeb8d97afe972", + "nmdc:77561a0de3bb8aae04d110429fd9ad0c", + "nmdc:ea27c005b1788434c2198ad60939d4bc", + "nmdc:6a46583da876b9d6287302308df0b9fd", + "nmdc:af619dc5a0423509a4beaca26aa61000", + "nmdc:50093825ec73dcabe66aa353de766beb" + ], + "was_informed_by": "gold:Gp0127631", + "id": "nmdc:00a9e34a36f0ea767ff48aa2f411874f", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0jx8k09", + "started_at_time": "2021-10-11T02:26:22Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:40:31+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2557650099, + "type": "nmdc:DataObject", + "id": "jgi:574fde7c7ded5e3df1ee1419", + "name": "10533.2.165322.TGTACAC-GGTGTAC.fastq.gz" + }, + { + "name": "Gp0127630_Filtered Reads", + "description": "Filtered Reads for Gp0127630", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filtered.fastq.gz", + "md5_checksum": "eaffb16b5247d85c08f8af73bcb8b65e", + "id": "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", + "file_size_bytes": 2294158265 + }, + { + "name": "Gp0127630_Filtered Stats", + "description": "Filtered Stats for Gp0127630", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/qa/nmdc_mga09n3g47_filterStats.txt", + "md5_checksum": "088fd18cb9169097e739289d2e5ebb13", + "id": "nmdc:088fd18cb9169097e739289d2e5ebb13", + "file_size_bytes": 288 + }, + { + "name": "Gp0127630_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report.tsv", + "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", + "id": "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + "file_size_bytes": 
3373 + }, + { + "name": "Gp0127630_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_report_full.tsv", + "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", + "id": "nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "file_size_bytes": 791488 + }, + { + "name": "Gp0127630_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127630", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_gottcha2_krona.html", + "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", + "id": "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "file_size_bytes": 235803 + }, + { + "name": "Gp0127630_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127630", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_classification.tsv", + "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", + "id": "nmdc:2f21fd19f055d1931ab82016ed781a12", + "file_size_bytes": 1974171566 + }, + { + "name": "Gp0127630_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127630", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_report.tsv", + "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", + "id": "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "file_size_bytes": 255012 + }, + { + "name": "Gp0127630_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127630", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_centrifuge_krona.html", + "md5_checksum": 
"813232a3034ddb9a05efc2f2e9b78cce", + "id": "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "file_size_bytes": 2330430 + }, + { + "name": "Gp0127630_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127630", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_classification.tsv", + "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", + "id": "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "file_size_bytes": 1584744477 + }, + { + "name": "Gp0127630_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127630", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_report.tsv", + "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", + "id": "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "file_size_bytes": 650172 + }, + { + "name": "Gp0127630_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127630", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/ReadbasedAnalysis/nmdc_mga09n3g47_kraken2_krona.html", + "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", + "id": "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f", + "file_size_bytes": 3962195 + }, + { + "name": "Gp0127630_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127630", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_contigs.fna", + "md5_checksum": "7b35237c97a75f17ba74be0fe96416c9", + "id": "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "file_size_bytes": 57511432 + }, + { + "name": "Gp0127630_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127630", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_scaffolds.fna", + "md5_checksum": "118dd6190bdaf127d3c105cc73012cc3", + "id": "nmdc:118dd6190bdaf127d3c105cc73012cc3", + "file_size_bytes": 57128690 + }, + { + "name": "Gp0127630_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_covstats.txt", + "md5_checksum": "9e129133978cb4c4cc4bae9fc28a8a49", + "id": "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", + "file_size_bytes": 10020081 + }, + { + "name": "Gp0127630_Assembled AGP file", + "description": "Assembled AGP file for Gp0127630", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_assembly.agp", + "md5_checksum": "33d86c437a046031ea2b4bed5a2d2d6b", + "id": "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", + "file_size_bytes": 9337675 + }, + { + "name": "Gp0127630_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127630", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/assembly/nmdc_mga09n3g47_pairedMapped_sorted.bam", + "md5_checksum": "873f16e03e0f94c9ec28573fb10ad6d8", + "id": "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", + "file_size_bytes": 2461822274 + }, + { + "name": "Gp0127630_Protein FAA", + "description": "Protein FAA for Gp0127630", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_proteins.faa", + "md5_checksum": "f7735eb161908954feda34285993f1b9", + "id": "nmdc:f7735eb161908954feda34285993f1b9", + "file_size_bytes": 34246728 + }, + { + "name": "Gp0127630_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127630", + "data_object_type": "Structural Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_structural_annotation.gff", + "md5_checksum": "c6053080461e8cc0bbadd13e0775e108", + "id": "nmdc:c6053080461e8cc0bbadd13e0775e108", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127630_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127630", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_functional_annotation.gff", + "md5_checksum": "4878e3d5a95e67c0bb81da53e03400be", + "id": "nmdc:4878e3d5a95e67c0bb81da53e03400be", + "file_size_bytes": 40345940 + }, + { + "name": "Gp0127630_KO TSV file", + "description": "KO TSV file for Gp0127630", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko.tsv", + "md5_checksum": "dbc4d4e179a86aa95211de3e62219191", + "id": "nmdc:dbc4d4e179a86aa95211de3e62219191", + "file_size_bytes": 4543233 + }, + { + "name": "Gp0127630_EC TSV file", + "description": "EC TSV file for Gp0127630", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ec.tsv", + "md5_checksum": "5bdd96be3fbc888969d92c2ed6392846", + "id": "nmdc:5bdd96be3fbc888969d92c2ed6392846", + "file_size_bytes": 3027431 + }, + { + "name": "Gp0127630_COG GFF file", + "description": "COG GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cog.gff", + "md5_checksum": "78026e2afc7644463828fbbfa4d8d727", + "id": "nmdc:78026e2afc7644463828fbbfa4d8d727", + "file_size_bytes": 23085097 + }, + { + "name": "Gp0127630_PFAM GFF file", + "description": "PFAM GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_pfam.gff", + "md5_checksum": "ef99a9afe80e1acc086694ca8ab4cca7", + "id": 
"nmdc:ef99a9afe80e1acc086694ca8ab4cca7", + "file_size_bytes": 16769237 + }, + { + "name": "Gp0127630_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_tigrfam.gff", + "md5_checksum": "f949efd8a6b6affb4707a4314980e86e", + "id": "nmdc:f949efd8a6b6affb4707a4314980e86e", + "file_size_bytes": 1710760 + }, + { + "name": "Gp0127630_SMART GFF file", + "description": "SMART GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_smart.gff", + "md5_checksum": "2f9f0b8164c35117da1e121e63ad772f", + "id": "nmdc:2f9f0b8164c35117da1e121e63ad772f", + "file_size_bytes": 5166448 + }, + { + "name": "Gp0127630_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_supfam.gff", + "md5_checksum": "1e3d433d3cb308d086dec26916b6b1bf", + "id": "nmdc:1e3d433d3cb308d086dec26916b6b1bf", + "file_size_bytes": 29155547 + }, + { + "name": "Gp0127630_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_cath_funfam.gff", + "md5_checksum": "d467bd6407a5a41798aa84df69a4a31d", + "id": "nmdc:d467bd6407a5a41798aa84df69a4a31d", + "file_size_bytes": 21679406 + }, + { + "name": "Gp0127630_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/annotation/nmdc_mga09n3g47_ko_ec.gff", + "md5_checksum": "4cb3db8f0ff98bf805f4750af65eb9d1", + "id": "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1", + "file_size_bytes": 14461252 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127630_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.tooShort.fa", + "md5_checksum": "ce09d99bdfdf0379b09a3ae75c65d830", + "id": "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", + "file_size_bytes": 50450286 + }, + { + "name": "Gp0127630_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127630", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_bins.unbinned.fa", + "md5_checksum": "acd651395108c71dd20eeebf9b177d06", + "id": "nmdc:acd651395108c71dd20eeebf9b177d06", + "file_size_bytes": 5114463 + }, + { + "name": "Gp0127630_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127630", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_checkm_qa.out", + "md5_checksum": "850a6fbbd2993f4dfeb5a40485e67f8e", + "id": "nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", + "file_size_bytes": 948 + }, + { + "name": "Gp0127630_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127630", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_hqmq_bin.zip", + "md5_checksum": "287529453d35eab4acb72032a59994d0", + "id": "nmdc:287529453d35eab4acb72032a59994d0", + "file_size_bytes": 484667 + }, + { + "name": "Gp0127630_metabat2 bins", + "description": "metabat2 bins for Gp0127630", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09n3g47/MAGs/nmdc_mga09n3g47_metabat_bin.zip", + "md5_checksum": "4ad58f05545a75edc1b933a0b0286d16", + "id": "nmdc:4ad58f05545a75edc1b933a0b0286d16", + "file_size_bytes": 110526 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e66" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/mapping_stats.txt", + "file_size_bytes": 9510797, + "type": "nmdc:DataObject", + "id": "nmdc:e4e89517e39bd367af05e5dc5849b32b", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e68" + }, + "description": "Assembled contigs fasta for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly_contigs.fna", + "file_size_bytes": 57002148, + "type": "nmdc:DataObject", + "id": "nmdc:c3958f0be344c850d06ee61865c95ff6", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6b" + }, + "description": "Assembled scaffold fasta for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly_scaffolds.fna", + "file_size_bytes": 56619602, + "type": "nmdc:DataObject", + "id": "nmdc:354cac10ff205a59fffc795554aa3539", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebf" + }, + "description": "Assembled AGP file for gold:Gp0127630", + "url": "https://data.microbiomedata.org/data/1781_100332/assembly/assembly.agp", + "file_size_bytes": 8318715, + "type": "nmdc:DataObject", + "id": "nmdc:a08b36a85343a2f3dc45d62000a34274", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec5" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127630", + "url": 
"https://data.microbiomedata.org/data/1781_100332/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2430916917, + "type": "nmdc:DataObject", + "id": "nmdc:1d1e719a8aa56730007392e34c0515a7", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b1c" + }, + "id": "nmdc:82aac6da49d2ea7174e5786d247ceb42", + "name": "1781_100332.krona.html", + "description": "Gold:Gp0127630 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100332/ReadbasedAnalysis/centrifuge/1781_100332.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b2b" + }, + "id": "nmdc:65bf42cc5b458fd298f30d1df2cdb6d6", + "name": "1781_100332.json", + "description": "Gold:Gp0127630 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100332/ReadbasedAnalysis/1781_100332.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d8" + }, + "id": "nmdc:1cc2fa15c0c8c54f427684eac47d9288", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127630", + "file_size_bytes": 48944272, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165dc" + }, + "id": "nmdc:f19e68b486a6cfb5a09a20d9c388f679", + "name": "gold:Gp0127630.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127630", + "file_size_bytes": 211455, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e0" + }, + "id": "nmdc:904adbe6f49936fd689e59f7e970b4ab", + "name": "bins.unbinned.fa", + "description": 
"unbinned fasta file from metabat2 for gold:Gp0127630", + "file_size_bytes": 6557371, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e7" + }, + "id": "nmdc:abf67b79d7b9f94c9454eab172da8823", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127630", + "file_size_bytes": 918, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ea" + }, + "id": "nmdc:77678d37b9822be709b6ed462de42e71", + "name": "gold:Gp0127630.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127630", + "file_size_bytes": 254378, + "url": "https://data.microbiomedata.org/data/1781_100332/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cde" + }, + "description": "EC TSV File for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_ec.tsv", + "md5_checksum": "81ab86211731bc0547d3e8f8786c3e8b", + "file_size_bytes": 3385, + "id": "nmdc:81ab86211731bc0547d3e8f8786c3e8b", + "name": "gold:Gp0127630_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cdf" + }, + "description": "Protein FAA for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_proteins.faa", + "md5_checksum": "6bca5ad106b3519416205a82d3a14b16", + "file_size_bytes": 3385, + "id": "nmdc:6bca5ad106b3519416205a82d3a14b16", + "name": "gold:Gp0127630_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce3" + }, + 
"description": "Functional annotation GFF file for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_functional_annotation.gff", + "md5_checksum": "070f0952308650d35ae05c4fed188677", + "file_size_bytes": 3385, + "id": "nmdc:070f0952308650d35ae05c4fed188677", + "name": "gold:Gp0127630_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce4" + }, + "description": "KO TSV File for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_ko.tsv", + "md5_checksum": "c6b5f388349af0214d65d1357026c7ee", + "file_size_bytes": 3385, + "id": "nmdc:c6b5f388349af0214d65d1357026c7ee", + "name": "gold:Gp0127630_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce6" + }, + "description": "Structural annotation GFF file for gold:Gp0127630", + "url": "https://data.microbiomedata.org/1781_100332/img_annotation/Ga0482240_structural_annotation.gff", + "md5_checksum": "f921989651475b06052058126db54de9", + "file_size_bytes": 3385, + "id": "nmdc:f921989651475b06052058126db54de9", + "name": "gold:Gp0127630_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34707" + }, + "has_input": [ + "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "nmdc:873f16e03e0f94c9ec28573fb10ad6d8", + "nmdc:4878e3d5a95e67c0bb81da53e03400be" + ], + "too_short_contig_num": 123771, + "part_of": [ + "nmdc:mga09n3g47" + ], + "binned_contig_num": 313, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ce09d99bdfdf0379b09a3ae75c65d830", + "nmdc:acd651395108c71dd20eeebf9b177d06", + 
"nmdc:850a6fbbd2993f4dfeb5a40485e67f8e", + "nmdc:287529453d35eab4acb72032a59994d0", + "nmdc:4ad58f05545a75edc1b933a0b0286d16" + ], + "was_informed_by": "gold:Gp0127630", + "input_contig_num": 127321, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga09n3g47", + "mags_list": [ + { + "number_of_contig": 86, + "completeness": 19.9, + "bin_name": "bins.1", + "gene_count": 422, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 227, + "completeness": 70.23, + "bin_name": "bins.2", + "gene_count": 1932, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.94, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 35 + } + ], + "unbinned_contig_num": 3237, + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:54:22+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ad" + }, + "has_input": [ + "nmdc:7b35237c97a75f17ba74be0fe96416c9" + ], + "part_of": [ + "nmdc:mga09n3g47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f7735eb161908954feda34285993f1b9", + "nmdc:c6053080461e8cc0bbadd13e0775e108", + "nmdc:4878e3d5a95e67c0bb81da53e03400be", + "nmdc:dbc4d4e179a86aa95211de3e62219191", + "nmdc:5bdd96be3fbc888969d92c2ed6392846", + "nmdc:78026e2afc7644463828fbbfa4d8d727", + "nmdc:ef99a9afe80e1acc086694ca8ab4cca7", + "nmdc:f949efd8a6b6affb4707a4314980e86e", + 
"nmdc:2f9f0b8164c35117da1e121e63ad772f", + "nmdc:1e3d433d3cb308d086dec26916b6b1bf", + "nmdc:d467bd6407a5a41798aa84df69a4a31d", + "nmdc:4cb3db8f0ff98bf805f4750af65eb9d1" + ], + "was_informed_by": "gold:Gp0127630", + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga09n3g47", + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:54:22+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f95" + }, + "has_input": [ + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" + ], + "part_of": [ + "nmdc:mga09n3g47" + ], + "ctg_logsum": 77070, + "scaf_logsum": 77428, + "gap_pct": 0.00093, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7b35237c97a75f17ba74be0fe96416c9", + "nmdc:118dd6190bdaf127d3c105cc73012cc3", + "nmdc:9e129133978cb4c4cc4bae9fc28a8a49", + "nmdc:33d86c437a046031ea2b4bed5a2d2d6b", + "nmdc:873f16e03e0f94c9ec28573fb10ad6d8" + ], + "asm_score": 6.312, + "was_informed_by": "gold:Gp0127630", + "ctg_powsum": 8755.579, + "scaf_max": 31136, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "scaf_powsum": 8795.268, + "execution_resource": "NERSC-Cori", + "contigs": 127321, + "name": "Assembly Activity for nmdc:mga09n3g47", + "ctg_max": 31136, + "gc_std": 0.09346, + "contig_bp": 52740992, + "gc_avg": 0.61288, + "started_at_time": "2021-10-11T02:26:53Z", + "scaf_bp": 52741482, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 127272, + "ended_at_time": "2021-10-11T04:54:22+00:00", + "ctg_l50": 372, + "ctg_l90": 284, + "ctg_n50": 41888, + "ctg_n90": 110882, + "scaf_l50": 372, + "scaf_l90": 284, + "scaf_n50": 41856, + "scaf_n90": 110834 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b41" + }, + "id": "nmdc:omprc-11-9pbab972", + "name": "Riverbed sediment microbial communities from areas with no vegetation in 
Columbia River, Washington, USA - GW-RW N3_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-3yjh4z33" + ], + "has_output": [ + "jgi:574fde7c7ded5e3df1ee1419" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127630" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c865" + }, + "has_input": [ + "nmdc:0e737a8e36535f70bff074004ee1f9c0" + ], + "part_of": [ + "nmdc:mga09n3g47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e", + "nmdc:088fd18cb9169097e739289d2e5ebb13" + ], + "was_informed_by": "gold:Gp0127630", + "input_read_count": 28569382, + "output_read_bases": 4016672570, + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4313976682, + "name": "Read QC Activity for nmdc:mga09n3g47", + "output_read_count": 26868700, + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:54:22+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf27" + }, + "has_input": [ + "nmdc:eaffb16b5247d85c08f8af73bcb8b65e" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ad8aa7d317d86bcd1b33e6e68a917198", + 
"nmdc:e5f1da9ed5be2adcd65763d387387c9f", + "nmdc:db82b41936f37bbbeaa027ffc25b58cd", + "nmdc:2f21fd19f055d1931ab82016ed781a12", + "nmdc:890f494d1dd5e130d6c1688e78f27ff2", + "nmdc:813232a3034ddb9a05efc2f2e9b78cce", + "nmdc:ef490241b537bb4c19bd5548cd7b7f6b", + "nmdc:6a7de24b01ad1c63ba6edb758e25af40", + "nmdc:fc8a855916eb1ba0f7d278b7c1f1786f" + ], + "was_informed_by": "gold:Gp0127630", + "id": "nmdc:9fb85875014e14636fbb93d97d62f4bd", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09n3g47", + "started_at_time": "2021-10-11T02:26:53Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:54:22+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2003194973, + "type": "nmdc:DataObject", + "id": "jgi:574fde5e7ded5e3df1ee1401", + "name": "10533.1.165310.GGACTGT-AACAGTC.fastq.gz" + }, + { + "name": "Gp0127633_Filtered Reads", + "description": "Filtered Reads for Gp0127633", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filtered.fastq.gz", + "md5_checksum": "7cbd497624d8b60ab2a5e7fdbe4730f2", + "id": "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", + "file_size_bytes": 1727224362 + }, + { + "name": "Gp0127633_Filtered Stats", + "description": "Filtered Stats for Gp0127633", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/qa/nmdc_mga05zvf81_filterStats.txt", + "md5_checksum": "eccf0501d08f920a88b6598d573a8e3e", + "id": "nmdc:eccf0501d08f920a88b6598d573a8e3e", + "file_size_bytes": 280 + }, + { + "name": "Gp0127633_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report.tsv", + "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", + "id": "nmdc:8bd9eb762acabbac5d079c379c28e381", + "file_size_bytes": 
875 + }, + { + "name": "Gp0127633_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_report_full.tsv", + "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", + "id": "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "file_size_bytes": 578856 + }, + { + "name": "Gp0127633_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127633", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_gottcha2_krona.html", + "md5_checksum": "f445af1a7774572d156f55a898d26f09", + "id": "nmdc:f445af1a7774572d156f55a898d26f09", + "file_size_bytes": 228067 + }, + { + "name": "Gp0127633_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127633", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_classification.tsv", + "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", + "id": "nmdc:e11fcbf66318878c05984fa3d893e3b7", + "file_size_bytes": 1646942155 + }, + { + "name": "Gp0127633_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127633", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_report.tsv", + "md5_checksum": "28beb8baabdaf346f2066b40f375a152", + "id": "nmdc:28beb8baabdaf346f2066b40f375a152", + "file_size_bytes": 252735 + }, + { + "name": "Gp0127633_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127633", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_centrifuge_krona.html", + "md5_checksum": 
"1f74a43724c4afed5563499d05601e22", + "id": "nmdc:1f74a43724c4afed5563499d05601e22", + "file_size_bytes": 2329168 + }, + { + "name": "Gp0127633_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127633", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_classification.tsv", + "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", + "id": "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "file_size_bytes": 1310443491 + }, + { + "name": "Gp0127633_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127633", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_report.tsv", + "md5_checksum": "275268a6b5aca33c427d11877bcfa674", + "id": "nmdc:275268a6b5aca33c427d11877bcfa674", + "file_size_bytes": 621441 + }, + { + "name": "Gp0127633_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127633", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/ReadbasedAnalysis/nmdc_mga05zvf81_kraken2_krona.html", + "md5_checksum": "89e810af4915f0e117eaa60550587453", + "id": "nmdc:89e810af4915f0e117eaa60550587453", + "file_size_bytes": 3891844 + }, + { + "name": "Gp0127633_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127633", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_contigs.fna", + "md5_checksum": "ea5ca9478871b3e2600e1df0d748cbef", + "id": "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "file_size_bytes": 152814586 + }, + { + "name": "Gp0127633_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127633", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_scaffolds.fna", + "md5_checksum": "327e130872e4c5faac2f1c9f8dea2316", + "id": "nmdc:327e130872e4c5faac2f1c9f8dea2316", + "file_size_bytes": 151993436 + }, + { + "name": "Gp0127633_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_covstats.txt", + "md5_checksum": "f61f1e62791a38beae95bd95833a6784", + "id": "nmdc:f61f1e62791a38beae95bd95833a6784", + "file_size_bytes": 21678212 + }, + { + "name": "Gp0127633_Assembled AGP file", + "description": "Assembled AGP file for Gp0127633", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_assembly.agp", + "md5_checksum": "416254a3bfc685dd16c11d65a222305f", + "id": "nmdc:416254a3bfc685dd16c11d65a222305f", + "file_size_bytes": 20304047 + }, + { + "name": "Gp0127633_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127633", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/assembly/nmdc_mga05zvf81_pairedMapped_sorted.bam", + "md5_checksum": "bc054294600fa310924f104484effd3e", + "id": "nmdc:bc054294600fa310924f104484effd3e", + "file_size_bytes": 1959649749 + }, + { + "name": "Gp0127633_Protein FAA", + "description": "Protein FAA for Gp0127633", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_proteins.faa", + "md5_checksum": "8defcf55f08cd56d8b2560e27f490ca5", + "id": "nmdc:8defcf55f08cd56d8b2560e27f490ca5", + "file_size_bytes": 85918779 + }, + { + "name": "Gp0127633_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127633", + "data_object_type": "Structural Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_structural_annotation.gff", + "md5_checksum": "a6031c0a101419dd413a0804937425ca", + "id": "nmdc:a6031c0a101419dd413a0804937425ca", + "file_size_bytes": 2527 + }, + { + "name": "Gp0127633_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127633", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_functional_annotation.gff", + "md5_checksum": "43069b1146c84c064b7ff334dc9ff100", + "id": "nmdc:43069b1146c84c064b7ff334dc9ff100", + "file_size_bytes": 95647963 + }, + { + "name": "Gp0127633_KO TSV file", + "description": "KO TSV file for Gp0127633", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko.tsv", + "md5_checksum": "acc5a2c445dc6e00668c9a5d50aecdb8", + "id": "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", + "file_size_bytes": 10638485 + }, + { + "name": "Gp0127633_EC TSV file", + "description": "EC TSV file for Gp0127633", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ec.tsv", + "md5_checksum": "ec91d5d7a8af4fb845e22cbe7ab82bde", + "id": "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", + "file_size_bytes": 6991172 + }, + { + "name": "Gp0127633_COG GFF file", + "description": "COG GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cog.gff", + "md5_checksum": "3cd238ff1bb176b7a159aeb34a7c4683", + "id": "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", + "file_size_bytes": 56525933 + }, + { + "name": "Gp0127633_PFAM GFF file", + "description": "PFAM GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_pfam.gff", + "md5_checksum": "5103ea2a481ea3b82f1aa98ab7a36998", + "id": 
"nmdc:5103ea2a481ea3b82f1aa98ab7a36998", + "file_size_bytes": 43189711 + }, + { + "name": "Gp0127633_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_tigrfam.gff", + "md5_checksum": "8f7429420cbefb9e27bcdbe6252e5288", + "id": "nmdc:8f7429420cbefb9e27bcdbe6252e5288", + "file_size_bytes": 4806086 + }, + { + "name": "Gp0127633_SMART GFF file", + "description": "SMART GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_smart.gff", + "md5_checksum": "6d69127dc30609e4861a7b2443b99164", + "id": "nmdc:6d69127dc30609e4861a7b2443b99164", + "file_size_bytes": 12776467 + }, + { + "name": "Gp0127633_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_supfam.gff", + "md5_checksum": "00243bcaf50313d937a7685380a876bb", + "id": "nmdc:00243bcaf50313d937a7685380a876bb", + "file_size_bytes": 70607320 + }, + { + "name": "Gp0127633_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_cath_funfam.gff", + "md5_checksum": "ec6ffd40772dee9d48dbec0beb6b3321", + "id": "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", + "file_size_bytes": 53950895 + }, + { + "name": "Gp0127633_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/annotation/nmdc_mga05zvf81_ko_ec.gff", + "md5_checksum": "907439e314b4f4623244e2cec8532098", + "id": "nmdc:907439e314b4f4623244e2cec8532098", + "file_size_bytes": 33781965 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127633_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.tooShort.fa", + "md5_checksum": "00415cf72f9a77f907e3467a08b123c5", + "id": "nmdc:00415cf72f9a77f907e3467a08b123c5", + "file_size_bytes": 116930318 + }, + { + "name": "Gp0127633_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127633", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_bins.unbinned.fa", + "md5_checksum": "83064ec7bfc35a79a1ca76fdd8ad75fd", + "id": "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", + "file_size_bytes": 31883888 + }, + { + "name": "Gp0127633_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127633", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_checkm_qa.out", + "md5_checksum": "3f435d6da551400a4ba4400fa3608e7f", + "id": "nmdc:3f435d6da551400a4ba4400fa3608e7f", + "file_size_bytes": 1590 + }, + { + "name": "Gp0127633_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127633", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_hqmq_bin.zip", + "md5_checksum": "c66f93153962f8b80c8f3d6978b6d802", + "id": "nmdc:c66f93153962f8b80c8f3d6978b6d802", + "file_size_bytes": 460412 + }, + { + "name": "Gp0127633_metabat2 bins", + "description": "metabat2 bins for Gp0127633", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga05zvf81/MAGs/nmdc_mga05zvf81_metabat_bin.zip", + "md5_checksum": "ce2a364ec51a1d6311a319509751266e", + "id": "nmdc:ce2a364ec51a1d6311a319509751266e", + "file_size_bytes": 753147 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6e" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/mapping_stats.txt", + "file_size_bytes": 20586724, + "type": "nmdc:DataObject", + "id": "nmdc:262d79d1a7606b75f88468b3b9f80b59", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6f" + }, + "description": "Assembled contigs fasta for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/assembly_contigs.fna", + "file_size_bytes": 151723098, + "type": "nmdc:DataObject", + "id": "nmdc:e04c866a9a015bec110f1235db7223dc", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e71" + }, + "description": "Assembled scaffold fasta for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/assembly_scaffolds.fna", + "file_size_bytes": 150902924, + "type": "nmdc:DataObject", + "id": "nmdc:bfdd3614128940d958264690470bce14", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e72" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127633", + "url": "https://data.microbiomedata.org/data/1781_100335/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1932857393, + "type": "nmdc:DataObject", + "id": "nmdc:49bbdbc432b3c36c0c9196c53f4b952d", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e73" + }, + "description": "Assembled AGP file for gold:Gp0127633", + "url": 
"https://data.microbiomedata.org/data/1781_100335/assembly/assembly.agp", + "file_size_bytes": 18119007, + "type": "nmdc:DataObject", + "id": "nmdc:8d617079209f2f0a15a4752fc68f5e81", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b32" + }, + "id": "nmdc:6496a165a51c3500ed2439270887c660", + "name": "1781_100335.krona.html", + "description": "Gold:Gp0127633 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100335/ReadbasedAnalysis/centrifuge/1781_100335.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b3c" + }, + "id": "nmdc:432510ad975787c5c15f94f45f1226c4", + "name": "1781_100335.json", + "description": "Gold:Gp0127633 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100335/ReadbasedAnalysis/1781_100335.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165df" + }, + "id": "nmdc:f4e28c4b7ce3ff07a6a824da9006df87", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127633", + "file_size_bytes": 34864009, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e1" + }, + "id": "nmdc:4f3514bc849b503f135d5652ae7d867d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127633", + "file_size_bytes": 113729207, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e3" + }, + "id": "nmdc:07a65c967ab1996f34d016aedd3b0451", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127633", + 
"file_size_bytes": 1092, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e4" + }, + "id": "nmdc:50b7d0804958ce8d87de9374cc46af89", + "name": "gold:Gp0127633.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127633", + "file_size_bytes": 286733, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e6" + }, + "id": "nmdc:ca5d7e1b38c31fa6fe1af6931632d74e", + "name": "gold:Gp0127633.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127633", + "file_size_bytes": 269570, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e8" + }, + "id": "nmdc:cd5403a50c10d21375a7449c9a81d214", + "name": "gold:Gp0127633.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127633", + "file_size_bytes": 229207, + "url": "https://data.microbiomedata.org/data/1781_100335/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ced" + }, + "description": "KO TSV File for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_ko.tsv", + "md5_checksum": "9ef3a52b2d97cc4afb64e37d04e59865", + "file_size_bytes": 3385, + "id": "nmdc:9ef3a52b2d97cc4afb64e37d04e59865", + "name": "gold:Gp0127633_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf3" + }, + "description": "EC TSV File for gold:Gp0127633", + "url": 
"https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_ec.tsv", + "md5_checksum": "31e2f5b7b055f2959d50a990ebda7ff6", + "file_size_bytes": 3385, + "id": "nmdc:31e2f5b7b055f2959d50a990ebda7ff6", + "name": "gold:Gp0127633_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf8" + }, + "description": "Structural annotation GFF file for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_structural_annotation.gff", + "md5_checksum": "b18381667b4e7401e1bb58e8aede5d4a", + "file_size_bytes": 3385, + "id": "nmdc:b18381667b4e7401e1bb58e8aede5d4a", + "name": "gold:Gp0127633_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cfa" + }, + "description": "Functional annotation GFF file for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_functional_annotation.gff", + "md5_checksum": "740240c975daffee3e63251fc86cfd33", + "file_size_bytes": 3385, + "id": "nmdc:740240c975daffee3e63251fc86cfd33", + "name": "gold:Gp0127633_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cfd" + }, + "description": "Protein FAA for gold:Gp0127633", + "url": "https://data.microbiomedata.org/1781_100335/img_annotation/Ga0482237_proteins.faa", + "md5_checksum": "79fd564d59bf9fe4cfb2c771daa84f29", + "file_size_bytes": 3385, + "id": "nmdc:79fd564d59bf9fe4cfb2c771daa84f29", + "name": "gold:Gp0127633_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3470e" + }, + "has_input": [ + "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "nmdc:bc054294600fa310924f104484effd3e", + 
"nmdc:43069b1146c84c064b7ff334dc9ff100" + ], + "too_short_contig_num": 252383, + "part_of": [ + "nmdc:mga05zvf81" + ], + "binned_contig_num": 738, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:00415cf72f9a77f907e3467a08b123c5", + "nmdc:83064ec7bfc35a79a1ca76fdd8ad75fd", + "nmdc:3f435d6da551400a4ba4400fa3608e7f", + "nmdc:c66f93153962f8b80c8f3d6978b6d802", + "nmdc:ce2a364ec51a1d6311a319509751266e" + ], + "was_informed_by": "gold:Gp0127633", + "input_contig_num": 272872, + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga05zvf81", + "mags_list": [ + { + "number_of_contig": 83, + "completeness": 0.0, + "bin_name": "bins.1", + "gene_count": 600, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 142, + "completeness": 43.03, + "bin_name": "bins.2", + "gene_count": 746, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 194, + "completeness": 73.62, + "bin_name": "bins.3", + "gene_count": 1844, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 2.43, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 31 + }, + { + "number_of_contig": 91, + "completeness": 10.82, + "bin_name": 
"bins.4", + "gene_count": 442, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 82, + "completeness": 10.97, + "bin_name": "bins.5", + "gene_count": 385, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 146, + "completeness": 31.6, + "bin_name": "bins.6", + "gene_count": 800, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.6, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 20 + } + ], + "unbinned_contig_num": 19751, + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b0" + }, + "has_input": [ + "nmdc:ea5ca9478871b3e2600e1df0d748cbef" + ], + "part_of": [ + "nmdc:mga05zvf81" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8defcf55f08cd56d8b2560e27f490ca5", + "nmdc:a6031c0a101419dd413a0804937425ca", + "nmdc:43069b1146c84c064b7ff334dc9ff100", + "nmdc:acc5a2c445dc6e00668c9a5d50aecdb8", + "nmdc:ec91d5d7a8af4fb845e22cbe7ab82bde", + "nmdc:3cd238ff1bb176b7a159aeb34a7c4683", + "nmdc:5103ea2a481ea3b82f1aa98ab7a36998", + "nmdc:8f7429420cbefb9e27bcdbe6252e5288", + "nmdc:6d69127dc30609e4861a7b2443b99164", + "nmdc:00243bcaf50313d937a7685380a876bb", + "nmdc:ec6ffd40772dee9d48dbec0beb6b3321", 
+ "nmdc:907439e314b4f4623244e2cec8532098" + ], + "was_informed_by": "gold:Gp0127633", + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga05zvf81", + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f97" + }, + "has_input": [ + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" + ], + "part_of": [ + "nmdc:mga05zvf81" + ], + "ctg_logsum": 378958, + "scaf_logsum": 380592, + "gap_pct": 0.00189, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ea5ca9478871b3e2600e1df0d748cbef", + "nmdc:327e130872e4c5faac2f1c9f8dea2316", + "nmdc:f61f1e62791a38beae95bd95833a6784", + "nmdc:416254a3bfc685dd16c11d65a222305f", + "nmdc:bc054294600fa310924f104484effd3e" + ], + "asm_score": 4.48, + "was_informed_by": "gold:Gp0127633", + "ctg_powsum": 41464, + "scaf_max": 30530, + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "scaf_powsum": 41655, + "execution_resource": "NERSC-Cori", + "contigs": 272879, + "name": "Assembly Activity for nmdc:mga05zvf81", + "ctg_max": 30530, + "gc_std": 0.08353, + "contig_bp": 141974737, + "gc_avg": 0.63381, + "started_at_time": "2021-10-11T02:24:58Z", + "scaf_bp": 141977427, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 272628, + "ended_at_time": "2021-10-11T03:40:06+00:00", + "ctg_l50": 526, + "ctg_l90": 298, + "ctg_n50": 72824, + "ctg_n90": 224178, + "scaf_l50": 527, + "scaf_l90": 298, + "scaf_n50": 72571, + "scaf_n90": 223970 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b42" + }, + "id": "nmdc:omprc-11-0g415160", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", + "description": "Riverbed sediment samples were collected from an area with 
a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-nbgp1x53" + ], + "has_output": [ + "jgi:574fde5e7ded5e3df1ee1401" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127633" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c867" + }, + "has_input": [ + "nmdc:c0b8d6516c48cfe5a0b110abe67ee983" + ], + "part_of": [ + "nmdc:mga05zvf81" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2", + "nmdc:eccf0501d08f920a88b6598d573a8e3e" + ], + "was_informed_by": "gold:Gp0127633", + "input_read_count": 23291434, + "output_read_bases": 3367024367, + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3517006534, + "name": "Read QC Activity for nmdc:mga05zvf81", + "output_read_count": 22556158, + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf29" + }, + "has_input": [ + "nmdc:7cbd497624d8b60ab2a5e7fdbe4730f2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8bd9eb762acabbac5d079c379c28e381", + "nmdc:77351dd18ca40e5552ac1380ba94acbf", + "nmdc:f445af1a7774572d156f55a898d26f09", + "nmdc:e11fcbf66318878c05984fa3d893e3b7", + 
"nmdc:28beb8baabdaf346f2066b40f375a152", + "nmdc:1f74a43724c4afed5563499d05601e22", + "nmdc:4825177c6d0a8b67db82e6070cfbc35f", + "nmdc:275268a6b5aca33c427d11877bcfa674", + "nmdc:89e810af4915f0e117eaa60550587453" + ], + "was_informed_by": "gold:Gp0127633", + "id": "nmdc:c88a6f4e80cd6159dbbdeaeffbc28f55", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga05zvf81", + "started_at_time": "2021-10-11T02:24:58Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:40:06+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1875083220, + "type": "nmdc:DataObject", + "id": "jgi:574fe0967ded5e3df1ee1482", + "name": "10533.3.165334.GCCTTGT-AACAAGG.fastq.gz" + }, + { + "name": "Gp0127627_Filtered Reads", + "description": "Filtered Reads for Gp0127627", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filtered.fastq.gz", + "md5_checksum": "ed95796b3fd964c6bedb141d70737ebf", + "id": "nmdc:ed95796b3fd964c6bedb141d70737ebf", + "file_size_bytes": 1752924191 + }, + { + "name": "Gp0127627_Filtered Stats", + "description": "Filtered Stats for Gp0127627", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/qa/nmdc_mga0daby71_filterStats.txt", + "md5_checksum": "308ae373809697291bbc7947a1e4ed2d", + "id": "nmdc:308ae373809697291bbc7947a1e4ed2d", + "file_size_bytes": 281 + }, + { + "name": "Gp0127627_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report.tsv", + "md5_checksum": "a5ac6665e5d66242b1c885a911236982", + "id": "nmdc:a5ac6665e5d66242b1c885a911236982", + "file_size_bytes": 5530 + }, + { + "name": "Gp0127627_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127627", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_report_full.tsv", + "md5_checksum": "d19478a191693d643157a89c69cc02d1", + "id": "nmdc:d19478a191693d643157a89c69cc02d1", + "file_size_bytes": 825047 + }, + { + "name": "Gp0127627_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127627", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_gottcha2_krona.html", + "md5_checksum": "679a82699663e88a5e8828ee081fa967", + "id": "nmdc:679a82699663e88a5e8828ee081fa967", + "file_size_bytes": 241114 + }, + { + "name": "Gp0127627_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127627", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_classification.tsv", + "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", + "id": "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + "file_size_bytes": 1463660267 + }, + { + "name": "Gp0127627_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127627", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_report.tsv", + "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", + "id": "nmdc:0380e478962be82e0d97a6339f7f3b91", + "file_size_bytes": 254347 + }, + { + "name": "Gp0127627_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127627", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_centrifuge_krona.html", + "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", + "id": "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "file_size_bytes": 2330603 + }, + { + "name": 
"Gp0127627_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127627", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_classification.tsv", + "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", + "id": "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "file_size_bytes": 1177609473 + }, + { + "name": "Gp0127627_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127627", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_report.tsv", + "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", + "id": "nmdc:a2a0029691c04851f4a98003a773fe3f", + "file_size_bytes": 643281 + }, + { + "name": "Gp0127627_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127627", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/ReadbasedAnalysis/nmdc_mga0daby71_kraken2_krona.html", + "md5_checksum": "bab24ab64ad432d115f182df7198d46e", + "id": "nmdc:bab24ab64ad432d115f182df7198d46e", + "file_size_bytes": 3926756 + }, + { + "name": "Gp0127627_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127627", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_contigs.fna", + "md5_checksum": "a7db57faea894bec6603a69abfdfcf7d", + "id": "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "file_size_bytes": 19853676 + }, + { + "name": "Gp0127627_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127627", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_scaffolds.fna", + "md5_checksum": "8e798fcdd761feff51cab6a9c97ed7ae", + "id": 
"nmdc:8e798fcdd761feff51cab6a9c97ed7ae", + "file_size_bytes": 19699986 + }, + { + "name": "Gp0127627_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_covstats.txt", + "md5_checksum": "0d3200307a90e23525d3fefa7a25f867", + "id": "nmdc:0d3200307a90e23525d3fefa7a25f867", + "file_size_bytes": 3997845 + }, + { + "name": "Gp0127627_Assembled AGP file", + "description": "Assembled AGP file for Gp0127627", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_assembly.agp", + "md5_checksum": "e6e7f40bb1f1e333904f20dc3c317e37", + "id": "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", + "file_size_bytes": 3715901 + }, + { + "name": "Gp0127627_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127627", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/assembly/nmdc_mga0daby71_pairedMapped_sorted.bam", + "md5_checksum": "08f1ba3d3d380a167182c1beb7da304f", + "id": "nmdc:08f1ba3d3d380a167182c1beb7da304f", + "file_size_bytes": 1854522814 + }, + { + "name": "Gp0127627_Protein FAA", + "description": "Protein FAA for Gp0127627", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_proteins.faa", + "md5_checksum": "6003e73aa18ac6aa3cc0f7e020c7170e", + "id": "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", + "file_size_bytes": 12141650 + }, + { + "name": "Gp0127627_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127627", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_structural_annotation.gff", + "md5_checksum": "7e5852b8ca5590f81c543ea69398410f", + "id": 
"nmdc:7e5852b8ca5590f81c543ea69398410f", + "file_size_bytes": 8716031 + }, + { + "name": "Gp0127627_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127627", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_functional_annotation.gff", + "md5_checksum": "cf868630ca2d9037e69e82cfb76a7bd7", + "id": "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", + "file_size_bytes": 14995284 + }, + { + "name": "Gp0127627_KO TSV file", + "description": "KO TSV file for Gp0127627", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko.tsv", + "md5_checksum": "9e52b5a16f0eff5df36bd46038702a52", + "id": "nmdc:9e52b5a16f0eff5df36bd46038702a52", + "file_size_bytes": 1782540 + }, + { + "name": "Gp0127627_EC TSV file", + "description": "EC TSV file for Gp0127627", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ec.tsv", + "md5_checksum": "c44dceb1684f1a4249e7b8e944a2b7cf", + "id": "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", + "file_size_bytes": 1180943 + }, + { + "name": "Gp0127627_COG GFF file", + "description": "COG GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cog.gff", + "md5_checksum": "4a788566d47b89e8bc79eea6e26f2c42", + "id": "nmdc:4a788566d47b89e8bc79eea6e26f2c42", + "file_size_bytes": 8144598 + }, + { + "name": "Gp0127627_PFAM GFF file", + "description": "PFAM GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_pfam.gff", + "md5_checksum": "3d01f11a480f59cefdc67e7b6c7f9fc6", + "id": "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", + "file_size_bytes": 5854816 + }, + { + "name": "Gp0127627_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127627", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_tigrfam.gff", + "md5_checksum": "196a8e27999a32a6168d23f30d84f37b", + "id": "nmdc:196a8e27999a32a6168d23f30d84f37b", + "file_size_bytes": 549612 + }, + { + "name": "Gp0127627_SMART GFF file", + "description": "SMART GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_smart.gff", + "md5_checksum": "c3040fe67c2c8b2924c6db6c53b268ce", + "id": "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", + "file_size_bytes": 1739035 + }, + { + "name": "Gp0127627_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_supfam.gff", + "md5_checksum": "5594ce118ad4b2f9ec03adc10ebb6267", + "id": "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", + "file_size_bytes": 10326655 + }, + { + "name": "Gp0127627_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_cath_funfam.gff", + "md5_checksum": "9d2ac6550f5a1dc3d4b3743e8fe2ceec", + "id": "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", + "file_size_bytes": 7571959 + }, + { + "name": "Gp0127627_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127627", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/annotation/nmdc_mga0daby71_ko_ec.gff", + "md5_checksum": "2c73a261047ff94b898c190418373075", + "id": "nmdc:2c73a261047ff94b898c190418373075", + "file_size_bytes": 5683569 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"file_size_bytes": 0 + }, + { + "name": "Gp0127627_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127627", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0daby71/MAGs/nmdc_mga0daby71_hqmq_bin.zip", + "md5_checksum": "ba468a2c4f4810d87ba95ad9e123483d", + "id": "nmdc:ba468a2c4f4810d87ba95ad9e123483d", + "file_size_bytes": 182 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e58" + }, + "description": "Assembled contigs fasta for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly_contigs.fna", + "file_size_bytes": 19648924, + "type": "nmdc:DataObject", + "id": "nmdc:245e4bf7ae2d630d26223054f851e31c", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e59" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/mapping_stats.txt", + "file_size_bytes": 3793093, + "type": "nmdc:DataObject", + "id": "nmdc:53931f648c95c33e09552eb092065622", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5a" + }, + "description": "Assembled AGP file for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly.agp", + "file_size_bytes": 3306333, + "type": "nmdc:DataObject", + "id": "nmdc:75f11f70792c4e6055068a31d0b8f64b", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5b" + }, + "description": "Assembled scaffold fasta for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/assembly_scaffolds.fna", + "file_size_bytes": 19495266, + "type": "nmdc:DataObject", + "id": "nmdc:06427cb05246b5573ed4b85f93c0f155", + "name": "assembly_scaffolds.fna", + "data_object_type": 
"Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e5f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127627", + "url": "https://data.microbiomedata.org/data/1781_100329/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1829579569, + "type": "nmdc:DataObject", + "id": "nmdc:7d0f0b73c319579aac90fa171f8d77d2", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15afe" + }, + "id": "nmdc:84280bb9e2ed61950aca03e7a5248bf0", + "name": "1781_100329.krona.html", + "description": "Gold:Gp0127627 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100329/ReadbasedAnalysis/centrifuge/1781_100329.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b01" + }, + "id": "nmdc:1b842adedef085708050a71c63cbccb3", + "name": "1781_100329.json", + "description": "Gold:Gp0127627 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100329/ReadbasedAnalysis/1781_100329.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d0" + }, + "id": "nmdc:0ab7113a0f5362f23f64b4b7cd7abcb8", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127627", + "file_size_bytes": 18589862, + "url": "https://data.microbiomedata.org/data/1781_100329/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d2" + }, + "id": "nmdc:ec364473e457f915d5fe7fb700c210cd", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127627", + "file_size_bytes": 609489, + "url": "https://data.microbiomedata.org/data/1781_100329/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16cd1" + }, + "description": "EC TSV File for gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_ec.tsv", + "md5_checksum": "4c97ec34649fc995f167408bd39c9998", + "file_size_bytes": 3385, + "id": "nmdc:4c97ec34649fc995f167408bd39c9998", + "name": "gold:Gp0127627_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd3" + }, + "description": "Functional annotation GFF file for gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_functional_annotation.gff", + "md5_checksum": "6c96999ab72498624aae8bb9b0bfbc66", + "file_size_bytes": 3385, + "id": "nmdc:6c96999ab72498624aae8bb9b0bfbc66", + "name": "gold:Gp0127627_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd5" + }, + "description": "KO TSV File for gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_ko.tsv", + "md5_checksum": "874ae45fc2a007a7d5f9ff964fa8117a", + "file_size_bytes": 3385, + "id": "nmdc:874ae45fc2a007a7d5f9ff964fa8117a", + "name": "gold:Gp0127627_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cd6" + }, + "description": "Structural annotation GFF file for gold:Gp0127627", + "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_structural_annotation.gff", + "md5_checksum": "48ab9737528d088ffde37b733e3f728f", + "file_size_bytes": 3385, + "id": "nmdc:48ab9737528d088ffde37b733e3f728f", + "name": "gold:Gp0127627_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cdd" + }, + "description": "Protein FAA for gold:Gp0127627", 
+ "url": "https://data.microbiomedata.org/1781_100329/img_annotation/Ga0482243_proteins.faa", + "md5_checksum": "fec0b3842897bbce9166a628c4c2d7a0", + "file_size_bytes": 3385, + "id": "nmdc:fec0b3842897bbce9166a628c4c2d7a0", + "name": "gold:Gp0127627_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3470b" + }, + "has_input": [ + "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "nmdc:08f1ba3d3d380a167182c1beb7da304f", + "nmdc:cf868630ca2d9037e69e82cfb76a7bd7" + ], + "too_short_contig_num": 50792, + "part_of": [ + "nmdc:mga0daby71" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ba468a2c4f4810d87ba95ad9e123483d" + ], + "was_informed_by": "gold:Gp0127627", + "input_contig_num": 51188, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0daby71", + "unbinned_contig_num": 396, + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-11-13T19:08:49+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ab" + }, + "has_input": [ + "nmdc:a7db57faea894bec6603a69abfdfcf7d" + ], + "part_of": [ + "nmdc:mga0daby71" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6003e73aa18ac6aa3cc0f7e020c7170e", + "nmdc:7e5852b8ca5590f81c543ea69398410f", + "nmdc:cf868630ca2d9037e69e82cfb76a7bd7", + "nmdc:9e52b5a16f0eff5df36bd46038702a52", + "nmdc:c44dceb1684f1a4249e7b8e944a2b7cf", + "nmdc:4a788566d47b89e8bc79eea6e26f2c42", + "nmdc:3d01f11a480f59cefdc67e7b6c7f9fc6", + "nmdc:196a8e27999a32a6168d23f30d84f37b", + "nmdc:c3040fe67c2c8b2924c6db6c53b268ce", + "nmdc:5594ce118ad4b2f9ec03adc10ebb6267", + "nmdc:9d2ac6550f5a1dc3d4b3743e8fe2ceec", + 
"nmdc:2c73a261047ff94b898c190418373075" + ], + "was_informed_by": "gold:Gp0127627", + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0daby71", + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-11-13T19:08:49+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f99" + }, + "has_input": [ + "nmdc:ed95796b3fd964c6bedb141d70737ebf" + ], + "part_of": [ + "nmdc:mga0daby71" + ], + "ctg_logsum": 6346.305, + "scaf_logsum": 6368.36, + "gap_pct": 0.00044, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a7db57faea894bec6603a69abfdfcf7d", + "nmdc:8e798fcdd761feff51cab6a9c97ed7ae", + "nmdc:0d3200307a90e23525d3fefa7a25f867", + "nmdc:e6e7f40bb1f1e333904f20dc3c317e37", + "nmdc:08f1ba3d3d380a167182c1beb7da304f" + ], + "asm_score": 4.807, + "was_informed_by": "gold:Gp0127627", + "ctg_powsum": 681.483, + "scaf_max": 15604, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "scaf_powsum": 683.717, + "execution_resource": "NERSC-Cori", + "contigs": 51188, + "name": "Assembly Activity for nmdc:mga0daby71", + "ctg_max": 15604, + "gc_std": 0.11462, + "gc_avg": 0.57328, + "contig_bp": 18008171, + "started_at_time": "2021-11-13T18:47:34Z", + "scaf_bp": 18008251, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 51180, + "ended_at_time": "2021-11-13T19:08:49+00:00", + "ctg_l50": 321, + "ctg_l90": 282, + "ctg_n50": 20415, + "ctg_n90": 44756, + "scaf_l50": 321, + "scaf_l90": 282, + "scaf_n50": 20413, + "scaf_n90": 44748 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b43" + }, + "id": "nmdc:omprc-11-z5qv0f24", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", + "description": "Riverbed sediment samples were collected from an area with 
a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-v0q5ak63" + ], + "has_output": [ + "jgi:574fe0967ded5e3df1ee1482" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127627" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c866" + }, + "has_input": [ + "nmdc:45f15cded08bad75a2ef9d7e4b1f42de" + ], + "part_of": [ + "nmdc:mga0daby71" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ed95796b3fd964c6bedb141d70737ebf", + "nmdc:308ae373809697291bbc7947a1e4ed2d" + ], + "was_informed_by": "gold:Gp0127627", + "input_read_count": 20505370, + "output_read_bases": 2992084693, + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3096310870, + "name": "Read QC Activity for nmdc:mga0daby71", + "output_read_count": 19995028, + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T19:08:49+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf2a" + }, + "has_input": [ + "nmdc:ed95796b3fd964c6bedb141d70737ebf" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a5ac6665e5d66242b1c885a911236982", + "nmdc:d19478a191693d643157a89c69cc02d1", + "nmdc:679a82699663e88a5e8828ee081fa967", + "nmdc:95b3150e6fb62195c1e5ebf06f87c7d5", + 
"nmdc:0380e478962be82e0d97a6339f7f3b91", + "nmdc:0c1d139abdfa9fa10f26923abb4d6bda", + "nmdc:f388f7f0d79d0b2bbec1c3c0c5641814", + "nmdc:a2a0029691c04851f4a98003a773fe3f", + "nmdc:bab24ab64ad432d115f182df7198d46e" + ], + "was_informed_by": "gold:Gp0127627", + "id": "nmdc:ecaab4b51de694b35269756fd7e31ed9", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0daby71", + "started_at_time": "2021-11-13T18:47:34Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T19:08:49+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2456584646, + "type": "nmdc:DataObject", + "id": "jgi:574fde807ded5e3df1ee141b", + "name": "10533.2.165322.AGCTAAC-GGTTAGC.fastq.gz" + }, + { + "name": "Gp0127632_Filtered Reads", + "description": "Filtered Reads for Gp0127632", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filtered.fastq.gz", + "md5_checksum": "a43bfb55389206c2fc5ddb53e6aa2bc6", + "id": "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", + "file_size_bytes": 2199178772 + }, + { + "name": "Gp0127632_Filtered Stats", + "description": "Filtered Stats for Gp0127632", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/qa/nmdc_mga0b6cy30_filterStats.txt", + "md5_checksum": "919c5aade4fffb76f743a33b035b2839", + "id": "nmdc:919c5aade4fffb76f743a33b035b2839", + "file_size_bytes": 289 + }, + { + "name": "Gp0127632_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report.tsv", + "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", + "id": "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "file_size_bytes": 2899 + }, + { + "name": "Gp0127632_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127632", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_report_full.tsv", + "md5_checksum": "6c54105711e818c4d8169ab595b05efe", + "id": "nmdc:6c54105711e818c4d8169ab595b05efe", + "file_size_bytes": 769416 + }, + { + "name": "Gp0127632_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127632", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_gottcha2_krona.html", + "md5_checksum": "adb155cdb656648496484998a62fb96f", + "id": "nmdc:adb155cdb656648496484998a62fb96f", + "file_size_bytes": 235384 + }, + { + "name": "Gp0127632_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127632", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_classification.tsv", + "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", + "id": "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "file_size_bytes": 1917130445 + }, + { + "name": "Gp0127632_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127632", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_report.tsv", + "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", + "id": "nmdc:f345b3a57c37097a860e38d5e83835b8", + "file_size_bytes": 255290 + }, + { + "name": "Gp0127632_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127632", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_centrifuge_krona.html", + "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", + "id": "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "file_size_bytes": 2333225 + }, + { + "name": 
"Gp0127632_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127632", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_classification.tsv", + "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", + "id": "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "file_size_bytes": 1537863470 + }, + { + "name": "Gp0127632_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127632", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_report.tsv", + "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", + "id": "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "file_size_bytes": 648597 + }, + { + "name": "Gp0127632_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127632", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/ReadbasedAnalysis/nmdc_mga0b6cy30_kraken2_krona.html", + "md5_checksum": "e350fda9bd0651755171d79b413b8da3", + "id": "nmdc:e350fda9bd0651755171d79b413b8da3", + "file_size_bytes": 3959152 + }, + { + "name": "Gp0127632_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127632", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_contigs.fna", + "md5_checksum": "b5094d52c6d48836de0aac261c622868", + "id": "nmdc:b5094d52c6d48836de0aac261c622868", + "file_size_bytes": 59930370 + }, + { + "name": "Gp0127632_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127632", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_scaffolds.fna", + "md5_checksum": "4d9d83ac8db218e6d0bd4f29801c3ce3", + "id": 
"nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", + "file_size_bytes": 59532251 + }, + { + "name": "Gp0127632_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_covstats.txt", + "md5_checksum": "f8fad4cf225943d8fddec3fa3402c53a", + "id": "nmdc:f8fad4cf225943d8fddec3fa3402c53a", + "file_size_bytes": 10428676 + }, + { + "name": "Gp0127632_Assembled AGP file", + "description": "Assembled AGP file for Gp0127632", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_assembly.agp", + "md5_checksum": "52f130d084757d6e27177ed108e9e5bf", + "id": "nmdc:52f130d084757d6e27177ed108e9e5bf", + "file_size_bytes": 9725931 + }, + { + "name": "Gp0127632_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127632", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/assembly/nmdc_mga0b6cy30_pairedMapped_sorted.bam", + "md5_checksum": "9e5deaa9e7ac3f5f90d79b6520d39d53", + "id": "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", + "file_size_bytes": 2363431165 + }, + { + "name": "Gp0127632_Protein FAA", + "description": "Protein FAA for Gp0127632", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_proteins.faa", + "md5_checksum": "42989e75458691fbd17e537582c56d5e", + "id": "nmdc:42989e75458691fbd17e537582c56d5e", + "file_size_bytes": 35685584 + }, + { + "name": "Gp0127632_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127632", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_structural_annotation.gff", + "md5_checksum": "09240a6d1afc5f8b965a80a64aa96ef4", + "id": 
"nmdc:09240a6d1afc5f8b965a80a64aa96ef4", + "file_size_bytes": 2512 + }, + { + "name": "Gp0127632_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127632", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_functional_annotation.gff", + "md5_checksum": "c595237698baaf882fdeeac92f1b02be", + "id": "nmdc:c595237698baaf882fdeeac92f1b02be", + "file_size_bytes": 41979225 + }, + { + "name": "Gp0127632_KO TSV file", + "description": "KO TSV file for Gp0127632", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko.tsv", + "md5_checksum": "cd87df7a80ed03eef7d9923b9e9621e4", + "id": "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", + "file_size_bytes": 4726366 + }, + { + "name": "Gp0127632_EC TSV file", + "description": "EC TSV file for Gp0127632", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ec.tsv", + "md5_checksum": "57053d5594bb80495014664df22b0bb0", + "id": "nmdc:57053d5594bb80495014664df22b0bb0", + "file_size_bytes": 3155078 + }, + { + "name": "Gp0127632_COG GFF file", + "description": "COG GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cog.gff", + "md5_checksum": "3c82ee6a19674bd5abd4072cb137d96f", + "id": "nmdc:3c82ee6a19674bd5abd4072cb137d96f", + "file_size_bytes": 23956687 + }, + { + "name": "Gp0127632_PFAM GFF file", + "description": "PFAM GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_pfam.gff", + "md5_checksum": "c9bf48d6c88b3db0f431a08d93873c4a", + "id": "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", + "file_size_bytes": 17333907 + }, + { + "name": "Gp0127632_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127632", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_tigrfam.gff", + "md5_checksum": "2475726e21bd8369f76d529f55f21a3f", + "id": "nmdc:2475726e21bd8369f76d529f55f21a3f", + "file_size_bytes": 1771706 + }, + { + "name": "Gp0127632_SMART GFF file", + "description": "SMART GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_smart.gff", + "md5_checksum": "5698830d572ddc4e35a5f6642da7981a", + "id": "nmdc:5698830d572ddc4e35a5f6642da7981a", + "file_size_bytes": 5383998 + }, + { + "name": "Gp0127632_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_supfam.gff", + "md5_checksum": "18cdb0f987a2d417d0a39a685e435729", + "id": "nmdc:18cdb0f987a2d417d0a39a685e435729", + "file_size_bytes": 30162479 + }, + { + "name": "Gp0127632_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_cath_funfam.gff", + "md5_checksum": "b34e4d1823bd5cd88aa42832d10b3431", + "id": "nmdc:b34e4d1823bd5cd88aa42832d10b3431", + "file_size_bytes": 22459777 + }, + { + "name": "Gp0127632_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/annotation/nmdc_mga0b6cy30_ko_ec.gff", + "md5_checksum": "dc544f4796d49c520372e1872c5aea49", + "id": "nmdc:dc544f4796d49c520372e1872c5aea49", + "file_size_bytes": 15047897 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"file_size_bytes": 0 + }, + { + "name": "Gp0127632_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.tooShort.fa", + "md5_checksum": "2941988fcfb708d20ad1e44682c78e22", + "id": "nmdc:2941988fcfb708d20ad1e44682c78e22", + "file_size_bytes": 52475207 + }, + { + "name": "Gp0127632_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_bins.unbinned.fa", + "md5_checksum": "a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "id": "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "file_size_bytes": 5473493 + }, + { + "name": "Gp0127632_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127632", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_checkm_qa.out", + "md5_checksum": "2914266e7ac7a8668c6f8d8722466c69", + "id": "nmdc:2914266e7ac7a8668c6f8d8722466c69", + "file_size_bytes": 948 + }, + { + "name": "Gp0127632_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127632", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_hqmq_bin.zip", + "md5_checksum": "0fd97ca0ce01d42361ce817d3753a65e", + "id": "nmdc:0fd97ca0ce01d42361ce817d3753a65e", + "file_size_bytes": 497493 + }, + { + "name": "Gp0127632_metabat2 bins", + "description": "metabat2 bins for Gp0127632", + "url": "https://data.microbiomedata.org/data/nmdc:mga0b6cy30/MAGs/nmdc_mga0b6cy30_metabat_bin.zip", + "md5_checksum": "8e7832cac0ae99e2b63dfdfa34c24927", + "id": "nmdc:8e7832cac0ae99e2b63dfdfa34c24927", + "file_size_bytes": 108323 + }, + { + "_id": { + 
"$oid": "649b003d1ae706d7b5b14e69" + }, + "description": "Assembled contigs fasta for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly_contigs.fna", + "file_size_bytes": 59400374, + "type": "nmdc:DataObject", + "id": "nmdc:8f8931e086f72961675aa936b1356f86", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6a" + }, + "description": "Assembled scaffold fasta for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly_scaffolds.fna", + "file_size_bytes": 59002431, + "type": "nmdc:DataObject", + "id": "nmdc:e780311c63e956d852cd3c1bbd957f86", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6c" + }, + "description": "Assembled AGP file for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/assembly.agp", + "file_size_bytes": 8665587, + "type": "nmdc:DataObject", + "id": "nmdc:9c08a645e240b0861d3b8c912c7eaed0", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e6d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/mapping_stats.txt", + "file_size_bytes": 9898680, + "type": "nmdc:DataObject", + "id": "nmdc:0ca761b2a51f8db8f46b694f06c0809d", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e70" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127632", + "url": "https://data.microbiomedata.org/data/1781_100334/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2332493253, + "type": "nmdc:DataObject", + "id": "nmdc:4a26faa9b34a5c9f3bb65815cf2ad5c8", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + 
"_id": { + "$oid": "649b003d1ae706d7b5b15b29" + }, + "id": "nmdc:a95788f887b5af704f7e2cfd9868e8a4", + "name": "1781_100334.krona.html", + "description": "Gold:Gp0127632 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100334/ReadbasedAnalysis/centrifuge/1781_100334.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b30" + }, + "id": "nmdc:634b959933536776d62c9c66d43ec8ed", + "name": "1781_100334.json", + "description": "Gold:Gp0127632 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100334/ReadbasedAnalysis/1781_100334.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d6" + }, + "id": "nmdc:e45be81dca6d2cb4a1d7d17ee6d166a4", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127632", + "file_size_bytes": 1413, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165d7" + }, + "id": "nmdc:0aa200afbb24fe1e4c26c79c54c070dd", + "name": "gold:Gp0127632.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 273113, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165da" + }, + "id": "nmdc:695d01e510529d34d501f4fa62d5c9b8", + "name": "gold:Gp0127632.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 210794, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + 
"$oid": "649b003f1ae706d7b5b165db" + }, + "id": "nmdc:86c8e7c9c103a298357f6a2102bd8772", + "name": "gold:Gp0127632.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 311763, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165dd" + }, + "id": "nmdc:97771c73e3e872ea1da72cd758a03453", + "name": "gold:Gp0127632.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 236159, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165de" + }, + "id": "nmdc:e7e4a297417b55b9714702401da79d96", + "name": "gold:Gp0127632.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127632", + "file_size_bytes": 346669, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165eb" + }, + "id": "nmdc:fa90657e9d2a9b11f5cea076316d0a50", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127632", + "file_size_bytes": 6037283, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662f" + }, + "id": "nmdc:800b596b9573c1ddf6a9e357c1eb8d86", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127632", + "file_size_bytes": 50903302, + "url": "https://data.microbiomedata.org/data/1781_100334/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ce9" + 
}, + "description": "KO TSV File for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_ko.tsv", + "md5_checksum": "aeafeb18adb193b1a3c5c3c2ff9a912e", + "file_size_bytes": 3385, + "id": "nmdc:aeafeb18adb193b1a3c5c3c2ff9a912e", + "name": "gold:Gp0127632_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16ceb" + }, + "description": "Protein FAA for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_proteins.faa", + "md5_checksum": "a89e8af0fc6daf895e7a87f1ff7087f2", + "file_size_bytes": 3385, + "id": "nmdc:a89e8af0fc6daf895e7a87f1ff7087f2", + "name": "gold:Gp0127632_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cee" + }, + "description": "EC TSV File for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_ec.tsv", + "md5_checksum": "b8d886e71031cbe4fb1284f479348740", + "file_size_bytes": 3385, + "id": "nmdc:b8d886e71031cbe4fb1284f479348740", + "name": "gold:Gp0127632_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf2" + }, + "description": "Structural annotation GFF file for gold:Gp0127632", + "url": "https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_structural_annotation.gff", + "md5_checksum": "89b895fbf3c13801ddba22ff59bb385a", + "file_size_bytes": 3385, + "id": "nmdc:89b895fbf3c13801ddba22ff59bb385a", + "name": "gold:Gp0127632_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf5" + }, + "description": "Functional annotation GFF file for gold:Gp0127632", + "url": 
"https://data.microbiomedata.org/1781_100334/img_annotation/Ga0482238_functional_annotation.gff", + "md5_checksum": "2395040203b3351554a9e3ffb48b0b88", + "file_size_bytes": 3385, + "id": "nmdc:2395040203b3351554a9e3ffb48b0b88", + "name": "gold:Gp0127632_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3470d" + }, + "has_input": [ + "nmdc:b5094d52c6d48836de0aac261c622868", + "nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53", + "nmdc:c595237698baaf882fdeeac92f1b02be" + ], + "too_short_contig_num": 128818, + "part_of": [ + "nmdc:mga0b6cy30" + ], + "binned_contig_num": 313, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2941988fcfb708d20ad1e44682c78e22", + "nmdc:a6bc8d9d5ba5fe9713829aa7aef3c4cd", + "nmdc:2914266e7ac7a8668c6f8d8722466c69", + "nmdc:0fd97ca0ce01d42361ce817d3753a65e", + "nmdc:8e7832cac0ae99e2b63dfdfa34c24927" + ], + "was_informed_by": "gold:Gp0127632", + "input_contig_num": 132499, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0b6cy30", + "mags_list": [ + { + "number_of_contig": 84, + "completeness": 27.81, + "bin_name": "bins.1", + "gene_count": 437, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.71, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 229, + "completeness": 71.45, + "bin_name": "bins.2", + "gene_count": 1997, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": 
"Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 36 + } + ], + "unbinned_contig_num": 3368, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:08:32+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ae" + }, + "has_input": [ + "nmdc:b5094d52c6d48836de0aac261c622868" + ], + "part_of": [ + "nmdc:mga0b6cy30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:42989e75458691fbd17e537582c56d5e", + "nmdc:09240a6d1afc5f8b965a80a64aa96ef4", + "nmdc:c595237698baaf882fdeeac92f1b02be", + "nmdc:cd87df7a80ed03eef7d9923b9e9621e4", + "nmdc:57053d5594bb80495014664df22b0bb0", + "nmdc:3c82ee6a19674bd5abd4072cb137d96f", + "nmdc:c9bf48d6c88b3db0f431a08d93873c4a", + "nmdc:2475726e21bd8369f76d529f55f21a3f", + "nmdc:5698830d572ddc4e35a5f6642da7981a", + "nmdc:18cdb0f987a2d417d0a39a685e435729", + "nmdc:b34e4d1823bd5cd88aa42832d10b3431", + "nmdc:dc544f4796d49c520372e1872c5aea49" + ], + "was_informed_by": "gold:Gp0127632", + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0b6cy30", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:08:32+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9b" + }, + "has_input": [ + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + ], + "part_of": [ + "nmdc:mga0b6cy30" + ], + "ctg_logsum": 81568, + "scaf_logsum": 81839, + "gap_pct": 0.00096, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b5094d52c6d48836de0aac261c622868", + "nmdc:4d9d83ac8db218e6d0bd4f29801c3ce3", + "nmdc:f8fad4cf225943d8fddec3fa3402c53a", + "nmdc:52f130d084757d6e27177ed108e9e5bf", + 
"nmdc:9e5deaa9e7ac3f5f90d79b6520d39d53" + ], + "asm_score": 5.986, + "was_informed_by": "gold:Gp0127632", + "ctg_powsum": 9274.272, + "scaf_max": 23706, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "scaf_powsum": 9304.689, + "execution_resource": "NERSC-Cori", + "contigs": 132499, + "name": "Assembly Activity for nmdc:mga0b6cy30", + "ctg_max": 23706, + "gc_std": 0.09103, + "contig_bp": 54959738, + "gc_avg": 0.61354, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 54960268, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 132455, + "ended_at_time": "2021-10-11T04:08:32+00:00", + "ctg_l50": 372, + "ctg_l90": 285, + "ctg_n50": 43541, + "ctg_n90": 113564, + "scaf_l50": 372, + "scaf_l90": 285, + "scaf_n50": 43524, + "scaf_n90": 113522 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b44" + }, + "id": "nmdc:omprc-11-8qms8262", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-0xprxw22" + ], + "has_output": [ + "jgi:574fde807ded5e3df1ee141b" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127632" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c868" + }, + "has_input": [ + "nmdc:5cbd7ceb39903cbded77b36ae866fe9f" + ], + 
"part_of": [ + "nmdc:mga0b6cy30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6", + "nmdc:919c5aade4fffb76f743a33b035b2839" + ], + "was_informed_by": "gold:Gp0127632", + "input_read_count": 27906294, + "output_read_bases": 3905482172, + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4213850394, + "name": "Read QC Activity for nmdc:mga0b6cy30", + "output_read_count": 26116440, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:08:32+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf31" + }, + "has_input": [ + "nmdc:a43bfb55389206c2fc5ddb53e6aa2bc6" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3e583cccbbc068e0879ba6618bb6407c", + "nmdc:6c54105711e818c4d8169ab595b05efe", + "nmdc:adb155cdb656648496484998a62fb96f", + "nmdc:0a03ac5737750a3b336e7299e9f01ead", + "nmdc:f345b3a57c37097a860e38d5e83835b8", + "nmdc:c1f4471d943b284720a8becb5a2e32b4", + "nmdc:50cfcfc5d0d89245b8370abf6bfef23c", + "nmdc:a8dd7aa20043510158ad3b2bbe961b42", + "nmdc:e350fda9bd0651755171d79b413b8da3" + ], + "was_informed_by": "gold:Gp0127632", + "id": "nmdc:2d5f97b7f6c0f385d6185e05ac989b1e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0b6cy30", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:08:32+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2759159406, + "type": "nmdc:DataObject", + "id": "jgi:574fe09f7ded5e3df1ee1489", + "name": "10533.3.165334.ACAGCAA-GTTGCTG.fastq.gz" + }, + { + "name": "Gp0127636_Filtered Reads", + "description": "Filtered Reads for Gp0127636", + "data_object_type": 
"Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filtered.fastq.gz", + "md5_checksum": "e4f5675c728fd1896682eb669656b5d6", + "id": "nmdc:e4f5675c728fd1896682eb669656b5d6", + "file_size_bytes": 2463342132 + }, + { + "name": "Gp0127636_Filtered Stats", + "description": "Filtered Stats for Gp0127636", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/qa/nmdc_mga02tph39_filterStats.txt", + "md5_checksum": "64f455185b1bc610a8d74a84ed12683f", + "id": "nmdc:64f455185b1bc610a8d74a84ed12683f", + "file_size_bytes": 293 + }, + { + "name": "Gp0127636_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report.tsv", + "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", + "id": "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "file_size_bytes": 5547 + }, + { + "name": "Gp0127636_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_report_full.tsv", + "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", + "id": "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "file_size_bytes": 965042 + }, + { + "name": "Gp0127636_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127636", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_gottcha2_krona.html", + "md5_checksum": "827ad863c875ea14473c9903d192fa73", + "id": "nmdc:827ad863c875ea14473c9903d192fa73", + "file_size_bytes": 242495 + }, + { + "name": "Gp0127636_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127636", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_classification.tsv", + "md5_checksum": "957074ca49765b22348e27b0133d8ba0", + "id": "nmdc:957074ca49765b22348e27b0133d8ba0", + "file_size_bytes": 2151939041 + }, + { + "name": "Gp0127636_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127636", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_report.tsv", + "md5_checksum": "9253645582296696cb33b11754832574", + "id": "nmdc:9253645582296696cb33b11754832574", + "file_size_bytes": 257932 + }, + { + "name": "Gp0127636_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127636", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_centrifuge_krona.html", + "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", + "id": "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "file_size_bytes": 2335219 + }, + { + "name": "Gp0127636_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127636", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_classification.tsv", + "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", + "id": "nmdc:75180fce38f38a6307231b47a8d2b23b", + "file_size_bytes": 1746049273 + }, + { + "name": "Gp0127636_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127636", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_report.tsv", + "md5_checksum": "b4524a34937893768dbd3752068dee0c", + "id": "nmdc:b4524a34937893768dbd3752068dee0c", + "file_size_bytes": 660975 + }, + { + "name": "Gp0127636_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127636", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/ReadbasedAnalysis/nmdc_mga02tph39_kraken2_krona.html", + "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", + "id": "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4", + "file_size_bytes": 4020978 + }, + { + "name": "Gp0127636_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127636", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_contigs.fna", + "md5_checksum": "36692b7b93756aaabd7f1f6259753c4e", + "id": "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "file_size_bytes": 39062008 + }, + { + "name": "Gp0127636_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127636", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_scaffolds.fna", + "md5_checksum": "8d02adf1319d5b95c2abc6ed5b5c1683", + "id": "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", + "file_size_bytes": 38774844 + }, + { + "name": "Gp0127636_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_covstats.txt", + "md5_checksum": "9830a711accd3a5ed899a2e616d0f4bf", + "id": "nmdc:9830a711accd3a5ed899a2e616d0f4bf", + "file_size_bytes": 7495949 + }, + { + "name": "Gp0127636_Assembled AGP file", + "description": "Assembled AGP file for Gp0127636", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_assembly.agp", + "md5_checksum": "481fbd8cdeacd71e54a45c78d5decb36", + "id": "nmdc:481fbd8cdeacd71e54a45c78d5decb36", + "file_size_bytes": 6962527 + }, + { + "name": "Gp0127636_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0127636", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/assembly/nmdc_mga02tph39_pairedMapped_sorted.bam", + "md5_checksum": "a24edc9ffd773c30cea8ea709988307a", + "id": "nmdc:a24edc9ffd773c30cea8ea709988307a", + "file_size_bytes": 2624769069 + }, + { + "name": "Gp0127636_Protein FAA", + "description": "Protein FAA for Gp0127636", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_proteins.faa", + "md5_checksum": "a5d97f323fe7117cb38a2eea1f2246d2", + "id": "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", + "file_size_bytes": 23469553 + }, + { + "name": "Gp0127636_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127636", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_structural_annotation.gff", + "md5_checksum": "2b791fb3e2964d7808388b32086e0de2", + "id": "nmdc:2b791fb3e2964d7808388b32086e0de2", + "file_size_bytes": 16532352 + }, + { + "name": "Gp0127636_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127636", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_functional_annotation.gff", + "md5_checksum": "f61ed86592491b2d83b5893749e12406", + "id": "nmdc:f61ed86592491b2d83b5893749e12406", + "file_size_bytes": 28432426 + }, + { + "name": "Gp0127636_KO TSV file", + "description": "KO TSV file for Gp0127636", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko.tsv", + "md5_checksum": "e983789bdc08364b00a000684062ed16", + "id": "nmdc:e983789bdc08364b00a000684062ed16", + "file_size_bytes": 3189682 + }, + { + "name": "Gp0127636_EC TSV file", + 
"description": "EC TSV file for Gp0127636", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ec.tsv", + "md5_checksum": "3cd47d66b6e9006ff683a2eda168285f", + "id": "nmdc:3cd47d66b6e9006ff683a2eda168285f", + "file_size_bytes": 2100535 + }, + { + "name": "Gp0127636_COG GFF file", + "description": "COG GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cog.gff", + "md5_checksum": "e056ee666e8001bdb6f790efb3394093", + "id": "nmdc:e056ee666e8001bdb6f790efb3394093", + "file_size_bytes": 15585690 + }, + { + "name": "Gp0127636_PFAM GFF file", + "description": "PFAM GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_pfam.gff", + "md5_checksum": "2b90fcb7628c3ffa9e7a14a32612b7af", + "id": "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", + "file_size_bytes": 11182350 + }, + { + "name": "Gp0127636_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_tigrfam.gff", + "md5_checksum": "4e2f1d4b2d20bfb0209a320a60c4aeac", + "id": "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", + "file_size_bytes": 995758 + }, + { + "name": "Gp0127636_SMART GFF file", + "description": "SMART GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_smart.gff", + "md5_checksum": "dd24a8b0f774555ac91e663416745428", + "id": "nmdc:dd24a8b0f774555ac91e663416745428", + "file_size_bytes": 3256325 + }, + { + "name": "Gp0127636_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_supfam.gff", + "md5_checksum": "2e76b71475b854e2bf2d0aa15a53dd7d", + "id": "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", + "file_size_bytes": 19666317 + }, + { + 
"name": "Gp0127636_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_cath_funfam.gff", + "md5_checksum": "2f297176cd51b2ede33c313f713b40b1", + "id": "nmdc:2f297176cd51b2ede33c313f713b40b1", + "file_size_bytes": 14458019 + }, + { + "name": "Gp0127636_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127636", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/annotation/nmdc_mga02tph39_ko_ec.gff", + "md5_checksum": "678a7af05a89d9d4f5f5d598dc2e3013", + "id": "nmdc:678a7af05a89d9d4f5f5d598dc2e3013", + "file_size_bytes": 10187098 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127636_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127636", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga02tph39/MAGs/nmdc_mga02tph39_hqmq_bin.zip", + "md5_checksum": "2d1e318b8b815a8a5487f23315d0fe02", + "id": "nmdc:2d1e318b8b815a8a5487f23315d0fe02", + "file_size_bytes": 182 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7c" + }, + "description": "Assembled contigs fasta for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly_contigs.fna", + "file_size_bytes": 38679584, + "type": "nmdc:DataObject", + "id": "nmdc:555541de209f6b5bc8b4e36f9c5a96c1", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7d" + }, + "description": 
"Assembled scaffold fasta for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly_scaffolds.fna", + "file_size_bytes": 38392508, + "type": "nmdc:DataObject", + "id": "nmdc:28a962bc24ab2ba5b7d2e486c36cf6b5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e81" + }, + "description": "Assembled AGP file for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/assembly.agp", + "file_size_bytes": 6197503, + "type": "nmdc:DataObject", + "id": "nmdc:e67c00b23e1c74597d0c07f129d35890", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e82" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/mapping_stats.txt", + "file_size_bytes": 7113525, + "type": "nmdc:DataObject", + "id": "nmdc:fd1201530245f5e4ef4c5d263b34c0a3", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e85" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127636", + "url": "https://data.microbiomedata.org/data/1781_100338/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2590059661, + "type": "nmdc:DataObject", + "id": "nmdc:937d790bc414e5aa80c09a419b25dfe1", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b57" + }, + "id": "nmdc:ad763856d102ea442c511ce8c2ac7641", + "name": "1781_100338.krona.html", + "description": "Gold:Gp0127636 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100338/ReadbasedAnalysis/centrifuge/1781_100338.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b5f" 
+ }, + "id": "nmdc:6ea65689d5fe9ac44c9e0e415304f4e3", + "name": "1781_100338.json", + "description": "Gold:Gp0127636 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100338/ReadbasedAnalysis/1781_100338.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f8" + }, + "id": "nmdc:09f9ddea688b24a3ffa3f858851f6011", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127636", + "file_size_bytes": 3427718, + "url": "https://data.microbiomedata.org/data/1781_100338/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16622" + }, + "id": "nmdc:7c210c1bd10b2654864e4d571336a8b8", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127636", + "file_size_bytes": 34402553, + "url": "https://data.microbiomedata.org/data/1781_100338/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cff" + }, + "description": "EC TSV File for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_ec.tsv", + "md5_checksum": "80ec7d76d2509e6eeab61d092808908b", + "file_size_bytes": 3385, + "id": "nmdc:80ec7d76d2509e6eeab61d092808908b", + "name": "gold:Gp0127636_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d00" + }, + "description": "KO TSV File for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_ko.tsv", + "md5_checksum": "d68e6d4245c33a73666148570aac9c10", + "file_size_bytes": 3385, + "id": "nmdc:d68e6d4245c33a73666148570aac9c10", + "name": "gold:Gp0127636_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b00401ae706d7b5b16d01" + }, + "description": "Protein FAA for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_proteins.faa", + "md5_checksum": "66d9d6751efad0b8019a565488f950a5", + "file_size_bytes": 3385, + "id": "nmdc:66d9d6751efad0b8019a565488f950a5", + "name": "gold:Gp0127636_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d03" + }, + "description": "Structural annotation GFF file for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_structural_annotation.gff", + "md5_checksum": "e3b57dff7ca37c0da6b7d4bfb4450d9c", + "file_size_bytes": 3385, + "id": "nmdc:e3b57dff7ca37c0da6b7d4bfb4450d9c", + "name": "gold:Gp0127636_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d04" + }, + "description": "Functional annotation GFF file for gold:Gp0127636", + "url": "https://data.microbiomedata.org/1781_100338/img_annotation/Ga0482234_functional_annotation.gff", + "md5_checksum": "31f8346eeca4b929a6c28686bb8b2043", + "file_size_bytes": 3385, + "id": "nmdc:31f8346eeca4b929a6c28686bb8b2043", + "name": "gold:Gp0127636_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34709" + }, + "has_input": [ + "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "nmdc:a24edc9ffd773c30cea8ea709988307a", + "nmdc:f61ed86592491b2d83b5893749e12406" + ], + "too_short_contig_num": 93687, + "part_of": [ + "nmdc:mga02tph39" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2d1e318b8b815a8a5487f23315d0fe02" + ], + "was_informed_by": "gold:Gp0127636", + "input_contig_num": 
95606, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga02tph39", + "unbinned_contig_num": 1919, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-11-13T18:49:37+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ac" + }, + "has_input": [ + "nmdc:36692b7b93756aaabd7f1f6259753c4e" + ], + "part_of": [ + "nmdc:mga02tph39" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a5d97f323fe7117cb38a2eea1f2246d2", + "nmdc:2b791fb3e2964d7808388b32086e0de2", + "nmdc:f61ed86592491b2d83b5893749e12406", + "nmdc:e983789bdc08364b00a000684062ed16", + "nmdc:3cd47d66b6e9006ff683a2eda168285f", + "nmdc:e056ee666e8001bdb6f790efb3394093", + "nmdc:2b90fcb7628c3ffa9e7a14a32612b7af", + "nmdc:4e2f1d4b2d20bfb0209a320a60c4aeac", + "nmdc:dd24a8b0f774555ac91e663416745428", + "nmdc:2e76b71475b854e2bf2d0aa15a53dd7d", + "nmdc:2f297176cd51b2ede33c313f713b40b1", + "nmdc:678a7af05a89d9d4f5f5d598dc2e3013" + ], + "was_informed_by": "gold:Gp0127636", + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga02tph39", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-11-13T18:49:37+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f91" + }, + "has_input": [ + "nmdc:e4f5675c728fd1896682eb669656b5d6" + ], + "part_of": [ + "nmdc:mga02tph39" + ], + "ctg_logsum": 36469, + "scaf_logsum": 36615, + "gap_pct": 0.00062, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:36692b7b93756aaabd7f1f6259753c4e", + "nmdc:8d02adf1319d5b95c2abc6ed5b5c1683", + "nmdc:9830a711accd3a5ed899a2e616d0f4bf", + "nmdc:481fbd8cdeacd71e54a45c78d5decb36", + 
"nmdc:a24edc9ffd773c30cea8ea709988307a" + ], + "asm_score": 3.618, + "was_informed_by": "gold:Gp0127636", + "ctg_powsum": 3976.058, + "scaf_max": 23067, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "scaf_powsum": 3993.143, + "execution_resource": "NERSC-Cori", + "contigs": 95606, + "name": "Assembly Activity for nmdc:mga02tph39", + "ctg_max": 23067, + "gc_std": 0.11099, + "gc_avg": 0.57474, + "contig_bp": 35573088, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 35573308, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 95584, + "ended_at_time": "2021-11-13T18:49:37+00:00", + "ctg_l50": 329, + "ctg_l90": 282, + "ctg_n50": 35238, + "ctg_n90": 83377, + "scaf_l50": 329, + "scaf_l90": 282, + "scaf_n50": 35220, + "scaf_n90": 83355 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b45" + }, + "id": "nmdc:omprc-11-k675bw84", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-rtf54942" + ], + "has_output": [ + "jgi:574fe09f7ded5e3df1ee1489" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127636" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c864" + }, + "has_input": [ + "nmdc:341830a5735c34968da2304bc27edd2a" + ], + "part_of": [ + "nmdc:mga02tph39" + 
], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e4f5675c728fd1896682eb669656b5d6", + "nmdc:64f455185b1bc610a8d74a84ed12683f" + ], + "was_informed_by": "gold:Gp0127636", + "input_read_count": 31642056, + "output_read_bases": 4354491393, + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4777950456, + "name": "Read QC Activity for nmdc:mga02tph39", + "output_read_count": 29115818, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T18:49:37+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf26" + }, + "has_input": [ + "nmdc:e4f5675c728fd1896682eb669656b5d6" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:50d80a30d4ff113e36f6fd64b1f28547", + "nmdc:c2cd20a2011592a76397f49dc3acd6b7", + "nmdc:827ad863c875ea14473c9903d192fa73", + "nmdc:957074ca49765b22348e27b0133d8ba0", + "nmdc:9253645582296696cb33b11754832574", + "nmdc:9aef1d9e04acfe0b7fb1b9dc3b842912", + "nmdc:75180fce38f38a6307231b47a8d2b23b", + "nmdc:b4524a34937893768dbd3752068dee0c", + "nmdc:f1543441c59aaaf8ec52036a5bbbe3f4" + ], + "was_informed_by": "gold:Gp0127636", + "id": "nmdc:3a0bb3c0ec1ec60e0487cb98f7f19a90", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga02tph39", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:49:37+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2620687542, + "type": "nmdc:DataObject", + "id": "jgi:574fe09c7ded5e3df1ee1487", + "name": "10533.3.165334.ACGGAAC-TGTTCCG.fastq.gz" + }, + { + "name": "Gp0127634_Filtered Reads", + "description": "Filtered Reads for Gp0127634", + "data_object_type": "Filtered Sequencing Reads", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filtered.fastq.gz", + "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", + "id": "nmdc:ac889627d813c8e34cfbf79a4264c590", + "file_size_bytes": 2316462404 + }, + { + "name": "Gp0127634_Filtered Stats", + "description": "Filtered Stats for Gp0127634", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/qa/nmdc_mga0r0vf18_filterStats.txt", + "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", + "id": "nmdc:0dfd55be1779ae7922d80aa22034c9a1", + "file_size_bytes": 291 + }, + { + "name": "Gp0127634_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report.tsv", + "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", + "id": "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "file_size_bytes": 4224 + }, + { + "name": "Gp0127634_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_report_full.tsv", + "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", + "id": "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "file_size_bytes": 875501 + }, + { + "name": "Gp0127634_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127634", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_gottcha2_krona.html", + "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", + "id": "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "file_size_bytes": 238755 + }, + { + "name": "Gp0127634_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127634", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_classification.tsv", + "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", + "id": "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "file_size_bytes": 2051793471 + }, + { + "name": "Gp0127634_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127634", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_report.tsv", + "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", + "id": "nmdc:0ca043b630ba304cb80603e8332c78cf", + "file_size_bytes": 256560 + }, + { + "name": "Gp0127634_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127634", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_centrifuge_krona.html", + "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", + "id": "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "file_size_bytes": 2334325 + }, + { + "name": "Gp0127634_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127634", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_classification.tsv", + "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", + "id": "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "file_size_bytes": 1649071235 + }, + { + "name": "Gp0127634_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127634", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_report.tsv", + "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "id": "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "file_size_bytes": 654782 + }, + { + "name": "Gp0127634_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127634", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/ReadbasedAnalysis/nmdc_mga0r0vf18_kraken2_krona.html", + "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", + "id": "nmdc:678e7c401a6971629f7d3ada83b307ab", + "file_size_bytes": 3988988 + }, + { + "name": "Gp0127634_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127634", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_contigs.fna", + "md5_checksum": "2a30cf44cc596923301befc34edf6c0a", + "id": "nmdc:2a30cf44cc596923301befc34edf6c0a", + "file_size_bytes": 84939887 + }, + { + "name": "Gp0127634_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127634", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_scaffolds.fna", + "md5_checksum": "f147264a5a4a7eec4d68f05ab52ecc1d", + "id": "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", + "file_size_bytes": 84411544 + }, + { + "name": "Gp0127634_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_covstats.txt", + "md5_checksum": "9bd1b25df71c0a6f9ca408ddc045ffed", + "id": "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", + "file_size_bytes": 13895509 + }, + { + "name": "Gp0127634_Assembled AGP file", + "description": "Assembled AGP file for Gp0127634", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_assembly.agp", + "md5_checksum": "825969095ff134b195b06a40fcc6089a", + "id": "nmdc:825969095ff134b195b06a40fcc6089a", + "file_size_bytes": 12985962 + }, + { + "name": "Gp0127634_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0127634", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/assembly/nmdc_mga0r0vf18_pairedMapped_sorted.bam", + "md5_checksum": "356d9ca409747590849dd894998166ee", + "id": "nmdc:356d9ca409747590849dd894998166ee", + "file_size_bytes": 2516463401 + }, + { + "name": "Gp0127634_Protein FAA", + "description": "Protein FAA for Gp0127634", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_proteins.faa", + "md5_checksum": "ca16203099dc1d6bbce00320bb753974", + "id": "nmdc:ca16203099dc1d6bbce00320bb753974", + "file_size_bytes": 49630516 + }, + { + "name": "Gp0127634_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127634", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_structural_annotation.gff", + "md5_checksum": "fffbb7b52a4886755df429e22a152427", + "id": "nmdc:fffbb7b52a4886755df429e22a152427", + "file_size_bytes": 2519 + }, + { + "name": "Gp0127634_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127634", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_functional_annotation.gff", + "md5_checksum": "f63b43e7797845fa94dc6f552ba1ea39", + "id": "nmdc:f63b43e7797845fa94dc6f552ba1ea39", + "file_size_bytes": 57589694 + }, + { + "name": "Gp0127634_KO TSV file", + "description": "KO TSV file for Gp0127634", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko.tsv", + "md5_checksum": "8ab8f39bfc76267daa4ce5a34811bff1", + "id": "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", + "file_size_bytes": 6602379 + }, + { + "name": "Gp0127634_EC TSV file", + 
"description": "EC TSV file for Gp0127634", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ec.tsv", + "md5_checksum": "d6ff8f2f0d5c77495b2b43a7020e5730", + "id": "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", + "file_size_bytes": 4399755 + }, + { + "name": "Gp0127634_COG GFF file", + "description": "COG GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cog.gff", + "md5_checksum": "763d16c5dbadbeba61ceee91ed5209f3", + "id": "nmdc:763d16c5dbadbeba61ceee91ed5209f3", + "file_size_bytes": 33737036 + }, + { + "name": "Gp0127634_PFAM GFF file", + "description": "PFAM GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_pfam.gff", + "md5_checksum": "52cba722f402eea06fda75ec1e5a5103", + "id": "nmdc:52cba722f402eea06fda75ec1e5a5103", + "file_size_bytes": 24757263 + }, + { + "name": "Gp0127634_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_tigrfam.gff", + "md5_checksum": "ad358ce4b479febc34a2acdd9f249517", + "id": "nmdc:ad358ce4b479febc34a2acdd9f249517", + "file_size_bytes": 2661782 + }, + { + "name": "Gp0127634_SMART GFF file", + "description": "SMART GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_smart.gff", + "md5_checksum": "10a0ca82cf662ac4d9b465f05ed1fb2b", + "id": "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", + "file_size_bytes": 7506881 + }, + { + "name": "Gp0127634_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_supfam.gff", + "md5_checksum": "d0e8459e010015e726c31f0f8c18d359", + "id": "nmdc:d0e8459e010015e726c31f0f8c18d359", + "file_size_bytes": 42013513 + }, + { + 
"name": "Gp0127634_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_cath_funfam.gff", + "md5_checksum": "41d7ca149efb4c12bce48e5a19649a84", + "id": "nmdc:41d7ca149efb4c12bce48e5a19649a84", + "file_size_bytes": 31747110 + }, + { + "name": "Gp0127634_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/annotation/nmdc_mga0r0vf18_ko_ec.gff", + "md5_checksum": "9da1883e60979e17665b0211198c35f0", + "id": "nmdc:9da1883e60979e17665b0211198c35f0", + "file_size_bytes": 20999001 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127634_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.tooShort.fa", + "md5_checksum": "3c8eadbcf4f583090d8f378ea6758799", + "id": "nmdc:3c8eadbcf4f583090d8f378ea6758799", + "file_size_bytes": 71683990 + }, + { + "name": "Gp0127634_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_bins.unbinned.fa", + "md5_checksum": "1be647dc835ee8fe666fe9893266bd21", + "id": "nmdc:1be647dc835ee8fe666fe9893266bd21", + "file_size_bytes": 11353478 + }, + { + "name": "Gp0127634_metabat2 bin checkm quality assessment result", + "description": "metabat2 
bin checkm quality assessment result for Gp0127634", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_checkm_qa.out", + "md5_checksum": "6cc278c455cafc691333c0a74fe6c540", + "id": "nmdc:6cc278c455cafc691333c0a74fe6c540", + "file_size_bytes": 936 + }, + { + "name": "Gp0127634_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127634", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_hqmq_bin.zip", + "md5_checksum": "de4d0180489bdaa5526977508a489b99", + "id": "nmdc:de4d0180489bdaa5526977508a489b99", + "file_size_bytes": 518340 + }, + { + "name": "Gp0127634_metabat2 bins", + "description": "metabat2 bins for Gp0127634", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r0vf18/MAGs/nmdc_mga0r0vf18_metabat_bin.zip", + "md5_checksum": "16a08c4a3a6e9c70a5d47209177d0e60", + "id": "nmdc:16a08c4a3a6e9c70a5d47209177d0e60", + "file_size_bytes": 63768 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e74" + }, + "description": "Assembled AGP file for gold:Gp0127634", + "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly.agp", + "file_size_bytes": 11578650, + "type": "nmdc:DataObject", + "id": "nmdc:38fc6fb4189df6c3ba567ce6e9eb0492", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e75" + }, + "description": "Assembled scaffold fasta for gold:Gp0127634", + "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly_scaffolds.fna", + "file_size_bytes": 83708608, + "type": "nmdc:DataObject", + "id": "nmdc:3c3519053f5bf24ac5faa2db3b9c258b", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e76" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127634", + "url": 
"https://data.microbiomedata.org/data/1781_100336/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2483793009, + "type": "nmdc:DataObject", + "id": "nmdc:ac6c2b405bcedaa830f122f55b389245", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e77" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127634", + "url": "https://data.microbiomedata.org/data/1781_100336/assembly/mapping_stats.txt", + "file_size_bytes": 13192221, + "type": "nmdc:DataObject", + "id": "nmdc:6783fd1b5292dc59fa04a4f20725b721", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e78" + }, + "description": "Assembled contigs fasta for gold:Gp0127634", + "url": "https://data.microbiomedata.org/data/1781_100336/assembly/assembly_contigs.fna", + "file_size_bytes": 84236599, + "type": "nmdc:DataObject", + "id": "nmdc:b502e282cb52690232ce6ec6e1cfd4bc", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b40" + }, + "id": "nmdc:613dbb558d3cf5ece974268c1b0b1243", + "name": "1781_100336.krona.html", + "description": "Gold:Gp0127634 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100336/ReadbasedAnalysis/centrifuge/1781_100336.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b4b" + }, + "id": "nmdc:009de2a4d412df442a83e43028aed210", + "name": "1781_100336.json", + "description": "Gold:Gp0127634 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100336/ReadbasedAnalysis/1781_100336.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e5" + }, + "id": "nmdc:941f2a63752dd68925387a6dde7bd88a", + "name": "bins.tooShort.fa", 
+ "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127634", + "file_size_bytes": 69587941, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165e9" + }, + "id": "nmdc:77e41ad1f4836947b39a43f7ea971076", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127634", + "file_size_bytes": 12216952, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ec" + }, + "id": "nmdc:aba84545a4941088387ffe076be49a4d", + "name": "gold:Gp0127634.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127634", + "file_size_bytes": 367898, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ed" + }, + "id": "nmdc:3713d982c02d72fba230a0d408598a1f", + "name": "gold:Gp0127634.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127634", + "file_size_bytes": 272314, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ef" + }, + "id": "nmdc:29c82fc14dfa2a0f5dc057d234ac6c5a", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127634", + "file_size_bytes": 1071, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f0" + }, + "id": "nmdc:0e28d59da33c112a00b6793a19b71189", + "name": "gold:Gp0127634.bins.2.fa", + "description": "metabat2 
binned contig file for gold:Gp0127634", + "file_size_bytes": 320360, + "url": "https://data.microbiomedata.org/data/1781_100336/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cef" + }, + "description": "Structural annotation GFF file for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_structural_annotation.gff", + "md5_checksum": "1e286398d6b164538bbdefb9cc8a41e9", + "file_size_bytes": 3385, + "id": "nmdc:1e286398d6b164538bbdefb9cc8a41e9", + "name": "gold:Gp0127634_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf1" + }, + "description": "EC TSV File for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_ec.tsv", + "md5_checksum": "01b078b5b9dde5699e9b9ab02af272df", + "file_size_bytes": 3385, + "id": "nmdc:01b078b5b9dde5699e9b9ab02af272df", + "name": "gold:Gp0127634_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf4" + }, + "description": "Protein FAA for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_proteins.faa", + "md5_checksum": "3374b8708ae6b77b16cd01ce4f33ee72", + "file_size_bytes": 3385, + "id": "nmdc:3374b8708ae6b77b16cd01ce4f33ee72", + "name": "gold:Gp0127634_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf9" + }, + "description": "KO TSV File for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_ko.tsv", + "md5_checksum": "5b2ff10d97d2b516716a67dafb137937", + "file_size_bytes": 3385, + "id": "nmdc:5b2ff10d97d2b516716a67dafb137937", + "name": 
"gold:Gp0127634_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cfb" + }, + "description": "Functional annotation GFF file for gold:Gp0127634", + "url": "https://data.microbiomedata.org/1781_100336/img_annotation/Ga0482236_functional_annotation.gff", + "md5_checksum": "803451414e1935d4de9f9911963efe8d", + "file_size_bytes": 3385, + "id": "nmdc:803451414e1935d4de9f9911963efe8d", + "name": "gold:Gp0127634_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34713" + }, + "has_input": [ + "nmdc:2a30cf44cc596923301befc34edf6c0a", + "nmdc:356d9ca409747590849dd894998166ee", + "nmdc:f63b43e7797845fa94dc6f552ba1ea39" + ], + "too_short_contig_num": 168596, + "part_of": [ + "nmdc:mga0r0vf18" + ], + "binned_contig_num": 278, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:3c8eadbcf4f583090d8f378ea6758799", + "nmdc:1be647dc835ee8fe666fe9893266bd21", + "nmdc:6cc278c455cafc691333c0a74fe6c540", + "nmdc:de4d0180489bdaa5526977508a489b99", + "nmdc:16a08c4a3a6e9c70a5d47209177d0e60" + ], + "was_informed_by": "gold:Gp0127634", + "input_contig_num": 175822, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0r0vf18", + "mags_list": [ + { + "number_of_contig": 235, + "completeness": 68.28, + "bin_name": "bins.1", + "gene_count": 2056, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 2.91, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 34 + }, + { + 
"number_of_contig": 43, + "completeness": 10.69, + "bin_name": "bins.2", + "gene_count": 247, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 6948, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:49:55+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b3" + }, + "has_input": [ + "nmdc:2a30cf44cc596923301befc34edf6c0a" + ], + "part_of": [ + "nmdc:mga0r0vf18" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ca16203099dc1d6bbce00320bb753974", + "nmdc:fffbb7b52a4886755df429e22a152427", + "nmdc:f63b43e7797845fa94dc6f552ba1ea39", + "nmdc:8ab8f39bfc76267daa4ce5a34811bff1", + "nmdc:d6ff8f2f0d5c77495b2b43a7020e5730", + "nmdc:763d16c5dbadbeba61ceee91ed5209f3", + "nmdc:52cba722f402eea06fda75ec1e5a5103", + "nmdc:ad358ce4b479febc34a2acdd9f249517", + "nmdc:10a0ca82cf662ac4d9b465f05ed1fb2b", + "nmdc:d0e8459e010015e726c31f0f8c18d359", + "nmdc:41d7ca149efb4c12bce48e5a19649a84", + "nmdc:9da1883e60979e17665b0211198c35f0" + ], + "was_informed_by": "gold:Gp0127634", + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0r0vf18", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:49:55+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9c" + }, + "has_input": [ + "nmdc:ac889627d813c8e34cfbf79a4264c590" + ], + "part_of": [ + "nmdc:mga0r0vf18" + ], + "ctg_logsum": 142091, + "scaf_logsum": 142614, + "gap_pct": 0.00138, + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2a30cf44cc596923301befc34edf6c0a", + "nmdc:f147264a5a4a7eec4d68f05ab52ecc1d", + "nmdc:9bd1b25df71c0a6f9ca408ddc045ffed", + "nmdc:825969095ff134b195b06a40fcc6089a", + "nmdc:356d9ca409747590849dd894998166ee" + ], + "asm_score": 5.751, + "was_informed_by": "gold:Gp0127634", + "ctg_powsum": 15837, + "scaf_max": 33833, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "scaf_powsum": 15897, + "execution_resource": "NERSC-Cori", + "contigs": 175824, + "name": "Assembly Activity for nmdc:mga0r0vf18", + "ctg_max": 33833, + "gc_std": 0.09424, + "contig_bp": 78219291, + "gc_avg": 0.62214, + "started_at_time": "2021-10-11T02:23:30Z", + "scaf_bp": 78220371, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 175734, + "ended_at_time": "2021-10-11T04:49:55+00:00", + "ctg_l50": 412, + "ctg_l90": 286, + "ctg_n50": 53340, + "ctg_n90": 150131, + "scaf_l50": 412, + "scaf_l90": 286, + "scaf_n50": 53321, + "scaf_n90": 150048 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b46" + }, + "id": "nmdc:omprc-11-mbv2jc69", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jdgzjq31" + ], + "has_output": [ + "jgi:574fe09c7ded5e3df1ee1487" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + 
"gold_sequencing_project_identifiers": [ + "gold:Gp0127634" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86a" + }, + "has_input": [ + "nmdc:2b7712d32a159eca66fc50936de000a5" + ], + "part_of": [ + "nmdc:mga0r0vf18" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac889627d813c8e34cfbf79a4264c590", + "nmdc:0dfd55be1779ae7922d80aa22034c9a1" + ], + "was_informed_by": "gold:Gp0127634", + "input_read_count": 29872658, + "output_read_bases": 4172764161, + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4510771358, + "name": "Read QC Activity for nmdc:mga0r0vf18", + "output_read_count": 27896694, + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:49:55+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf2d" + }, + "has_input": [ + "nmdc:ac889627d813c8e34cfbf79a4264c590" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0526ea84f6e7893f5b6d62a32f81a199", + "nmdc:1a7380f5adb59f36c98c840bf28ad4bd", + "nmdc:366ab38bb6de9591f31a086d42ac23d6", + "nmdc:c44ba44bc6910c2f3ed3a60a52b4a616", + "nmdc:0ca043b630ba304cb80603e8332c78cf", + "nmdc:059ff39ced52c0df45a331c4e9e10fdd", + "nmdc:7bfa3b5b29ec5cf9882251585d99f9bf", + "nmdc:2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "nmdc:678e7c401a6971629f7d3ada83b307ab" + ], + "was_informed_by": "gold:Gp0127634", + "id": "nmdc:d6415e0c3e4f9b8702c1ae98c6fb2f48", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0r0vf18", + "started_at_time": "2021-10-11T02:23:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:49:55+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2197847748, + 
"type": "nmdc:DataObject", + "id": "jgi:574fde607ded5e3df1ee1403", + "name": "10533.1.165310.GTTCGGT-AACCGAA.fastq.gz" + }, + { + "name": "Gp0127635_Filtered Reads", + "description": "Filtered Reads for Gp0127635", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filtered.fastq.gz", + "md5_checksum": "f8bc16e232f7ba0f6d6b5ca35a708c36", + "id": "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", + "file_size_bytes": 1951049105 + }, + { + "name": "Gp0127635_Filtered Stats", + "description": "Filtered Stats for Gp0127635", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/qa/nmdc_mga0ak4p20_filterStats.txt", + "md5_checksum": "fbc260443529d6e8067efdac3b58a8c1", + "id": "nmdc:fbc260443529d6e8067efdac3b58a8c1", + "file_size_bytes": 280 + }, + { + "name": "Gp0127635_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report.tsv", + "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", + "id": "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "file_size_bytes": 3696 + }, + { + "name": "Gp0127635_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_report_full.tsv", + "md5_checksum": "ddec46781153da60da815c65871f5413", + "id": "nmdc:ddec46781153da60da815c65871f5413", + "file_size_bytes": 677459 + }, + { + "name": "Gp0127635_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127635", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_gottcha2_krona.html", + "md5_checksum": "e626ec18dba4885613240927cbb99d8b", + "id": "nmdc:e626ec18dba4885613240927cbb99d8b", + "file_size_bytes": 
236164 + }, + { + "name": "Gp0127635_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127635", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_classification.tsv", + "md5_checksum": "f8486e4ee029038a452a3484db10cabc", + "id": "nmdc:f8486e4ee029038a452a3484db10cabc", + "file_size_bytes": 1796179546 + }, + { + "name": "Gp0127635_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127635", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_report.tsv", + "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", + "id": "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "file_size_bytes": 254661 + }, + { + "name": "Gp0127635_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127635", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_centrifuge_krona.html", + "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", + "id": "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "file_size_bytes": 2333534 + }, + { + "name": "Gp0127635_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127635", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_classification.tsv", + "md5_checksum": "59807dae5216b11c96df5593a26d9a88", + "id": "nmdc:59807dae5216b11c96df5593a26d9a88", + "file_size_bytes": 1432249556 + }, + { + "name": "Gp0127635_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127635", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_report.tsv", + "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", + "id": "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "file_size_bytes": 639738 + }, + { + "name": "Gp0127635_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127635", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/ReadbasedAnalysis/nmdc_mga0ak4p20_kraken2_krona.html", + "md5_checksum": "6748020214a3d68ad588e3548107208e", + "id": "nmdc:6748020214a3d68ad588e3548107208e", + "file_size_bytes": 3996293 + }, + { + "name": "Gp0127635_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127635", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_contigs.fna", + "md5_checksum": "3d1b5043e0c49ac6062aeba4ebbba910", + "id": "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "file_size_bytes": 111964628 + }, + { + "name": "Gp0127635_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127635", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_scaffolds.fna", + "md5_checksum": "4d4497f63f95f7d2f8986178dab3ae52", + "id": "nmdc:4d4497f63f95f7d2f8986178dab3ae52", + "file_size_bytes": 111342667 + }, + { + "name": "Gp0127635_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_covstats.txt", + "md5_checksum": "ac98d3d128ec5b045a9ef019a5653b99", + "id": "nmdc:ac98d3d128ec5b045a9ef019a5653b99", + "file_size_bytes": 16397988 + }, + { + "name": "Gp0127635_Assembled AGP file", + "description": "Assembled AGP file for Gp0127635", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_assembly.agp", + "md5_checksum": "1d0302bec371a73f040d052f4b66277c", + "id": "nmdc:1d0302bec371a73f040d052f4b66277c", + "file_size_bytes": 15325341 + }, + { + "name": "Gp0127635_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127635", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/assembly/nmdc_mga0ak4p20_pairedMapped_sorted.bam", + "md5_checksum": "2d8cca230f439e38f1e628666e40e013", + "id": "nmdc:2d8cca230f439e38f1e628666e40e013", + "file_size_bytes": 2159251548 + }, + { + "name": "Gp0127635_Protein FAA", + "description": "Protein FAA for Gp0127635", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_proteins.faa", + "md5_checksum": "bb7eae2b3dbc58168b9122098f078bb5", + "id": "nmdc:bb7eae2b3dbc58168b9122098f078bb5", + "file_size_bytes": 63157189 + }, + { + "name": "Gp0127635_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127635", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_structural_annotation.gff", + "md5_checksum": "2af7f6c008858f2f0d47c00fa9758129", + "id": "nmdc:2af7f6c008858f2f0d47c00fa9758129", + "file_size_bytes": 2526 + }, + { + "name": "Gp0127635_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127635", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_functional_annotation.gff", + "md5_checksum": "dd3668477e39a65243179dfb9e4bf26e", + "id": "nmdc:dd3668477e39a65243179dfb9e4bf26e", + "file_size_bytes": 71092075 + }, + { + "name": "Gp0127635_KO TSV file", + "description": "KO TSV file for Gp0127635", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko.tsv", + "md5_checksum": "be0e9a5999ddfd46bf5daac56aa96b86", + "id": "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", + "file_size_bytes": 8023056 + }, + { + "name": "Gp0127635_EC TSV file", + "description": "EC TSV file for Gp0127635", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ec.tsv", + "md5_checksum": "95a6a1f91bf18bc1a781a8890d2e1bc5", + "id": "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", + "file_size_bytes": 5303502 + }, + { + "name": "Gp0127635_COG GFF file", + "description": "COG GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cog.gff", + "md5_checksum": "6960907313875913a789e1fda46ed34e", + "id": "nmdc:6960907313875913a789e1fda46ed34e", + "file_size_bytes": 42106254 + }, + { + "name": "Gp0127635_PFAM GFF file", + "description": "PFAM GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_pfam.gff", + "md5_checksum": "033da43cdca9f81ed2270a9094fdb065", + "id": "nmdc:033da43cdca9f81ed2270a9094fdb065", + "file_size_bytes": 31806020 + }, + { + "name": "Gp0127635_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_tigrfam.gff", + "md5_checksum": "e9603ffd918db8a21df1310b890315ff", + "id": "nmdc:e9603ffd918db8a21df1310b890315ff", + "file_size_bytes": 3500524 + }, + { + "name": "Gp0127635_SMART GFF file", + "description": "SMART GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_smart.gff", + "md5_checksum": "fd98e0cfe1f4ca7b9e4af833c5ef199c", + "id": "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", + "file_size_bytes": 9346082 + }, + { + "name": "Gp0127635_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_supfam.gff", + "md5_checksum": "03481d99958ae1c9dcccb8fd91c0bbf7", + "id": "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", + "file_size_bytes": 52582333 + }, + { + "name": "Gp0127635_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_cath_funfam.gff", + "md5_checksum": "f0a96fb57947358a42053e9fb7134e70", + "id": "nmdc:f0a96fb57947358a42053e9fb7134e70", + "file_size_bytes": 40179818 + }, + { + "name": "Gp0127635_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/annotation/nmdc_mga0ak4p20_ko_ec.gff", + "md5_checksum": "9737b61f2e6e923ac662e0a1c4f6aaa9", + "id": "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9", + "file_size_bytes": 25482964 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127635_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.tooShort.fa", + "md5_checksum": "daed5e3af5201fe510e780f155f90bc3", + "id": "nmdc:daed5e3af5201fe510e780f155f90bc3", + "file_size_bytes": 86476884 + }, + { + "name": "Gp0127635_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127635", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_bins.unbinned.fa", + "md5_checksum": "7cdb1c384c8bc63b3c127e5bc434ac6b", + "id": "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", + "file_size_bytes": 22898396 + }, + { + "name": "Gp0127635_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127635", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_checkm_qa.out", + "md5_checksum": "b5ae13756638f09d74fdbe03183b231f", + "id": "nmdc:b5ae13756638f09d74fdbe03183b231f", + "file_size_bytes": 1240 + }, + { + "name": "Gp0127635_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127635", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_hqmq_bin.zip", + "md5_checksum": "1dc5796596177362849da19fc4e50b13", + "id": "nmdc:1dc5796596177362849da19fc4e50b13", + "file_size_bytes": 182 + }, + { + "name": "Gp0127635_metabat2 bins", + "description": "metabat2 bins for Gp0127635", + "url": "https://data.microbiomedata.org/data/nmdc:mga0ak4p20/MAGs/nmdc_mga0ak4p20_metabat_bin.zip", + "md5_checksum": "fba0bfa144e9ef179edb10b5a941c259", + "id": "nmdc:fba0bfa144e9ef179edb10b5a941c259", + "file_size_bytes": 795127 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e79" + }, + "description": "Assembled contigs fasta for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly_contigs.fna", + "file_size_bytes": 111137612, + "type": "nmdc:DataObject", + "id": "nmdc:178298f959546299f78fb2bff07cd460", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7a" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127635", + "url": 
"https://data.microbiomedata.org/data/1781_100337/assembly/mapping_stats.txt", + "file_size_bytes": 15570972, + "type": "nmdc:DataObject", + "id": "nmdc:4fc895272dffb49edac9e03d08684d05", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7b" + }, + "description": "Assembled AGP file for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly.agp", + "file_size_bytes": 13670021, + "type": "nmdc:DataObject", + "id": "nmdc:b148c0b9078ed2c9dc0ef9d47d6c4273", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7e" + }, + "description": "Assembled scaffold fasta for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/assembly_scaffolds.fna", + "file_size_bytes": 110516271, + "type": "nmdc:DataObject", + "id": "nmdc:c2ae4e2ecc5f68caf6fb04e4c0da29a8", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e7f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127635", + "url": "https://data.microbiomedata.org/data/1781_100337/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2128896439, + "type": "nmdc:DataObject", + "id": "nmdc:2e8b9eb5d9a8cbc0e2289bd29ab58bd5", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b49" + }, + "id": "nmdc:1ef5e7e08bb9692d1ce21b338888f92b", + "name": "1781_100337.krona.html", + "description": "Gold:Gp0127635 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100337/ReadbasedAnalysis/centrifuge/1781_100337.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b51" + }, + "id": "nmdc:c9595cd833ad6e651762d7ee3a8e9e5b", + 
"name": "1781_100337.json", + "description": "Gold:Gp0127635 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100337/ReadbasedAnalysis/1781_100337.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ee" + }, + "id": "nmdc:5eb1b1b53aab751c8ad74e9547ff8a70", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127635", + "file_size_bytes": 84061517, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f2" + }, + "id": "nmdc:1c62a2e67e8af295a7f57e2b4492dc22", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127635", + "file_size_bytes": 24394459, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f5" + }, + "id": "nmdc:093a82c4b7951c837e8a281cfae9f128", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127635", + "file_size_bytes": 1071, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f6" + }, + "id": "nmdc:0f5cac2a54bebd617c1c00bcd7e4ba50", + "name": "gold:Gp0127635.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127635", + "file_size_bytes": 365625, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f7" + }, + "id": "nmdc:db607d74248055a9962eda6db70c280e", + "name": "gold:Gp0127635.bins.1.fa", + "description": "metabat2 binned contig file for 
gold:Gp0127635", + "file_size_bytes": 217636, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fd" + }, + "id": "nmdc:90a132bcef3ba4ebdddc7d4b1297f157", + "name": "gold:Gp0127635.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127635", + "file_size_bytes": 348955, + "url": "https://data.microbiomedata.org/data/1781_100337/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf0" + }, + "description": "Structural annotation GFF file for gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_structural_annotation.gff", + "md5_checksum": "e1cd02b3a92223d8e30e8d7c90837d9a", + "file_size_bytes": 3385, + "id": "nmdc:e1cd02b3a92223d8e30e8d7c90837d9a", + "name": "gold:Gp0127635_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf6" + }, + "description": "Protein FAA for gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_proteins.faa", + "md5_checksum": "b2ee2639269e6d665f772fc8c4e31d07", + "file_size_bytes": 3385, + "id": "nmdc:b2ee2639269e6d665f772fc8c4e31d07", + "name": "gold:Gp0127635_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cf7" + }, + "description": "Functional annotation GFF file for gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_functional_annotation.gff", + "md5_checksum": "4768d5de701a1ac55ed0c2d57a270dd2", + "file_size_bytes": 3385, + "id": "nmdc:4768d5de701a1ac55ed0c2d57a270dd2", + "name": "gold:Gp0127635_Functional annotation 
GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cfc" + }, + "description": "KO TSV File for gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_ko.tsv", + "md5_checksum": "4cddc89fb8b405210d66b836825c37ee", + "file_size_bytes": 3385, + "id": "nmdc:4cddc89fb8b405210d66b836825c37ee", + "name": "gold:Gp0127635_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16cfe" + }, + "description": "EC TSV File for gold:Gp0127635", + "url": "https://data.microbiomedata.org/1781_100337/img_annotation/Ga0482235_ec.tsv", + "md5_checksum": "9e0a73962f7014df93613b04fae9f8be", + "file_size_bytes": 3385, + "id": "nmdc:9e0a73962f7014df93613b04fae9f8be", + "name": "gold:Gp0127635_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34708" + }, + "has_input": [ + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "nmdc:2d8cca230f439e38f1e628666e40e013", + "nmdc:dd3668477e39a65243179dfb9e4bf26e" + ], + "too_short_contig_num": 192406, + "part_of": [ + "nmdc:mga0ak4p20" + ], + "binned_contig_num": 502, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:daed5e3af5201fe510e780f155f90bc3", + "nmdc:7cdb1c384c8bc63b3c127e5bc434ac6b", + "nmdc:b5ae13756638f09d74fdbe03183b231f", + "nmdc:1dc5796596177362849da19fc4e50b13", + "nmdc:fba0bfa144e9ef179edb10b5a941c259" + ], + "was_informed_by": "gold:Gp0127635", + "input_contig_num": 206754, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0ak4p20", + "mags_list": [ + { + "number_of_contig": 203, + "completeness": 41.91, + "bin_name": "bins.1", + 
"gene_count": 1456, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.88, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 24 + }, + { + "number_of_contig": 171, + "completeness": 8.33, + "bin_name": "bins.2", + "gene_count": 880, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 55, + "completeness": 14.66, + "bin_name": "bins.3", + "gene_count": 269, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + "number_of_contig": 73, + "completeness": 0.0, + "bin_name": "bins.4", + "gene_count": 475, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + } + ], + "unbinned_contig_num": 13846, + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a7" + }, + "has_input": [ + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910" + ], + "part_of": [ + "nmdc:mga0ak4p20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bb7eae2b3dbc58168b9122098f078bb5", + "nmdc:2af7f6c008858f2f0d47c00fa9758129", + 
"nmdc:dd3668477e39a65243179dfb9e4bf26e", + "nmdc:be0e9a5999ddfd46bf5daac56aa96b86", + "nmdc:95a6a1f91bf18bc1a781a8890d2e1bc5", + "nmdc:6960907313875913a789e1fda46ed34e", + "nmdc:033da43cdca9f81ed2270a9094fdb065", + "nmdc:e9603ffd918db8a21df1310b890315ff", + "nmdc:fd98e0cfe1f4ca7b9e4af833c5ef199c", + "nmdc:03481d99958ae1c9dcccb8fd91c0bbf7", + "nmdc:f0a96fb57947358a42053e9fb7134e70", + "nmdc:9737b61f2e6e923ac662e0a1c4f6aaa9" + ], + "was_informed_by": "gold:Gp0127635", + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0ak4p20", + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f90" + }, + "has_input": [ + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" + ], + "part_of": [ + "nmdc:mga0ak4p20" + ], + "ctg_logsum": 269360, + "scaf_logsum": 270403, + "gap_pct": 0.00195, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3d1b5043e0c49ac6062aeba4ebbba910", + "nmdc:4d4497f63f95f7d2f8986178dab3ae52", + "nmdc:ac98d3d128ec5b045a9ef019a5653b99", + "nmdc:1d0302bec371a73f040d052f4b66277c", + "nmdc:2d8cca230f439e38f1e628666e40e013" + ], + "asm_score": 3.934, + "was_informed_by": "gold:Gp0127635", + "ctg_powsum": 29422, + "scaf_max": 23775, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "scaf_powsum": 29544, + "execution_resource": "NERSC-Cori", + "contigs": 206757, + "name": "Assembly Activity for nmdc:mga0ak4p20", + "ctg_max": 23775, + "gc_std": 0.10033, + "contig_bp": 103842002, + "gc_avg": 0.61621, + "started_at_time": "2021-10-11T02:26:59Z", + "scaf_bp": 103844032, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 206599, + "ended_at_time": "2021-10-11T04:11:48+00:00", + "ctg_l50": 496, + "ctg_l90": 290, + "ctg_n50": 55322, + "ctg_n90": 171862, + "scaf_l50": 497, + "scaf_l90": 290, 
+ "scaf_n50": 55067, + "scaf_n90": 171721 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b47" + }, + "id": "nmdc:omprc-11-kc23zq65", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-59xteq78" + ], + "has_output": [ + "jgi:574fde607ded5e3df1ee1403" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127635" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c860" + }, + "has_input": [ + "nmdc:1a16fdf096087338922b288165a924b8" + ], + "part_of": [ + "nmdc:mga0ak4p20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36", + "nmdc:fbc260443529d6e8067efdac3b58a8c1" + ], + "was_informed_by": "gold:Gp0127635", + "input_read_count": 25320866, + "output_read_bases": 3673182178, + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3823450766, + "name": "Read QC Activity for nmdc:mga0ak4p20", + "output_read_count": 24600396, + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:11:48+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": 
"649b009bff710ae353f8cf25" + }, + "has_input": [ + "nmdc:f8bc16e232f7ba0f6d6b5ca35a708c36" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d8a410c52c8f6cf0097b674492cc3926", + "nmdc:ddec46781153da60da815c65871f5413", + "nmdc:e626ec18dba4885613240927cbb99d8b", + "nmdc:f8486e4ee029038a452a3484db10cabc", + "nmdc:4121f2ec52b80b7feb9d9a4749080125", + "nmdc:5b8c1cd8ba47041c20d3e18cb902a854", + "nmdc:59807dae5216b11c96df5593a26d9a88", + "nmdc:a491f6797bd7294dbc5ba301efb3466e", + "nmdc:6748020214a3d68ad588e3548107208e" + ], + "was_informed_by": "gold:Gp0127635", + "id": "nmdc:80cd0785782060a937ca17d1b417b0b6", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0ak4p20", + "started_at_time": "2021-10-11T02:26:59Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:11:48+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1954789686, + "type": "nmdc:DataObject", + "id": "jgi:574fde647ded5e3df1ee1406", + "name": "10533.1.165310.CGTAGGT-AACCTAC.fastq.gz" + }, + { + "name": "Gp0127637_Filtered Reads", + "description": "Filtered Reads for Gp0127637", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filtered.fastq.gz", + "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", + "id": "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", + "file_size_bytes": 1553219358 + }, + { + "name": "Gp0127637_Filtered Stats", + "description": "Filtered Stats for Gp0127637", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/qa/nmdc_mga0sb9b30_filterStats.txt", + "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", + "id": "nmdc:611e67df261e050860b1075c6a6a5ff5", + "file_size_bytes": 289 + }, + { + "name": "Gp0127637_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127637", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report.tsv", + "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", + "id": "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "file_size_bytes": 660 + }, + { + "name": "Gp0127637_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_report_full.tsv", + "md5_checksum": "37dd1d73ad47979ee5284830d27df535", + "id": "nmdc:37dd1d73ad47979ee5284830d27df535", + "file_size_bytes": 594054 + }, + { + "name": "Gp0127637_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127637", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_gottcha2_krona.html", + "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", + "id": "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "file_size_bytes": 227750 + }, + { + "name": "Gp0127637_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127637", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_classification.tsv", + "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", + "id": "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "file_size_bytes": 1457058272 + }, + { + "name": "Gp0127637_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127637", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_report.tsv", + "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", + "id": "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "file_size_bytes": 251867 + }, + { + "name": "Gp0127637_Centrifuge Krona HTML report", + "description": "Centrifuge 
Krona HTML report for Gp0127637", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_centrifuge_krona.html", + "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", + "id": "nmdc:81a6efbd082e07bc2db174a88d64a272", + "file_size_bytes": 2325282 + }, + { + "name": "Gp0127637_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127637", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_classification.tsv", + "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", + "id": "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "file_size_bytes": 1160106364 + }, + { + "name": "Gp0127637_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127637", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_report.tsv", + "md5_checksum": "9a1826f66ee45187d627076d11dc491f", + "id": "nmdc:9a1826f66ee45187d627076d11dc491f", + "file_size_bytes": 613810 + }, + { + "name": "Gp0127637_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127637", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/ReadbasedAnalysis/nmdc_mga0sb9b30_kraken2_krona.html", + "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", + "id": "nmdc:67adb9cc2c75251f556a90b1a959ea72", + "file_size_bytes": 3853908 + }, + { + "name": "Gp0127637_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127637", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_contigs.fna", + "md5_checksum": "aee81646e593045bbb32a0012870b88b", + "id": "nmdc:aee81646e593045bbb32a0012870b88b", + "file_size_bytes": 117200777 + 
}, + { + "name": "Gp0127637_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127637", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_scaffolds.fna", + "md5_checksum": "f1026db242cad285204c9c3d6307c183", + "id": "nmdc:f1026db242cad285204c9c3d6307c183", + "file_size_bytes": 116554638 + }, + { + "name": "Gp0127637_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_covstats.txt", + "md5_checksum": "b02b0a0145d14e97a31e6a6f7e4b8dc8", + "id": "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", + "file_size_bytes": 17037754 + }, + { + "name": "Gp0127637_Assembled AGP file", + "description": "Assembled AGP file for Gp0127637", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_assembly.agp", + "md5_checksum": "8afcf1e8b7b3f35edaefee7a0c31e19f", + "id": "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", + "file_size_bytes": 15931363 + }, + { + "name": "Gp0127637_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127637", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/assembly/nmdc_mga0sb9b30_pairedMapped_sorted.bam", + "md5_checksum": "dee5fa37f57a24685b65e00380d6e433", + "id": "nmdc:dee5fa37f57a24685b65e00380d6e433", + "file_size_bytes": 1739825120 + }, + { + "name": "Gp0127637_Protein FAA", + "description": "Protein FAA for Gp0127637", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_proteins.faa", + "md5_checksum": "69603434971f93dbd79860c18dd5c61a", + "id": "nmdc:69603434971f93dbd79860c18dd5c61a", + "file_size_bytes": 66263123 + }, + { + "name": "Gp0127637_Structural annotation 
GFF file", + "description": "Structural annotation GFF file for Gp0127637", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_structural_annotation.gff", + "md5_checksum": "bf8f822c6730b4cc73715ced3d25c262", + "id": "nmdc:bf8f822c6730b4cc73715ced3d25c262", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127637_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127637", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_functional_annotation.gff", + "md5_checksum": "b9ec0754ffaa338c899244703bc91386", + "id": "nmdc:b9ec0754ffaa338c899244703bc91386", + "file_size_bytes": 74459552 + }, + { + "name": "Gp0127637_KO TSV file", + "description": "KO TSV file for Gp0127637", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko.tsv", + "md5_checksum": "22402cc61770feb5a0aaa4f760808366", + "id": "nmdc:22402cc61770feb5a0aaa4f760808366", + "file_size_bytes": 8394894 + }, + { + "name": "Gp0127637_EC TSV file", + "description": "EC TSV file for Gp0127637", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ec.tsv", + "md5_checksum": "8c96f7faa38c361acc247b5a107a6b54", + "id": "nmdc:8c96f7faa38c361acc247b5a107a6b54", + "file_size_bytes": 5556852 + }, + { + "name": "Gp0127637_COG GFF file", + "description": "COG GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cog.gff", + "md5_checksum": "7a28d1eafd3a3c181e95f61eb3d18bf1", + "id": "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", + "file_size_bytes": 44328195 + }, + { + "name": "Gp0127637_PFAM GFF file", + "description": "PFAM GFF file for Gp0127637", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_pfam.gff", + "md5_checksum": "89a8657f659710b3927baab155917fdf", + "id": "nmdc:89a8657f659710b3927baab155917fdf", + "file_size_bytes": 33562431 + }, + { + "name": "Gp0127637_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_tigrfam.gff", + "md5_checksum": "9b9ecf34f2f6ef6865d4864f5debfbb7", + "id": "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", + "file_size_bytes": 3752251 + }, + { + "name": "Gp0127637_SMART GFF file", + "description": "SMART GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_smart.gff", + "md5_checksum": "5cae6736713d02ccbe26543d733875cb", + "id": "nmdc:5cae6736713d02ccbe26543d733875cb", + "file_size_bytes": 9871224 + }, + { + "name": "Gp0127637_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_supfam.gff", + "md5_checksum": "a64350eb947c199cc1fbfb087191c0c7", + "id": "nmdc:a64350eb947c199cc1fbfb087191c0c7", + "file_size_bytes": 55329770 + }, + { + "name": "Gp0127637_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_cath_funfam.gff", + "md5_checksum": "b8492828a1ad078d9c3192bab4d9a3fa", + "id": "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", + "file_size_bytes": 42052238 + }, + { + "name": "Gp0127637_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/annotation/nmdc_mga0sb9b30_ko_ec.gff", + "md5_checksum": "2471f27b6cf11b6f93c791c273989731", + "id": "nmdc:2471f27b6cf11b6f93c791c273989731", + "file_size_bytes": 26689447 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + 
"description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127637_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.tooShort.fa", + "md5_checksum": "7968c6b88e49f066bd24982b4d54965b", + "id": "nmdc:7968c6b88e49f066bd24982b4d54965b", + "file_size_bytes": 91577123 + }, + { + "name": "Gp0127637_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_bins.unbinned.fa", + "md5_checksum": "120fbaa7439eb628d9a982de573446a8", + "id": "nmdc:120fbaa7439eb628d9a982de573446a8", + "file_size_bytes": 22556841 + }, + { + "name": "Gp0127637_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127637", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_checkm_qa.out", + "md5_checksum": "347a7ee18b37674e031cca9046e92623", + "id": "nmdc:347a7ee18b37674e031cca9046e92623", + "file_size_bytes": 1092 + }, + { + "name": "Gp0127637_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127637", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_hqmq_bin.zip", + "md5_checksum": "de1da5ea4bfdf3131a6c510b79b145c2", + "id": "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", + "file_size_bytes": 504932 + }, + { + "name": 
"Gp0127637_metabat2 bins", + "description": "metabat2 bins for Gp0127637", + "url": "https://data.microbiomedata.org/data/nmdc:mga0sb9b30/MAGs/nmdc_mga0sb9b30_metabat_bin.zip", + "md5_checksum": "382d00338a5e4829285e58a203de153e", + "id": "nmdc:382d00338a5e4829285e58a203de153e", + "file_size_bytes": 432910 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e80" + }, + "description": "Assembled contigs fasta for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly_contigs.fna", + "file_size_bytes": 116341325, + "type": "nmdc:DataObject", + "id": "nmdc:d66bd2d4b3ad1abef6787addfb5aa8b6", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e83" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/mapping_stats.txt", + "file_size_bytes": 16178302, + "type": "nmdc:DataObject", + "id": "nmdc:c15461f775c7a7b44ec57af9e2897e17", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e84" + }, + "description": "Assembled scaffold fasta for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly_scaffolds.fna", + "file_size_bytes": 115695690, + "type": "nmdc:DataObject", + "id": "nmdc:cd9b2948e3c873bc1c9a2aebe3401cc5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e87" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1715892686, + "type": "nmdc:DataObject", + "id": "nmdc:aa0ca68275dfc45fe70cc94e247e1a69", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": 
"649b003d1ae706d7b5b14e88" + }, + "description": "Assembled AGP file for gold:Gp0127637", + "url": "https://data.microbiomedata.org/data/1781_100339/assembly/assembly.agp", + "file_size_bytes": 14211451, + "type": "nmdc:DataObject", + "id": "nmdc:b152dd521661ba36392ccc535795542e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b60" + }, + "id": "nmdc:9eeee33a195e10f3f8e95f2049dac406", + "name": "1781_100339.krona.html", + "description": "Gold:Gp0127637 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100339/ReadbasedAnalysis/centrifuge/1781_100339.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b65" + }, + "id": "nmdc:3eca864f3fb90709467d18cc9247709b", + "name": "1781_100339.json", + "description": "Gold:Gp0127637 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100339/ReadbasedAnalysis/1781_100339.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165f9" + }, + "id": "nmdc:51db76a0482441e5ade7e3d99694ffe6", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127637", + "file_size_bytes": 89058634, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fb" + }, + "id": "nmdc:6a3a98a4c86866bcb2c347d35006fc55", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127637", + "file_size_bytes": 22532401, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fc" + }, + "id": "nmdc:6c4325a1e9ba6b106d58e1fa99239c56", + "name": "checkm_qa.out", + 
"description": "metabat2 bin checkm quality assessment result for gold:Gp0127637", + "file_size_bytes": 1092, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165fe" + }, + "id": "nmdc:419c789c7c7458e9775dcfb3e95530b2", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0127637", + "file_size_bytes": 1003, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16600" + }, + "id": "nmdc:2e3079d5c7c114727c5445f2ed43ed5f", + "name": "gold:Gp0127637.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127637", + "file_size_bytes": 1650635, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16601" + }, + "id": "nmdc:004ec11cd626b798ffccba986dd4f129", + "name": "gold:Gp0127637.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127637", + "file_size_bytes": 1046842, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16603" + }, + "id": "nmdc:024873d88915f917de41ed83c0b7b98c", + "name": "gold:Gp0127637.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127637", + "file_size_bytes": 230352, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16607" + }, + "id": 
"nmdc:6920af8dfe8dc23b9a267c837184d619", + "name": "gold:Gp0127637.bins.2.fa", + "description": "hqmq binned contig file for gold:Gp0127637", + "file_size_bytes": 1619653, + "url": "https://data.microbiomedata.org/data/1781_100339/img_MAGs/hqmq-metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d02" + }, + "description": "KO TSV File for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_ko.tsv", + "md5_checksum": "ce74f349e03ae28dd49fc5ea4cd1d91d", + "file_size_bytes": 3385, + "id": "nmdc:ce74f349e03ae28dd49fc5ea4cd1d91d", + "name": "gold:Gp0127637_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d05" + }, + "description": "Protein FAA for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_proteins.faa", + "md5_checksum": "c43a6b5a306a8f14aab780d8f1bf9c41", + "file_size_bytes": 3385, + "id": "nmdc:c43a6b5a306a8f14aab780d8f1bf9c41", + "name": "gold:Gp0127637_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d06" + }, + "description": "Structural annotation GFF file for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_structural_annotation.gff", + "md5_checksum": "5f6b287493cde8cf8cb49348a2868aa6", + "file_size_bytes": 3385, + "id": "nmdc:5f6b287493cde8cf8cb49348a2868aa6", + "name": "gold:Gp0127637_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d09" + }, + "description": "EC TSV File for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_ec.tsv", + "md5_checksum": 
"1549562abe1044734fab8562585ec161", + "file_size_bytes": 3385, + "id": "nmdc:1549562abe1044734fab8562585ec161", + "name": "gold:Gp0127637_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0f" + }, + "description": "Functional annotation GFF file for gold:Gp0127637", + "url": "https://data.microbiomedata.org/1781_100339/img_annotation/Ga0482233_functional_annotation.gff", + "md5_checksum": "74b2fc3dd196a3d615c7d0d478fa2f90", + "file_size_bytes": 3385, + "id": "nmdc:74b2fc3dd196a3d615c7d0d478fa2f90", + "name": "gold:Gp0127637_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34702" + }, + "has_input": [ + "nmdc:aee81646e593045bbb32a0012870b88b", + "nmdc:dee5fa37f57a24685b65e00380d6e433", + "nmdc:b9ec0754ffaa338c899244703bc91386" + ], + "too_short_contig_num": 200319, + "part_of": [ + "nmdc:mga0sb9b30" + ], + "binned_contig_num": 482, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:7968c6b88e49f066bd24982b4d54965b", + "nmdc:120fbaa7439eb628d9a982de573446a8", + "nmdc:347a7ee18b37674e031cca9046e92623", + "nmdc:de1da5ea4bfdf3131a6c510b79b145c2", + "nmdc:382d00338a5e4829285e58a203de153e" + ], + "was_informed_by": "gold:Gp0127637", + "input_contig_num": 214863, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0sb9b30", + "mags_list": [ + { + "number_of_contig": 59, + "completeness": 8.33, + "bin_name": "bins.1", + "gene_count": 295, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + 
"gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 233, + "completeness": 45.87, + "bin_name": "bins.2", + "gene_count": 1342, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.28, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 18 + }, + { + "number_of_contig": 190, + "completeness": 75.08, + "bin_name": "bins.3", + "gene_count": 1991, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.21, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 37 + } + ], + "unbinned_contig_num": 14062, + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:11:56+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a6" + }, + "has_input": [ + "nmdc:aee81646e593045bbb32a0012870b88b" + ], + "part_of": [ + "nmdc:mga0sb9b30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:69603434971f93dbd79860c18dd5c61a", + "nmdc:bf8f822c6730b4cc73715ced3d25c262", + "nmdc:b9ec0754ffaa338c899244703bc91386", + "nmdc:22402cc61770feb5a0aaa4f760808366", + "nmdc:8c96f7faa38c361acc247b5a107a6b54", + "nmdc:7a28d1eafd3a3c181e95f61eb3d18bf1", + "nmdc:89a8657f659710b3927baab155917fdf", + "nmdc:9b9ecf34f2f6ef6865d4864f5debfbb7", + "nmdc:5cae6736713d02ccbe26543d733875cb", + "nmdc:a64350eb947c199cc1fbfb087191c0c7", + "nmdc:b8492828a1ad078d9c3192bab4d9a3fa", + "nmdc:2471f27b6cf11b6f93c791c273989731" + ], + "was_informed_by": "gold:Gp0127637", + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "name": 
"Annotation Activity for nmdc:mga0sb9b30", + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:11:56+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f93" + }, + "has_input": [ + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" + ], + "part_of": [ + "nmdc:mga0sb9b30" + ], + "ctg_logsum": 271617, + "scaf_logsum": 272416, + "gap_pct": 0.00166, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:aee81646e593045bbb32a0012870b88b", + "nmdc:f1026db242cad285204c9c3d6307c183", + "nmdc:b02b0a0145d14e97a31e6a6f7e4b8dc8", + "nmdc:8afcf1e8b7b3f35edaefee7a0c31e19f", + "nmdc:dee5fa37f57a24685b65e00380d6e433" + ], + "asm_score": 5.062, + "was_informed_by": "gold:Gp0127637", + "ctg_powsum": 29885, + "scaf_max": 43650, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "scaf_powsum": 29983, + "execution_resource": "NERSC-Cori", + "contigs": 214863, + "name": "Assembly Activity for nmdc:mga0sb9b30", + "ctg_max": 43650, + "gc_std": 0.08814, + "contig_bp": 108739484, + "gc_avg": 0.63266, + "started_at_time": "2021-10-11T02:24:01Z", + "scaf_bp": 108741284, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 214737, + "ended_at_time": "2021-10-11T03:11:56+00:00", + "ctg_l50": 505, + "ctg_l90": 294, + "ctg_n50": 58474, + "ctg_n90": 177521, + "scaf_l50": 505, + "scaf_l90": 294, + "scaf_n50": 58469, + "scaf_n90": 177412 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b48" + }, + "id": "nmdc:omprc-11-c8dzx197", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-kwfbp795" + ], + "has_output": [ + 
"jgi:574fde647ded5e3df1ee1406" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127637" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85f" + }, + "has_input": [ + "nmdc:320ac579913ecc4c218607b6b3b915b3" + ], + "part_of": [ + "nmdc:mga0sb9b30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b", + "nmdc:611e67df261e050860b1075c6a6a5ff5" + ], + "was_informed_by": "gold:Gp0127637", + "input_read_count": 24239336, + "output_read_bases": 2975652755, + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3660139736, + "name": "Read QC Activity for nmdc:mga0sb9b30", + "output_read_count": 19917090, + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:11:56+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf23" + }, + "has_input": [ + "nmdc:805310f4b1e39a0cc9e5b5787576cb8b" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9268e073dacb7f7cd5f9513393cb0b2a", + "nmdc:37dd1d73ad47979ee5284830d27df535", + "nmdc:43bffbfb830c6e3ccc140ec0dff1e773", + "nmdc:cb3bd5ca5088484cb4e580ad91d736b2", + "nmdc:f44a5d59785cdededea0fe4a6a429c30", + "nmdc:81a6efbd082e07bc2db174a88d64a272", + "nmdc:f63856a84bc9afb8954ccdb1803d5fde", + "nmdc:9a1826f66ee45187d627076d11dc491f", + 
"nmdc:67adb9cc2c75251f556a90b1a959ea72" + ], + "was_informed_by": "gold:Gp0127637", + "id": "nmdc:c6d11b07b7b33c0d906ce0d4a58a7ccf", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0sb9b30", + "started_at_time": "2021-10-11T02:24:01Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:11:56+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1920284821, + "type": "nmdc:DataObject", + "id": "jgi:574fde837ded5e3df1ee141d", + "name": "10533.2.165322.TCATCAC-GGTGATG.fastq.gz" + }, + { + "name": "Gp0127638_Filtered Reads", + "description": "Filtered Reads for Gp0127638", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filtered.fastq.gz", + "md5_checksum": "56ba2416c050decd6c16c618c1e4a752", + "id": "nmdc:56ba2416c050decd6c16c618c1e4a752", + "file_size_bytes": 1649318115 + }, + { + "name": "Gp0127638_Filtered Stats", + "description": "Filtered Stats for Gp0127638", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/qa/nmdc_mga0hjgc20_filterStats.txt", + "md5_checksum": "5c9398042e9ff608befa78e86597bdf0", + "id": "nmdc:5c9398042e9ff608befa78e86597bdf0", + "file_size_bytes": 283 + }, + { + "name": "Gp0127638_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report.tsv", + "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", + "id": "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "file_size_bytes": 2025 + }, + { + "name": "Gp0127638_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_report_full.tsv", + "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", + 
"id": "nmdc:b6de56746a284f8226dd86817c8ae04e", + "file_size_bytes": 655633 + }, + { + "name": "Gp0127638_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127638", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_gottcha2_krona.html", + "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", + "id": "nmdc:d9572e708af9f0a06e98cfddfb298359", + "file_size_bytes": 232133 + }, + { + "name": "Gp0127638_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127638", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_classification.tsv", + "md5_checksum": "e9946f36795474182b7759d3d7532b57", + "id": "nmdc:e9946f36795474182b7759d3d7532b57", + "file_size_bytes": 1448205544 + }, + { + "name": "Gp0127638_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127638", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_report.tsv", + "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", + "id": "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "file_size_bytes": 253872 + }, + { + "name": "Gp0127638_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127638", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_centrifuge_krona.html", + "md5_checksum": "997a66f49a232750bd7132639f3387e7", + "id": "nmdc:997a66f49a232750bd7132639f3387e7", + "file_size_bytes": 2331772 + }, + { + "name": "Gp0127638_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127638", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_classification.tsv", + "md5_checksum": "d3f604a59babf001839d38a617b62931", + "id": "nmdc:d3f604a59babf001839d38a617b62931", + "file_size_bytes": 1157365410 + }, + { + "name": "Gp0127638_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127638", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_report.tsv", + "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", + "id": "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "file_size_bytes": 621484 + }, + { + "name": "Gp0127638_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127638", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/ReadbasedAnalysis/nmdc_mga0hjgc20_kraken2_krona.html", + "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", + "id": "nmdc:70c2fc1a2c7c0032528ff91ad1576465", + "file_size_bytes": 3896830 + }, + { + "name": "Gp0127638_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127638", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_contigs.fna", + "md5_checksum": "5122503797ac0ed9694a6f4feecab955", + "id": "nmdc:5122503797ac0ed9694a6f4feecab955", + "file_size_bytes": 84307064 + }, + { + "name": "Gp0127638_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127638", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_scaffolds.fna", + "md5_checksum": "d7ee4628101b11bc5fb67d961a4e1a0a", + "id": "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", + "file_size_bytes": 83796938 + }, + { + "name": "Gp0127638_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127638", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_covstats.txt", + "md5_checksum": "0944f2c0dd70a751117fb10d9a41fddc", + "id": "nmdc:0944f2c0dd70a751117fb10d9a41fddc", + "file_size_bytes": 13413799 + }, + { + "name": "Gp0127638_Assembled AGP file", + "description": "Assembled AGP file for Gp0127638", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_assembly.agp", + "md5_checksum": "1917dcbbe1efcc2a57c511648a7f332e", + "id": "nmdc:1917dcbbe1efcc2a57c511648a7f332e", + "file_size_bytes": 12526116 + }, + { + "name": "Gp0127638_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127638", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/assembly/nmdc_mga0hjgc20_pairedMapped_sorted.bam", + "md5_checksum": "6420476f7e93425a68aa00b8e09cd6e7", + "id": "nmdc:6420476f7e93425a68aa00b8e09cd6e7", + "file_size_bytes": 1810224630 + }, + { + "name": "Gp0127638_Protein FAA", + "description": "Protein FAA for Gp0127638", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_proteins.faa", + "md5_checksum": "f56690d136c4dafdc1eaa64a21fd9210", + "id": "nmdc:f56690d136c4dafdc1eaa64a21fd9210", + "file_size_bytes": 49236514 + }, + { + "name": "Gp0127638_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127638", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_structural_annotation.gff", + "md5_checksum": "8be4e8ac2d00bf1d5b4863c36dc3678c", + "id": "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", + "file_size_bytes": 2519 + }, + { + "name": "Gp0127638_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127638", + "data_object_type": 
"Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_functional_annotation.gff", + "md5_checksum": "41453202313c56e06b0cc00b5ee6c375", + "id": "nmdc:41453202313c56e06b0cc00b5ee6c375", + "file_size_bytes": 56761027 + }, + { + "name": "Gp0127638_KO TSV file", + "description": "KO TSV file for Gp0127638", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko.tsv", + "md5_checksum": "e06bd74dce2e5b839b35ac1012d93ba4", + "id": "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", + "file_size_bytes": 6728487 + }, + { + "name": "Gp0127638_EC TSV file", + "description": "EC TSV file for Gp0127638", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ec.tsv", + "md5_checksum": "f2786d1f8a17bedd0104b01ec06ebfce", + "id": "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", + "file_size_bytes": 4522678 + }, + { + "name": "Gp0127638_COG GFF file", + "description": "COG GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cog.gff", + "md5_checksum": "37cb3fb060da091a84f1baa7ef3743fc", + "id": "nmdc:37cb3fb060da091a84f1baa7ef3743fc", + "file_size_bytes": 33992392 + }, + { + "name": "Gp0127638_PFAM GFF file", + "description": "PFAM GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_pfam.gff", + "md5_checksum": "34680897818585cefbef6e69109e7de4", + "id": "nmdc:34680897818585cefbef6e69109e7de4", + "file_size_bytes": 25203872 + }, + { + "name": "Gp0127638_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_tigrfam.gff", + "md5_checksum": "a00404838fbe9f846a704e1dbb14f2b2", + "id": "nmdc:a00404838fbe9f846a704e1dbb14f2b2", + 
"file_size_bytes": 2852587 + }, + { + "name": "Gp0127638_SMART GFF file", + "description": "SMART GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_smart.gff", + "md5_checksum": "700dd121a0ac41e3fa8077d7330adae7", + "id": "nmdc:700dd121a0ac41e3fa8077d7330adae7", + "file_size_bytes": 7723231 + }, + { + "name": "Gp0127638_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_supfam.gff", + "md5_checksum": "e429651ae53a18b07d99880d09a19b26", + "id": "nmdc:e429651ae53a18b07d99880d09a19b26", + "file_size_bytes": 42064836 + }, + { + "name": "Gp0127638_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_cath_funfam.gff", + "md5_checksum": "b22aab3cc1b9231102b23c31b418eff4", + "id": "nmdc:b22aab3cc1b9231102b23c31b418eff4", + "file_size_bytes": 32005228 + }, + { + "name": "Gp0127638_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127638", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/annotation/nmdc_mga0hjgc20_ko_ec.gff", + "md5_checksum": "ebb5a6a7ad1f14fd8cf2178ec59969ef", + "id": "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef", + "file_size_bytes": 21405596 + }, + { + "name": "Gp0127638_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127638", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_checkm_qa.out", + "md5_checksum": "dcdd7e33e92d3658fe68056f21b57f5d", + "id": "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", + "file_size_bytes": 760 + }, + { + "name": "Gp0127638_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127638", + "data_object_type": "Metagenome Bins", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0hjgc20/MAGs/nmdc_mga0hjgc20_hqmq_bin.zip", + "md5_checksum": "8ca8e2250dc68643e937163323f2a826", + "id": "nmdc:8ca8e2250dc68643e937163323f2a826", + "file_size_bytes": 508443 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e86" + }, + "description": "Assembled contigs fasta for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly_contigs.fna", + "file_size_bytes": 83628276, + "type": "nmdc:DataObject", + "id": "nmdc:ed782cb1e889b9965707363c1324ee22", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e89" + }, + "description": "Assembled scaffold fasta for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly_scaffolds.fna", + "file_size_bytes": 83118450, + "type": "nmdc:DataObject", + "id": "nmdc:37adb1b2ce1b858809930aa12526e720", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8a" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/mapping_stats.txt", + "file_size_bytes": 12735011, + "type": "nmdc:DataObject", + "id": "nmdc:05e4b13c2533a4969139e6e11ae71984", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8d" + }, + "description": "Assembled AGP file for gold:Gp0127638", + "url": "https://data.microbiomedata.org/data/1781_100340/assembly/assembly.agp", + "file_size_bytes": 11167924, + "type": "nmdc:DataObject", + "id": "nmdc:d98e0c56d4ea8b29a62f6ba8cc058c72", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8e" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127638", + "url": 
"https://data.microbiomedata.org/data/1781_100340/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1787924636, + "type": "nmdc:DataObject", + "id": "nmdc:0aa8f1c4c591a4080152f9712431f85b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b6b" + }, + "id": "nmdc:3114c2a7faf5fc63ff1e8be9c15ae9ac", + "name": "1781_100340.krona.html", + "description": "Gold:Gp0127638 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100340/ReadbasedAnalysis/centrifuge/1781_100340.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b6e" + }, + "id": "nmdc:73e4e17fc849b239ced558102cb107de", + "name": "1781_100340.json", + "description": "Gold:Gp0127638 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100340/ReadbasedAnalysis/1781_100340.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16602" + }, + "id": "nmdc:1ed134939eb54d78ba95134f8b11abf0", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127638", + "file_size_bytes": 69018209, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16604" + }, + "id": "nmdc:4bf67b94461ec33e4d3bf4f28442c6b5", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127638", + "file_size_bytes": 11722895, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16605" + }, + "id": "nmdc:28848628e266e83cfb2e9af8e90ae9c0", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for 
gold:Gp0127638", + "file_size_bytes": 760, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16606" + }, + "id": "nmdc:143af6193b463b4b8e685e999fc0d756", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0127638", + "file_size_bytes": 1003, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16609" + }, + "id": "nmdc:e4e5dd8c3aaba918e6e98db827fc9d28", + "name": "gold:Gp0127638.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127638", + "file_size_bytes": 1471149, + "url": "https://data.microbiomedata.org/data/1781_100340/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d07" + }, + "description": "EC TSV File for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_ec.tsv", + "md5_checksum": "3bd360103e4e8fc8f89c1df345367776", + "file_size_bytes": 3385, + "id": "nmdc:3bd360103e4e8fc8f89c1df345367776", + "name": "gold:Gp0127638_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d08" + }, + "description": "KO TSV File for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_ko.tsv", + "md5_checksum": "1aba5135d8cddc36da3cd37579be190b", + "file_size_bytes": 3385, + "id": "nmdc:1aba5135d8cddc36da3cd37579be190b", + "name": "gold:Gp0127638_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0a" + }, + "description": 
"Functional annotation GFF file for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_functional_annotation.gff", + "md5_checksum": "3da4d2f1c2db68033fa2264f4db7f459", + "file_size_bytes": 3385, + "id": "nmdc:3da4d2f1c2db68033fa2264f4db7f459", + "name": "gold:Gp0127638_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0b" + }, + "description": "Structural annotation GFF file for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_structural_annotation.gff", + "md5_checksum": "2ca3e1a0ba8007e86dedbec47e85adba", + "file_size_bytes": 3385, + "id": "nmdc:2ca3e1a0ba8007e86dedbec47e85adba", + "name": "gold:Gp0127638_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0c" + }, + "description": "Protein FAA for gold:Gp0127638", + "url": "https://data.microbiomedata.org/1781_100340/img_annotation/Ga0482232_proteins.faa", + "md5_checksum": "17993d4fcfa7be4fd4488804d23b67c6", + "file_size_bytes": 3385, + "id": "nmdc:17993d4fcfa7be4fd4488804d23b67c6", + "name": "gold:Gp0127638_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3471d" + }, + "has_input": [ + "nmdc:5122503797ac0ed9694a6f4feecab955", + "nmdc:6420476f7e93425a68aa00b8e09cd6e7", + "nmdc:41453202313c56e06b0cc00b5ee6c375" + ], + "too_short_contig_num": 162130, + "part_of": [ + "nmdc:mga0hjgc20" + ], + "binned_contig_num": 189, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dcdd7e33e92d3658fe68056f21b57f5d", + "nmdc:8ca8e2250dc68643e937163323f2a826" + ], + "was_informed_by": "gold:Gp0127638", + "input_contig_num": 169697, + "id": 
"nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0hjgc20", + "mags_list": [ + { + "number_of_contig": 189, + "completeness": 73.5, + "bin_name": "bins.1", + "gene_count": 2020, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 37 + } + ], + "unbinned_contig_num": 7378, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9bc" + }, + "has_input": [ + "nmdc:5122503797ac0ed9694a6f4feecab955" + ], + "part_of": [ + "nmdc:mga0hjgc20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f56690d136c4dafdc1eaa64a21fd9210", + "nmdc:8be4e8ac2d00bf1d5b4863c36dc3678c", + "nmdc:41453202313c56e06b0cc00b5ee6c375", + "nmdc:e06bd74dce2e5b839b35ac1012d93ba4", + "nmdc:f2786d1f8a17bedd0104b01ec06ebfce", + "nmdc:37cb3fb060da091a84f1baa7ef3743fc", + "nmdc:34680897818585cefbef6e69109e7de4", + "nmdc:a00404838fbe9f846a704e1dbb14f2b2", + "nmdc:700dd121a0ac41e3fa8077d7330adae7", + "nmdc:e429651ae53a18b07d99880d09a19b26", + "nmdc:b22aab3cc1b9231102b23c31b418eff4", + "nmdc:ebb5a6a7ad1f14fd8cf2178ec59969ef" + ], + "was_informed_by": "gold:Gp0127638", + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0hjgc20", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fac" 
+ }, + "has_input": [ + "nmdc:56ba2416c050decd6c16c618c1e4a752" + ], + "part_of": [ + "nmdc:mga0hjgc20" + ], + "ctg_logsum": 141543, + "scaf_logsum": 141966, + "gap_pct": 0.00109, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5122503797ac0ed9694a6f4feecab955", + "nmdc:d7ee4628101b11bc5fb67d961a4e1a0a", + "nmdc:0944f2c0dd70a751117fb10d9a41fddc", + "nmdc:1917dcbbe1efcc2a57c511648a7f332e", + "nmdc:6420476f7e93425a68aa00b8e09cd6e7" + ], + "asm_score": 6.89, + "was_informed_by": "gold:Gp0127638", + "ctg_powsum": 15753, + "scaf_max": 48487, + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "scaf_powsum": 15801, + "execution_resource": "NERSC-Cori", + "contigs": 169698, + "name": "Assembly Activity for nmdc:mga0hjgc20", + "ctg_max": 48487, + "gc_std": 0.08917, + "gc_avg": 0.63213, + "contig_bp": 77783768, + "started_at_time": "2021-12-01T21:31:29Z", + "scaf_bp": 77784618, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169622, + "ended_at_time": "2021-12-02T20:49:51+00:00", + "ctg_l50": 433, + "ctg_l90": 289, + "ctg_n50": 51455, + "ctg_n90": 144304, + "scaf_l50": 433, + "scaf_l90": 289, + "scaf_n50": 51437, + "scaf_n90": 144234 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b49" + }, + "id": "nmdc:omprc-11-tgxmb243", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-pq3zmp51" + ], + "has_output": [ + "jgi:574fde837ded5e3df1ee141d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", + "omics_type": { + 
"has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127638" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c873" + }, + "has_input": [ + "nmdc:56b2d94789953adf1b4ed35f09f0edd4" + ], + "part_of": [ + "nmdc:mga0hjgc20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:56ba2416c050decd6c16c618c1e4a752", + "nmdc:5c9398042e9ff608befa78e86597bdf0" + ], + "was_informed_by": "gold:Gp0127638", + "input_read_count": 21721428, + "output_read_bases": 2949961420, + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3279935628, + "name": "Read QC Activity for nmdc:mga0hjgc20", + "output_read_count": 19723416, + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf35" + }, + "has_input": [ + "nmdc:56ba2416c050decd6c16c618c1e4a752" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dbbd6ca6777b71d1fac4aae2cd947deb", + "nmdc:b6de56746a284f8226dd86817c8ae04e", + "nmdc:d9572e708af9f0a06e98cfddfb298359", + "nmdc:e9946f36795474182b7759d3d7532b57", + "nmdc:33ff1d85d17d763afc9e21e481cc10d2", + "nmdc:997a66f49a232750bd7132639f3387e7", + "nmdc:d3f604a59babf001839d38a617b62931", + "nmdc:3abfaa434ee1449cbbb69985e48488b4", + "nmdc:70c2fc1a2c7c0032528ff91ad1576465" + ], + "was_informed_by": "gold:Gp0127638", + "id": "nmdc:668844f5ea6cefdcb893db0bb6afc92a", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0hjgc20", + "started_at_time": "2021-12-01T21:31:29Z", + "type": "nmdc:ReadbasedAnalysis", 
+ "ended_at_time": "2021-12-02T20:49:51+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 3408915289, + "type": "nmdc:DataObject", + "id": "jgi:55d7402a0d8785342fcf7e3b", + "name": "9422.8.132674.CGTACG.fastq.gz" + }, + { + "name": "Gp0115670_Filtered Reads", + "description": "Filtered Reads for Gp0115670", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filtered.fastq.gz", + "md5_checksum": "7f6b353300583c60d2d668880b4134cd", + "id": "nmdc:7f6b353300583c60d2d668880b4134cd", + "file_size_bytes": 3012174785 + }, + { + "name": "Gp0115670_Filtered Stats", + "description": "Filtered Stats for Gp0115670", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/qa/nmdc_mga0d7pj22_filterStats.txt", + "md5_checksum": "a4f65d101293fa4345cd865f86597464", + "id": "nmdc:a4f65d101293fa4345cd865f86597464", + "file_size_bytes": 291 + }, + { + "name": "Gp0115670_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report.tsv", + "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", + "id": "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "file_size_bytes": 13758 + }, + { + "name": "Gp0115670_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_report_full.tsv", + "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", + "id": "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "file_size_bytes": 1116084 + }, + { + "name": "Gp0115670_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115670", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_gottcha2_krona.html", + "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", + "id": "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "file_size_bytes": 268542 + }, + { + "name": "Gp0115670_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115670", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_classification.tsv", + "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", + "id": "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "file_size_bytes": 2458475116 + }, + { + "name": "Gp0115670_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115670", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_report.tsv", + "md5_checksum": "c065784bed2b2495d512af93d05967de", + "id": "nmdc:c065784bed2b2495d512af93d05967de", + "file_size_bytes": 261692 + }, + { + "name": "Gp0115670_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115670", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_centrifuge_krona.html", + "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", + "id": "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + "file_size_bytes": 2343355 + }, + { + "name": "Gp0115670_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115670", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_classification.tsv", + "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", + "id": "nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "file_size_bytes": 
2019980511 + }, + { + "name": "Gp0115670_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115670", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_report.tsv", + "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", + "id": "nmdc:8a26d8496a70f4777be0e1237092e44c", + "file_size_bytes": 694029 + }, + { + "name": "Gp0115670_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115670", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/ReadbasedAnalysis/nmdc_mga0d7pj22_kraken2_krona.html", + "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", + "id": "nmdc:694b83f0b6f599948d4248dd48dd9ba9", + "file_size_bytes": 4190653 + }, + { + "name": "Gp0115670_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115670", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_contigs.fna", + "md5_checksum": "975cdb0a18df949be4efb80d1dc4ef0b", + "id": "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + "file_size_bytes": 85578260 + }, + { + "name": "Gp0115670_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115670", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_scaffolds.fna", + "md5_checksum": "1dfaed4da055c5fd4226abe08bd91db9", + "id": "nmdc:1dfaed4da055c5fd4226abe08bd91db9", + "file_size_bytes": 85115954 + }, + { + "name": "Gp0115670_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_covstats.txt", + "md5_checksum": "8a749340eefc40901a22a0ef603bc803", + "id": "nmdc:8a749340eefc40901a22a0ef603bc803", + "file_size_bytes": 12068883 + }, + { + 
"name": "Gp0115670_Assembled AGP file", + "description": "Assembled AGP file for Gp0115670", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_assembly.agp", + "md5_checksum": "ad027e4c3ca67907154c03feeebbd97b", + "id": "nmdc:ad027e4c3ca67907154c03feeebbd97b", + "file_size_bytes": 11264235 + }, + { + "name": "Gp0115670_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115670", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/assembly/nmdc_mga0d7pj22_pairedMapped_sorted.bam", + "md5_checksum": "c4f2407273babd894282d4d0f20be5d1", + "id": "nmdc:c4f2407273babd894282d4d0f20be5d1", + "file_size_bytes": 3245960211 + }, + { + "name": "Gp0115670_Protein FAA", + "description": "Protein FAA for Gp0115670", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_proteins.faa", + "md5_checksum": "21230aff7bb5b266fb544905f9ac5ce2", + "id": "nmdc:21230aff7bb5b266fb544905f9ac5ce2", + "file_size_bytes": 46061226 + }, + { + "name": "Gp0115670_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115670", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_structural_annotation.gff", + "md5_checksum": "91c5cc265ef61ab83111a5bc9462e8b2", + "id": "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", + "file_size_bytes": 2769 + }, + { + "name": "Gp0115670_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115670", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_functional_annotation.gff", + "md5_checksum": "0bc4d8b8ef11724c3d7e728b0e8e0ea5", + "id": "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", + 
"file_size_bytes": 50449176 + }, + { + "name": "Gp0115670_KO TSV file", + "description": "KO TSV file for Gp0115670", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko.tsv", + "md5_checksum": "811910b7d8c300befddd039e833b0453", + "id": "nmdc:811910b7d8c300befddd039e833b0453", + "file_size_bytes": 6653168 + }, + { + "name": "Gp0115670_EC TSV file", + "description": "EC TSV file for Gp0115670", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ec.tsv", + "md5_checksum": "9ed55d9535d1592866a66e9d5cd936a2", + "id": "nmdc:9ed55d9535d1592866a66e9d5cd936a2", + "file_size_bytes": 4232890 + }, + { + "name": "Gp0115670_COG GFF file", + "description": "COG GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cog.gff", + "md5_checksum": "a127efaa423e6dd6d24d7ab67cc2124a", + "id": "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", + "file_size_bytes": 28376544 + }, + { + "name": "Gp0115670_PFAM GFF file", + "description": "PFAM GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_pfam.gff", + "md5_checksum": "4b56646de8c37278beaaf9797e4ddf2f", + "id": "nmdc:4b56646de8c37278beaaf9797e4ddf2f", + "file_size_bytes": 22850790 + }, + { + "name": "Gp0115670_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_tigrfam.gff", + "md5_checksum": "53a0873376e22fef62f2740f6afead21", + "id": "nmdc:53a0873376e22fef62f2740f6afead21", + "file_size_bytes": 3099434 + }, + { + "name": "Gp0115670_SMART GFF file", + "description": "SMART GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_smart.gff", + "md5_checksum": 
"36748318682076112ba81283c8bc767a", + "id": "nmdc:36748318682076112ba81283c8bc767a", + "file_size_bytes": 6433811 + }, + { + "name": "Gp0115670_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_supfam.gff", + "md5_checksum": "5dd32385b351847f23ec4eac63eb70ff", + "id": "nmdc:5dd32385b351847f23ec4eac63eb70ff", + "file_size_bytes": 36427587 + }, + { + "name": "Gp0115670_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_cath_funfam.gff", + "md5_checksum": "95076052a4d5d57e1ed0c7699e4f5472", + "id": "nmdc:95076052a4d5d57e1ed0c7699e4f5472", + "file_size_bytes": 28909664 + }, + { + "name": "Gp0115670_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/annotation/nmdc_mga0d7pj22_ko_ec.gff", + "md5_checksum": "6ae89cc4b2fb7d09614c106d3358be27", + "id": "nmdc:6ae89cc4b2fb7d09614c106d3358be27", + "file_size_bytes": 21214802 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115670_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.tooShort.fa", + "md5_checksum": "fd5fe3f1faaaf3cd8a88d9bbfb016827", + "id": "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", + "file_size_bytes": 61828850 + }, + { + 
"name": "Gp0115670_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_bins.unbinned.fa", + "md5_checksum": "e27b736ee699ef2a8468a684811aaabd", + "id": "nmdc:e27b736ee699ef2a8468a684811aaabd", + "file_size_bytes": 15075820 + }, + { + "name": "Gp0115670_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115670", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_checkm_qa.out", + "md5_checksum": "b0866d1a944aa27e34dc7a140aeaf336", + "id": "nmdc:b0866d1a944aa27e34dc7a140aeaf336", + "file_size_bytes": 1690 + }, + { + "name": "Gp0115670_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115670", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_hqmq_bin.zip", + "md5_checksum": "0875e5107d03a40832d15e5cf80adbbc", + "id": "nmdc:0875e5107d03a40832d15e5cf80adbbc", + "file_size_bytes": 1944800 + }, + { + "name": "Gp0115670_metabat2 bins", + "description": "metabat2 bins for Gp0115670", + "url": "https://data.microbiomedata.org/data/nmdc:mga0d7pj22/MAGs/nmdc_mga0d7pj22_metabat_bin.zip", + "md5_checksum": "9b60c7c905d34e08427781eafbce9b12", + "id": "nmdc:9b60c7c905d34e08427781eafbce9b12", + "file_size_bytes": 658258 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7e" + }, + "description": "Assembled contigs fasta for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/assembly_contigs.fna", + "file_size_bytes": 84815235, + "type": "nmdc:DataObject", + "id": "nmdc:6c7beb91bbdcda84076fd786d59cab20", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d80" + }, + 
"description": "Metagenome Contig Coverage Stats for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/mapping_stats.txt", + "file_size_bytes": 11305858, + "type": "nmdc:DataObject", + "id": "nmdc:5133fdf5c818f740f9e7ca276477f5db", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d84" + }, + "description": "Assembled scaffold fasta for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/assembly_scaffolds.fna", + "file_size_bytes": 84354304, + "type": "nmdc:DataObject", + "id": "nmdc:cbd3fb5b5b99d86979e4c481bcd52d91", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d85" + }, + "description": "Assembled AGP file for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/assembly.agp", + "file_size_bytes": 9735435, + "type": "nmdc:DataObject", + "id": "nmdc:5f92683e40ac788a1bebf1d6e02415ad", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d87" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115670", + "url": "https://data.microbiomedata.org/data/1781_86102/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 3205338070, + "type": "nmdc:DataObject", + "id": "nmdc:bd2f5662f242a2be294876530634afaf", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159b6" + }, + "id": "nmdc:0a93ee11d25618c9207f4c109dd0859d", + "name": "1781_86102.krona.html", + "description": "Gold:Gp0115670 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86102/ReadbasedAnalysis/centrifuge/1781_86102.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003d1ae706d7b5b159bd" + }, + "id": "nmdc:43f9235ab417dd2dff189967b1a66ac7", + "name": "1781_86102.json", + "description": "Gold:Gp0115670 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86102/ReadbasedAnalysis/1781_86102.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16283" + }, + "id": "nmdc:fc533d14a7bb4e0dc462c4d95818e01e", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115670", + "file_size_bytes": 60065481, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16284" + }, + "id": "nmdc:5b15dd3d951dc863beb945de63d7ec25", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115670", + "file_size_bytes": 17564113, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16285" + }, + "id": "nmdc:53398d224211bc133f6dce929cae0d72", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115670", + "file_size_bytes": 2158, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16286" + }, + "id": "nmdc:46666048e020a995a98c70df53ac4d9f", + "name": "gold:Gp0115670.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 412159, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16287" + }, + "id": "nmdc:667049b22edf3a81a717ccf63fa6021c", + "name": 
"gold:Gp0115670.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 1551626, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16288" + }, + "id": "nmdc:1a8477ea8d089e78cb03052c64a35249", + "name": "gold:Gp0115670.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 298749, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16289" + }, + "id": "nmdc:d0b11003b25eb91d3759fbc6b7477c37", + "name": "gold:Gp0115670.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 389627, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628b" + }, + "id": "nmdc:9d6dace191d0fa9b660b12af98402fab", + "name": "gold:Gp0115670.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 329051, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628d" + }, + "id": "nmdc:a15523e666fa33f919c66d5cba8bc0f5", + "name": "gold:Gp0115670.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 570961, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1628e" + }, + "id": 
"nmdc:4162f320ed86534d75ecb1ccf2763d47", + "name": "gold:Gp0115670.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 743867, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16290" + }, + "id": "nmdc:9b032e23d57bfbb90069887246064d22", + "name": "gold:Gp0115670.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 1008688, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16291" + }, + "id": "nmdc:32d3414866d65ea1c0a43f9fc60004ec", + "name": "gold:Gp0115670.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115670", + "file_size_bytes": 771722, + "url": "https://data.microbiomedata.org/data/1781_86102/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d98" + }, + "description": "EC TSV File for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_ec.tsv", + "md5_checksum": "483453952f8e4dc70687e02842b2bfc8", + "file_size_bytes": 3385, + "id": "nmdc:483453952f8e4dc70687e02842b2bfc8", + "name": "gold:Gp0115670_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d99" + }, + "description": "KO TSV File for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_ko.tsv", + "md5_checksum": "4226d30b4f7d4018245613abbb2cc254", + "file_size_bytes": 3385, + "id": "nmdc:4226d30b4f7d4018245613abbb2cc254", + "name": "gold:Gp0115670_KO TSV File", + "data_object_type": 
"Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d9b" + }, + "description": "Protein FAA for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_proteins.faa", + "md5_checksum": "7e531f55eba2bd29d5bb4b1af8417b7c", + "file_size_bytes": 3385, + "id": "nmdc:7e531f55eba2bd29d5bb4b1af8417b7c", + "name": "gold:Gp0115670_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d9c" + }, + "description": "Functional annotation GFF file for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_functional_annotation.gff", + "md5_checksum": "75a1e23a29f8b793c0b0abb7778d8661", + "file_size_bytes": 3385, + "id": "nmdc:75a1e23a29f8b793c0b0abb7778d8661", + "name": "gold:Gp0115670_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d9d" + }, + "description": "Structural annotation GFF file for gold:Gp0115670", + "url": "https://data.microbiomedata.org/1781_86102/img_annotation/Ga0482257_structural_annotation.gff", + "md5_checksum": "f05ecf0db08d716edb7a3f499582a2b7", + "file_size_bytes": 3385, + "id": "nmdc:f05ecf0db08d716edb7a3f499582a2b7", + "name": "gold:Gp0115670_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3472d" + }, + "has_input": [ + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + "nmdc:c4f2407273babd894282d4d0f20be5d1", + "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5" + ], + "too_short_contig_num": 142606, + "part_of": [ + "nmdc:mga0d7pj22" + ], + "binned_contig_num": 1261, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:fd5fe3f1faaaf3cd8a88d9bbfb016827", + "nmdc:e27b736ee699ef2a8468a684811aaabd", + "nmdc:b0866d1a944aa27e34dc7a140aeaf336", + "nmdc:0875e5107d03a40832d15e5cf80adbbc", + "nmdc:9b60c7c905d34e08427781eafbce9b12" + ], + "was_informed_by": "gold:Gp0115670", + "input_contig_num": 152605, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0d7pj22", + "mags_list": [ + { + "number_of_contig": 118, + "completeness": 23.28, + "bin_name": "bins.1", + "gene_count": 572, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 151, + "completeness": 38.09, + "bin_name": "bins.2", + "gene_count": 725, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 100, + "completeness": 99.01, + "bin_name": "bins.3", + "gene_count": 3233, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 1, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.38, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 2, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 47 + }, + { + "number_of_contig": 135, + "completeness": 34.24, + "bin_name": "bins.4", + "gene_count": 689, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.91, + "gtdbtk_class": "", + 
"gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 652, + "completeness": 57.14, + "bin_name": "bins.5", + "gene_count": 3635, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.6, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 27 + }, + { + "number_of_contig": 105, + "completeness": 27.22, + "bin_name": "bins.6", + "gene_count": 509, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.19, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + } + ], + "unbinned_contig_num": 8738, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:55:52+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9cf" + }, + "has_input": [ + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b" + ], + "part_of": [ + "nmdc:mga0d7pj22" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:21230aff7bb5b266fb544905f9ac5ce2", + "nmdc:91c5cc265ef61ab83111a5bc9462e8b2", + "nmdc:0bc4d8b8ef11724c3d7e728b0e8e0ea5", + "nmdc:811910b7d8c300befddd039e833b0453", + "nmdc:9ed55d9535d1592866a66e9d5cd936a2", + "nmdc:a127efaa423e6dd6d24d7ab67cc2124a", + "nmdc:4b56646de8c37278beaaf9797e4ddf2f", + "nmdc:53a0873376e22fef62f2740f6afead21", + "nmdc:36748318682076112ba81283c8bc767a", + "nmdc:5dd32385b351847f23ec4eac63eb70ff", + "nmdc:95076052a4d5d57e1ed0c7699e4f5472", + "nmdc:6ae89cc4b2fb7d09614c106d3358be27" + ], + "was_informed_by": "gold:Gp0115670", + "id": 
"nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0d7pj22", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:55:52+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc0" + }, + "has_input": [ + "nmdc:7f6b353300583c60d2d668880b4134cd" + ], + "part_of": [ + "nmdc:mga0d7pj22" + ], + "ctg_logsum": 272574, + "scaf_logsum": 274450, + "gap_pct": 0.00346, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:975cdb0a18df949be4efb80d1dc4ef0b", + "nmdc:1dfaed4da055c5fd4226abe08bd91db9", + "nmdc:8a749340eefc40901a22a0ef603bc803", + "nmdc:ad027e4c3ca67907154c03feeebbd97b", + "nmdc:c4f2407273babd894282d4d0f20be5d1" + ], + "asm_score": 12.57, + "was_informed_by": "gold:Gp0115670", + "ctg_powsum": 33596, + "scaf_max": 211520, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "scaf_powsum": 33865, + "execution_resource": "NERSC-Cori", + "contigs": 152605, + "name": "Assembly Activity for nmdc:mga0d7pj22", + "ctg_max": 211520, + "gc_std": 0.125, + "contig_bp": 79563543, + "gc_avg": 0.57036, + "started_at_time": "2021-10-11T02:28:43Z", + "scaf_bp": 79566293, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 152330, + "ended_at_time": "2021-10-11T05:55:52+00:00", + "ctg_l50": 492, + "ctg_l90": 290, + "ctg_n50": 35595, + "ctg_n90": 126332, + "scaf_l50": 493, + "scaf_l90": 290, + "scaf_n50": 35340, + "scaf_n90": 126070, + "scaf_l_gt50k": 1744421, + "scaf_n_gt50k": 21, + "scaf_pct_gt50k": 2.192412 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4a" + }, + "id": "nmdc:omprc-11-t0xjjc50", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point 
T4.", + "has_input": [ + "nmdc:bsm-11-vg9vy382" + ], + "has_output": [ + "jgi:55d7402a0d8785342fcf7e3b" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115670" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c888" + }, + "has_input": [ + "nmdc:aa477a857eb9da284635b774477f3f54" + ], + "part_of": [ + "nmdc:mga0d7pj22" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7f6b353300583c60d2d668880b4134cd", + "nmdc:a4f65d101293fa4345cd865f86597464" + ], + "was_informed_by": "gold:Gp0115670", + "input_read_count": 36554212, + "output_read_bases": 5044444014, + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "input_read_bases": 5519686012, + "name": "Read QC Activity for nmdc:mga0d7pj22", + "output_read_count": 33663942, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:55:52+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf4d" + }, + "has_input": [ + "nmdc:7f6b353300583c60d2d668880b4134cd" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e316502f9e7a78c9db3996ef832aa9d7", + "nmdc:1ac2be77491e7d425da1d62f69f1508d", + "nmdc:de5b15fa9d3bdbc3abcc2475ee351323", + "nmdc:a9bbb74833404a2bf3bbd05e83a7a0ed", + "nmdc:c065784bed2b2495d512af93d05967de", + "nmdc:a34dbcbdebae0861e41c09e7b9a5d9f0", + 
"nmdc:b2122f5a910a1d4ae8a62956d1cd731c", + "nmdc:8a26d8496a70f4777be0e1237092e44c", + "nmdc:694b83f0b6f599948d4248dd48dd9ba9" + ], + "was_informed_by": "gold:Gp0115670", + "id": "nmdc:f1cbe3cc181b2e689272b4223b68c15f", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0d7pj22", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:55:52+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2350177247, + "type": "nmdc:DataObject", + "id": "jgi:55d7402c0d8785342fcf7e3e", + "name": "9422.8.132674.GGTAGC.fastq.gz" + }, + { + "name": "Gp0115674_Filtered Reads", + "description": "Filtered Reads for Gp0115674", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filtered.fastq.gz", + "md5_checksum": "538fd5695eb3decd48891e72acebb8ce", + "id": "nmdc:538fd5695eb3decd48891e72acebb8ce", + "file_size_bytes": 2126353222 + }, + { + "name": "Gp0115674_Filtered Stats", + "description": "Filtered Stats for Gp0115674", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/qa/nmdc_mga0cf0450_filterStats.txt", + "md5_checksum": "dde2b1748e16380e63476430ee27083a", + "id": "nmdc:dde2b1748e16380e63476430ee27083a", + "file_size_bytes": 288 + }, + { + "name": "Gp0115674_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report.tsv", + "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", + "id": "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "file_size_bytes": 13768 + }, + { + "name": "Gp0115674_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115674", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_report_full.tsv", + "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", + "id": "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "file_size_bytes": 1022858 + }, + { + "name": "Gp0115674_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115674", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_gottcha2_krona.html", + "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", + "id": "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "file_size_bytes": 269166 + }, + { + "name": "Gp0115674_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115674", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_classification.tsv", + "md5_checksum": "f8740b1fadbc29aef50d32706c955199", + "id": "nmdc:f8740b1fadbc29aef50d32706c955199", + "file_size_bytes": 1904303690 + }, + { + "name": "Gp0115674_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115674", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_report.tsv", + "md5_checksum": "80abfcc9b09476af4083b2af1760834f", + "id": "nmdc:80abfcc9b09476af4083b2af1760834f", + "file_size_bytes": 258748 + }, + { + "name": "Gp0115674_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115674", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_centrifuge_krona.html", + "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", + "id": "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "file_size_bytes": 2335000 + }, + { + "name": 
"Gp0115674_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115674", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_classification.tsv", + "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", + "id": "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "file_size_bytes": 1574286150 + }, + { + "name": "Gp0115674_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115674", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_report.tsv", + "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", + "id": "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "file_size_bytes": 671800 + }, + { + "name": "Gp0115674_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115674", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/ReadbasedAnalysis/nmdc_mga0cf0450_kraken2_krona.html", + "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", + "id": "nmdc:19eb52a96c1dedc9036ec9a0aaeda079", + "file_size_bytes": 4070548 + }, + { + "name": "Gp0115674_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115674", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_contigs.fna", + "md5_checksum": "ed2e4b90c8c2947486cc5c3c5828f949", + "id": "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "file_size_bytes": 78686505 + }, + { + "name": "Gp0115674_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115674", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_scaffolds.fna", + "md5_checksum": "e8fa9ae5e04a2969d220d81f1fb752f2", + "id": 
"nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", + "file_size_bytes": 78267725 + }, + { + "name": "Gp0115674_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_covstats.txt", + "md5_checksum": "5f308ea3cb43a331cda55ac9f91c6a53", + "id": "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", + "file_size_bytes": 10980044 + }, + { + "name": "Gp0115674_Assembled AGP file", + "description": "Assembled AGP file for Gp0115674", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_assembly.agp", + "md5_checksum": "604ed99b7c622082ddf174bb11d2787f", + "id": "nmdc:604ed99b7c622082ddf174bb11d2787f", + "file_size_bytes": 10249514 + }, + { + "name": "Gp0115674_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115674", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/assembly/nmdc_mga0cf0450_pairedMapped_sorted.bam", + "md5_checksum": "a0263d8b11653306a05f598395ca603a", + "id": "nmdc:a0263d8b11653306a05f598395ca603a", + "file_size_bytes": 2304306876 + }, + { + "name": "Gp0115674_Protein FAA", + "description": "Protein FAA for Gp0115674", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_proteins.faa", + "md5_checksum": "9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "id": "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "file_size_bytes": 43650605 + }, + { + "name": "Gp0115674_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115674", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_structural_annotation.gff", + "md5_checksum": "ce90743969776fd717671aeb21d37379", + "id": 
"nmdc:ce90743969776fd717671aeb21d37379", + "file_size_bytes": 2529 + }, + { + "name": "Gp0115674_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115674", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_functional_annotation.gff", + "md5_checksum": "1a4f5145ccf0838811fe570a93549fdf", + "id": "nmdc:1a4f5145ccf0838811fe570a93549fdf", + "file_size_bytes": 47604509 + }, + { + "name": "Gp0115674_KO TSV file", + "description": "KO TSV file for Gp0115674", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko.tsv", + "md5_checksum": "662dae8ba0ea9dda93637c2ea60c1f4e", + "id": "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", + "file_size_bytes": 6436472 + }, + { + "name": "Gp0115674_EC TSV file", + "description": "EC TSV file for Gp0115674", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ec.tsv", + "md5_checksum": "b5db445feb8edb47022c2a0ee86d828d", + "id": "nmdc:b5db445feb8edb47022c2a0ee86d828d", + "file_size_bytes": 4111562 + }, + { + "name": "Gp0115674_COG GFF file", + "description": "COG GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cog.gff", + "md5_checksum": "157d24f6f63091fbe9ef98cc3090975d", + "id": "nmdc:157d24f6f63091fbe9ef98cc3090975d", + "file_size_bytes": 27373015 + }, + { + "name": "Gp0115674_PFAM GFF file", + "description": "PFAM GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_pfam.gff", + "md5_checksum": "afa217feffb94965aa1839041305237e", + "id": "nmdc:afa217feffb94965aa1839041305237e", + "file_size_bytes": 22153817 + }, + { + "name": "Gp0115674_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115674", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_tigrfam.gff", + "md5_checksum": "4a00e0c0bc479b8e6f1139c8de3149d5", + "id": "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", + "file_size_bytes": 2995281 + }, + { + "name": "Gp0115674_SMART GFF file", + "description": "SMART GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_smart.gff", + "md5_checksum": "ffcd280a63fab7bcfa5422f34070d87f", + "id": "nmdc:ffcd280a63fab7bcfa5422f34070d87f", + "file_size_bytes": 6393135 + }, + { + "name": "Gp0115674_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_supfam.gff", + "md5_checksum": "9fb334fc9409e6db51aaa1f960b08f4b", + "id": "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", + "file_size_bytes": 35023258 + }, + { + "name": "Gp0115674_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_cath_funfam.gff", + "md5_checksum": "d5676c01e67f71559a382850f42c3493", + "id": "nmdc:d5676c01e67f71559a382850f42c3493", + "file_size_bytes": 27788764 + }, + { + "name": "Gp0115674_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/annotation/nmdc_mga0cf0450_ko_ec.gff", + "md5_checksum": "121fab4d5bff0dcbb9d1849738a72347", + "id": "nmdc:121fab4d5bff0dcbb9d1849738a72347", + "file_size_bytes": 20542466 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"file_size_bytes": 0 + }, + { + "name": "Gp0115674_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.tooShort.fa", + "md5_checksum": "6a03eb0156b154ea68ffff9b473e73a5", + "id": "nmdc:6a03eb0156b154ea68ffff9b473e73a5", + "file_size_bytes": 56345518 + }, + { + "name": "Gp0115674_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_bins.unbinned.fa", + "md5_checksum": "33a477987509b67fcfa5096d20c7c40b", + "id": "nmdc:33a477987509b67fcfa5096d20c7c40b", + "file_size_bytes": 10836032 + }, + { + "name": "Gp0115674_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115674", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_checkm_qa.out", + "md5_checksum": "314c92c3a9458e1aa304e3c474209acf", + "id": "nmdc:314c92c3a9458e1aa304e3c474209acf", + "file_size_bytes": 1360 + }, + { + "name": "Gp0115674_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115674", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_hqmq_bin.zip", + "md5_checksum": "a4f9093efaf84855cab58880b262afd5", + "id": "nmdc:a4f9093efaf84855cab58880b262afd5", + "file_size_bytes": 2974639 + }, + { + "name": "Gp0115674_metabat2 bins", + "description": "metabat2 bins for Gp0115674", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cf0450/MAGs/nmdc_mga0cf0450_metabat_bin.zip", + "md5_checksum": "1a29af6f30c21f38b25e4553605f50ef", + "id": "nmdc:1a29af6f30c21f38b25e4553605f50ef", + "file_size_bytes": 469326 + }, + { + "_id": { + 
"$oid": "649b003c1ae706d7b5b14d94" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/mapping_stats.txt", + "file_size_bytes": 10283424, + "type": "nmdc:DataObject", + "id": "nmdc:dafe01b902d5308bc53a143024f4c0be", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d95" + }, + "description": "Assembled scaffold fasta for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly_scaffolds.fna", + "file_size_bytes": 77571545, + "type": "nmdc:DataObject", + "id": "nmdc:52e5a91a8c71575c66793012fbdc1d38", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d97" + }, + "description": "Assembled contigs fasta for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly_contigs.fna", + "file_size_bytes": 77989885, + "type": "nmdc:DataObject", + "id": "nmdc:1689f2f2e14c55ab5d2af78ad3eb99bd", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d98" + }, + "description": "Assembled AGP file for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/assembly.agp", + "file_size_bytes": 8855354, + "type": "nmdc:DataObject", + "id": "nmdc:e3b48b89ae6f02705022bf443f649bc2", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d99" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115674", + "url": "https://data.microbiomedata.org/data/1781_86104/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2269403358, + "type": "nmdc:DataObject", + "id": "nmdc:0e448fc98b179d70a76f38beb90171cf", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + 
"_id": { + "$oid": "649b003d1ae706d7b5b159de" + }, + "id": "nmdc:d9d2c48e8e6cc1e9111eba4cd5aa44ce", + "name": "1781_86104.krona.html", + "description": "Gold:Gp0115674 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86104/ReadbasedAnalysis/centrifuge/1781_86104.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159e3" + }, + "id": "nmdc:ede7eba8751ecdb4bde5cdbded5598a8", + "name": "1781_86104.json", + "description": "Gold:Gp0115674 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86104/ReadbasedAnalysis/1781_86104.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b0" + }, + "id": "nmdc:4164b9671b26c93cf3580eff524af8a6", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115674", + "file_size_bytes": 54726629, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b1" + }, + "id": "nmdc:ecb95407253379e53508c4a5d200ae4e", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115674", + "file_size_bytes": 12697686, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b2" + }, + "id": "nmdc:44451dde40f3facbdb6357985448cb9f", + "name": "gold:Gp0115674.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 2583019, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b3" + }, + "id": 
"nmdc:36a4a672be5f95492ec7b48b501bc666", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115674", + "file_size_bytes": 2550, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b4" + }, + "id": "nmdc:ffcb32d9c9e558bdbd8827712761d752", + "name": "gold:Gp0115674.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 897656, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b5" + }, + "id": "nmdc:7c49f6fc2918adadd95fac344eb321f9", + "name": "gold:Gp0115674.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 891103, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b6" + }, + "id": "nmdc:e8abf1d316f52b3d1234bbaa8cf33c82", + "name": "gold:Gp0115674.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 1084295, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b7" + }, + "id": "nmdc:612b7364e691619f08e480035620ddb3", + "name": "gold:Gp0115674.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 221915, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b162b8" + }, + "id": "nmdc:56823e8ec01d84f84c1f44ba0020cefd", + "name": "gold:Gp0115674.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 1378648, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162b9" + }, + "id": "nmdc:f8f2d349759dfc70e53916dd7e07d796", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115674", + "file_size_bytes": 4807, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ba" + }, + "id": "nmdc:8e481f201f444f86ce93109f8f25c356", + "name": "gold:Gp0115674.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 571743, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162bb" + }, + "id": "nmdc:3d38153edc983c24deb7609306105632", + "name": "gold:Gp0115674.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 637469, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162bc" + }, + "id": "nmdc:2e8fbb22de466ca13ed4441fee49faab", + "name": "gold:Gp0115674.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 589376, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": 
"Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162be" + }, + "id": "nmdc:448e30e675ef810514b1e091992df2fc", + "name": "gold:Gp0115674.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 318161, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162bf" + }, + "id": "nmdc:f7ee01c219b7141044ca7338877ddf5e", + "name": "gold:Gp0115674.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115674", + "file_size_bytes": 412815, + "url": "https://data.microbiomedata.org/data/1781_86104/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da3" + }, + "description": "EC TSV File for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_ec.tsv", + "md5_checksum": "72ede7603b72206d929c03364769021c", + "file_size_bytes": 3385, + "id": "nmdc:72ede7603b72206d929c03364769021c", + "name": "gold:Gp0115674_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da4" + }, + "description": "KO TSV File for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_ko.tsv", + "md5_checksum": "9c248ab2a22c7b49060e544f37b9c798", + "file_size_bytes": 3385, + "id": "nmdc:9c248ab2a22c7b49060e544f37b9c798", + "name": "gold:Gp0115674_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da5" + }, + "description": "Functional annotation GFF file for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_functional_annotation.gff", + "md5_checksum": 
"876382e7107a83b87a059e4e961bff75", + "file_size_bytes": 3385, + "id": "nmdc:876382e7107a83b87a059e4e961bff75", + "name": "gold:Gp0115674_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dac" + }, + "description": "Structural annotation GFF file for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_structural_annotation.gff", + "md5_checksum": "17f2fbdeb3f5891c37f2e9e43a40c7b1", + "file_size_bytes": 3385, + "id": "nmdc:17f2fbdeb3f5891c37f2e9e43a40c7b1", + "name": "gold:Gp0115674_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16db0" + }, + "description": "Protein FAA for gold:Gp0115674", + "url": "https://data.microbiomedata.org/1781_86104/img_annotation/Ga0482253_proteins.faa", + "md5_checksum": "c70d6973abeb3ee231d3e38c3c5dced4", + "file_size_bytes": 3385, + "id": "nmdc:c70d6973abeb3ee231d3e38c3c5dced4", + "name": "gold:Gp0115674_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34732" + }, + "has_input": [ + "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "nmdc:a0263d8b11653306a05f598395ca603a", + "nmdc:1a4f5145ccf0838811fe570a93549fdf" + ], + "too_short_contig_num": 131855, + "part_of": [ + "nmdc:mga0cf0450" + ], + "binned_contig_num": 1119, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:6a03eb0156b154ea68ffff9b473e73a5", + "nmdc:33a477987509b67fcfa5096d20c7c40b", + "nmdc:314c92c3a9458e1aa304e3c474209acf", + "nmdc:a4f9093efaf84855cab58880b262afd5", + "nmdc:1a29af6f30c21f38b25e4553605f50ef" + ], + "was_informed_by": "gold:Gp0115674", + "input_contig_num": 139324, + "id": 
"nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0cf0450", + "mags_list": [ + { + "number_of_contig": 198, + "completeness": 100.0, + "bin_name": "bins.1", + "gene_count": 5608, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.29, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 46 + }, + { + "number_of_contig": 353, + "completeness": 88.62, + "bin_name": "bins.2", + "gene_count": 3146, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.0, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 40 + }, + { + "number_of_contig": 273, + "completeness": 51.61, + "bin_name": "bins.3", + "gene_count": 1397, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "UBA3067", + "gtdbtk_domain": "Bacteria", + "contamination": 0.8, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA3067", + "num_t_rna": 17 + }, + { + "number_of_contig": 295, + "completeness": 49.14, + "bin_name": "bins.4", + "gene_count": 1695, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + } + ], + "unbinned_contig_num": 6350, + "started_at_time": "2021-10-11T02:28:52Z", + "type": 
"nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:21:41+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d4" + }, + "has_input": [ + "nmdc:ed2e4b90c8c2947486cc5c3c5828f949" + ], + "part_of": [ + "nmdc:mga0cf0450" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9ae7cb8ba4bee2ce9a46c963d00ba6ba", + "nmdc:ce90743969776fd717671aeb21d37379", + "nmdc:1a4f5145ccf0838811fe570a93549fdf", + "nmdc:662dae8ba0ea9dda93637c2ea60c1f4e", + "nmdc:b5db445feb8edb47022c2a0ee86d828d", + "nmdc:157d24f6f63091fbe9ef98cc3090975d", + "nmdc:afa217feffb94965aa1839041305237e", + "nmdc:4a00e0c0bc479b8e6f1139c8de3149d5", + "nmdc:ffcd280a63fab7bcfa5422f34070d87f", + "nmdc:9fb334fc9409e6db51aaa1f960b08f4b", + "nmdc:d5676c01e67f71559a382850f42c3493", + "nmdc:121fab4d5bff0dcbb9d1849738a72347" + ], + "was_informed_by": "gold:Gp0115674", + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0cf0450", + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:21:41+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb6" + }, + "has_input": [ + "nmdc:538fd5695eb3decd48891e72acebb8ce" + ], + "part_of": [ + "nmdc:mga0cf0450" + ], + "ctg_logsum": 272042, + "scaf_logsum": 272657, + "gap_pct": 0.00172, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ed2e4b90c8c2947486cc5c3c5828f949", + "nmdc:e8fa9ae5e04a2969d220d81f1fb752f2", + "nmdc:5f308ea3cb43a331cda55ac9f91c6a53", + "nmdc:604ed99b7c622082ddf174bb11d2787f", + "nmdc:a0263d8b11653306a05f598395ca603a" + ], + "asm_score": 18.19, + "was_informed_by": "gold:Gp0115674", + "ctg_powsum": 36133, + "scaf_max": 176505, + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "scaf_powsum": 36239, + 
"execution_resource": "NERSC-Cori", + "contigs": 139326, + "name": "Assembly Activity for nmdc:mga0cf0450", + "ctg_max": 176505, + "gc_std": 0.12397, + "contig_bp": 73195425, + "gc_avg": 0.56886, + "started_at_time": "2021-10-11T02:28:52Z", + "scaf_bp": 73196685, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 139236, + "ended_at_time": "2021-10-11T05:21:41+00:00", + "ctg_l50": 481, + "ctg_l90": 290, + "ctg_n50": 30768, + "ctg_n90": 115008, + "scaf_l50": 482, + "scaf_l90": 290, + "scaf_n50": 30582, + "scaf_n90": 114932, + "scaf_l_gt50k": 2506146, + "scaf_n_gt50k": 32, + "scaf_pct_gt50k": 3.4238515 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4b" + }, + "id": "nmdc:omprc-11-1avd3d16", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-5xjtzc47" + ], + "has_output": [ + "jgi:55d7402c0d8785342fcf7e3e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115674" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88a" + }, + "has_input": [ + "nmdc:d94c174a22116c2db7ab8c47619e30aa" + ], + "part_of": [ + "nmdc:mga0cf0450" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:538fd5695eb3decd48891e72acebb8ce", + "nmdc:dde2b1748e16380e63476430ee27083a" + ], + "was_informed_by": 
"gold:Gp0115674", + "input_read_count": 26546332, + "output_read_bases": 3862169938, + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4008496132, + "name": "Read QC Activity for nmdc:mga0cf0450", + "output_read_count": 25776010, + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:21:41+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf50" + }, + "has_input": [ + "nmdc:538fd5695eb3decd48891e72acebb8ce" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7d6ec08ff0d080997fda7c7417f9c3d4", + "nmdc:df0dfd58dc386f5e0ded0b65b4a88c58", + "nmdc:ce3f31985e0a99f97bd4751bc2469bcb", + "nmdc:f8740b1fadbc29aef50d32706c955199", + "nmdc:80abfcc9b09476af4083b2af1760834f", + "nmdc:f189624af50d8d62908f8ddd5f3451ad", + "nmdc:09302fbc8e30758a95fac09ee5cfd449", + "nmdc:e44f717fc6f3458c17b4f5129a5e7920", + "nmdc:19eb52a96c1dedc9036ec9a0aaeda079" + ], + "was_informed_by": "gold:Gp0115674", + "id": "nmdc:954288cfef2de46d4b895fac3811a7d0", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cf0450", + "started_at_time": "2021-10-11T02:28:52Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:21:41+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1698585233, + "type": "nmdc:DataObject", + "id": "jgi:55d817f70d8785342fcf8270", + "name": "9387.2.132031.CTTGTA.fastq.gz" + }, + { + "name": "Gp0115673_Filtered Reads", + "description": "Filtered Reads for Gp0115673", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filtered.fastq.gz", + "md5_checksum": "268918f610926421d2af43f175553680", + "id": "nmdc:268918f610926421d2af43f175553680", + 
"file_size_bytes": 1492820163 + }, + { + "name": "Gp0115673_Filtered Stats", + "description": "Filtered Stats for Gp0115673", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/qa/nmdc_mga0kpja70_filterStats.txt", + "md5_checksum": "4610980cf3558f5a9830797ead97362a", + "id": "nmdc:4610980cf3558f5a9830797ead97362a", + "file_size_bytes": 287 + }, + { + "name": "Gp0115673_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report.tsv", + "md5_checksum": "c7b24571b61a33018cf118b5424b787f", + "id": "nmdc:c7b24571b61a33018cf118b5424b787f", + "file_size_bytes": 9782 + }, + { + "name": "Gp0115673_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_report_full.tsv", + "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", + "id": "nmdc:e185734176505343bf4c83c16a0a9fe2", + "file_size_bytes": 856112 + }, + { + "name": "Gp0115673_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115673", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_gottcha2_krona.html", + "md5_checksum": "7c6b0ef44450c747580826a2e218844b", + "id": "nmdc:7c6b0ef44450c747580826a2e218844b", + "file_size_bytes": 255142 + }, + { + "name": "Gp0115673_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115673", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_classification.tsv", + "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", + "id": "nmdc:5b98c377f424d7609f1a09e350cfb837", + "file_size_bytes": 1218364738 + 
}, + { + "name": "Gp0115673_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115673", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_report.tsv", + "md5_checksum": "b5f7a68a94b356001014d1be024231af", + "id": "nmdc:b5f7a68a94b356001014d1be024231af", + "file_size_bytes": 254923 + }, + { + "name": "Gp0115673_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115673", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_centrifuge_krona.html", + "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", + "id": "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "file_size_bytes": 2323219 + }, + { + "name": "Gp0115673_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115673", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_classification.tsv", + "md5_checksum": "35bf579641b2ffb3614098d9811a4968", + "id": "nmdc:35bf579641b2ffb3614098d9811a4968", + "file_size_bytes": 1001134031 + }, + { + "name": "Gp0115673_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115673", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_report.tsv", + "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", + "id": "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "file_size_bytes": 640671 + }, + { + "name": "Gp0115673_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115673", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/ReadbasedAnalysis/nmdc_mga0kpja70_kraken2_krona.html", + "md5_checksum": 
"a7030fa8e9622e3396c2b96448e90c3b", + "id": "nmdc:a7030fa8e9622e3396c2b96448e90c3b", + "file_size_bytes": 3995499 + }, + { + "name": "Gp0115673_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115673", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_contigs.fna", + "md5_checksum": "06d4964c0822abd6f94ca883c122f7ce", + "id": "nmdc:06d4964c0822abd6f94ca883c122f7ce", + "file_size_bytes": 49610158 + }, + { + "name": "Gp0115673_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115673", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_scaffolds.fna", + "md5_checksum": "bad916c69afe839097650b0b9526a841", + "id": "nmdc:bad916c69afe839097650b0b9526a841", + "file_size_bytes": 49338957 + }, + { + "name": "Gp0115673_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_covstats.txt", + "md5_checksum": "a187658f262fa495de43707aabcbf480", + "id": "nmdc:a187658f262fa495de43707aabcbf480", + "file_size_bytes": 7048516 + }, + { + "name": "Gp0115673_Assembled AGP file", + "description": "Assembled AGP file for Gp0115673", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_assembly.agp", + "md5_checksum": "c525c04f90889be615025c667908370c", + "id": "nmdc:c525c04f90889be615025c667908370c", + "file_size_bytes": 6557406 + }, + { + "name": "Gp0115673_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115673", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/assembly/nmdc_mga0kpja70_pairedMapped_sorted.bam", + "md5_checksum": "2e293158750df042be7422826125bef2", + "id": 
"nmdc:2e293158750df042be7422826125bef2", + "file_size_bytes": 1601507411 + }, + { + "name": "Gp0115673_Protein FAA", + "description": "Protein FAA for Gp0115673", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_proteins.faa", + "md5_checksum": "be3b8decbc48f9588daca36ca4c883ab", + "id": "nmdc:be3b8decbc48f9588daca36ca4c883ab", + "file_size_bytes": 27487621 + }, + { + "name": "Gp0115673_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115673", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_structural_annotation.gff", + "md5_checksum": "106c834bb14367ec6154d1b04f2a1021", + "id": "nmdc:106c834bb14367ec6154d1b04f2a1021", + "file_size_bytes": 2505 + }, + { + "name": "Gp0115673_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115673", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_functional_annotation.gff", + "md5_checksum": "dfe3eed1eee6d6764ae22a2c6b0209e5", + "id": "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", + "file_size_bytes": 30665845 + }, + { + "name": "Gp0115673_KO TSV file", + "description": "KO TSV file for Gp0115673", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko.tsv", + "md5_checksum": "84e3913c75d155fc45f04bc04810063a", + "id": "nmdc:84e3913c75d155fc45f04bc04810063a", + "file_size_bytes": 4142989 + }, + { + "name": "Gp0115673_EC TSV file", + "description": "EC TSV file for Gp0115673", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ec.tsv", + "md5_checksum": "418e74fcbe4b97b8d74cb697a3b3feb4", + "id": 
"nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", + "file_size_bytes": 2665975 + }, + { + "name": "Gp0115673_COG GFF file", + "description": "COG GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cog.gff", + "md5_checksum": "2d57dd06178c83c1f9c4bfaecf34b8b4", + "id": "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", + "file_size_bytes": 17716812 + }, + { + "name": "Gp0115673_PFAM GFF file", + "description": "PFAM GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_pfam.gff", + "md5_checksum": "42173701162f4fdb727bc4eded48c2a1", + "id": "nmdc:42173701162f4fdb727bc4eded48c2a1", + "file_size_bytes": 14043787 + }, + { + "name": "Gp0115673_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_tigrfam.gff", + "md5_checksum": "89b8851da4dca184654a76128048e09a", + "id": "nmdc:89b8851da4dca184654a76128048e09a", + "file_size_bytes": 2009579 + }, + { + "name": "Gp0115673_SMART GFF file", + "description": "SMART GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_smart.gff", + "md5_checksum": "e0d0721c6051fb0eebd70635882639c1", + "id": "nmdc:e0d0721c6051fb0eebd70635882639c1", + "file_size_bytes": 3834400 + }, + { + "name": "Gp0115673_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_supfam.gff", + "md5_checksum": "e9b0a3709e78dd9dfdba4eff7103c425", + "id": "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", + "file_size_bytes": 22131290 + }, + { + "name": "Gp0115673_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_cath_funfam.gff", + "md5_checksum": "e627abd2dfaee1fbf695de11211c6971", + 
"id": "nmdc:e627abd2dfaee1fbf695de11211c6971", + "file_size_bytes": 17702997 + }, + { + "name": "Gp0115673_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/annotation/nmdc_mga0kpja70_ko_ec.gff", + "md5_checksum": "a04e32711e814e733114531a666606c6", + "id": "nmdc:a04e32711e814e733114531a666606c6", + "file_size_bytes": 13225993 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115673_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.tooShort.fa", + "md5_checksum": "c907101a9eb50d1e522d1fc11b4d3164", + "id": "nmdc:c907101a9eb50d1e522d1fc11b4d3164", + "file_size_bytes": 35344893 + }, + { + "name": "Gp0115673_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_bins.unbinned.fa", + "md5_checksum": "f80fbdbf31ee0ac76353d59e64b789bc", + "id": "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", + "file_size_bytes": 8810307 + }, + { + "name": "Gp0115673_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115673", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_checkm_qa.out", + "md5_checksum": "af15089c0cb19ec9bd65f98e59dc94f1", + "id": 
"nmdc:af15089c0cb19ec9bd65f98e59dc94f1", + "file_size_bytes": 942 + }, + { + "name": "Gp0115673_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115673", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_hqmq_bin.zip", + "md5_checksum": "70d3f2afd9f32a2bdaa81a6fc547f6fb", + "id": "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", + "file_size_bytes": 182 + }, + { + "name": "Gp0115673_metabat2 bins", + "description": "metabat2 bins for Gp0115673", + "url": "https://data.microbiomedata.org/data/nmdc:mga0kpja70/MAGs/nmdc_mga0kpja70_metabat_bin.zip", + "md5_checksum": "f40d84a4fc0c87d76c144777f9e8a8ea", + "id": "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea", + "file_size_bytes": 1658458 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/mapping_stats.txt", + "file_size_bytes": 6599486, + "type": "nmdc:DataObject", + "id": "nmdc:c8b6932baf9efa891ba3ef22cdfc747f", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d90" + }, + "description": "Assembled AGP file for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/assembly.agp", + "file_size_bytes": 5657846, + "type": "nmdc:DataObject", + "id": "nmdc:8f202f5c73cded42a0ee74842d99d453", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d91" + }, + "description": "Assembled contigs fasta for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/assembly_contigs.fna", + "file_size_bytes": 49161128, + "type": "nmdc:DataObject", + "id": "nmdc:b2f2d476b77fca0725cb68b0305ea3b0", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": 
{ + "$oid": "649b003c1ae706d7b5b14d93" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1579984662, + "type": "nmdc:DataObject", + "id": "nmdc:7a768ecc03a7f9bf2f48e0ff038e286c", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d96" + }, + "description": "Assembled scaffold fasta for gold:Gp0115673", + "url": "https://data.microbiomedata.org/data/1781_86091/assembly/assembly_scaffolds.fna", + "file_size_bytes": 48890657, + "type": "nmdc:DataObject", + "id": "nmdc:af2802220167f0c190a161f58e7140ef", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159d4" + }, + "id": "nmdc:54ab9f23cfb3900421112f1c63981d19", + "name": "1781_86091.krona.html", + "description": "Gold:Gp0115673 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86091/ReadbasedAnalysis/centrifuge/1781_86091.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159da" + }, + "id": "nmdc:b92cba553fb3b7f7488f9cf0153170a4", + "name": "1781_86091.json", + "description": "Gold:Gp0115673 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86091/ReadbasedAnalysis/1781_86091.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ab" + }, + "id": "nmdc:64d4b2d627893f1add14860728cce4dd", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115673", + "file_size_bytes": 34259785, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b162ac" + }, + "id": "nmdc:844cbe586fb4d8c7523f5e48bcf269e4", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115673", + "file_size_bytes": 9383451, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ad" + }, + "id": "nmdc:51877a97315cae458f13c66d23bb5938", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115673", + "file_size_bytes": 936, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ae" + }, + "id": "nmdc:9234a7d807019d0678be49a2b0bf8902", + "name": "gold:Gp0115673.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115673", + "file_size_bytes": 3819274, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162af" + }, + "id": "nmdc:99160995b52b9234959f882fee6d2a6b", + "name": "gold:Gp0115673.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115673", + "file_size_bytes": 991444, + "url": "https://data.microbiomedata.org/data/1781_86091/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6b" + }, + "description": "Structural annotation GFF file for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_structural_annotation.gff", + "md5_checksum": "2fba563f11988f4e30d2b4283c3c5487", + "file_size_bytes": 3385, + "id": "nmdc:2fba563f11988f4e30d2b4283c3c5487", + "name": "gold:Gp0115673_Structural annotation GFF file", + "data_object_type": 
"Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6d" + }, + "description": "EC TSV File for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_ec.tsv", + "md5_checksum": "da4d331daa6d5965be8e201c3c9ba4d4", + "file_size_bytes": 3385, + "id": "nmdc:da4d331daa6d5965be8e201c3c9ba4d4", + "name": "gold:Gp0115673_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d93" + }, + "description": "Functional annotation GFF file for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_functional_annotation.gff", + "md5_checksum": "b7264d7a1c56fc32c4a0c050fe04208e", + "file_size_bytes": 3385, + "id": "nmdc:b7264d7a1c56fc32c4a0c050fe04208e", + "name": "gold:Gp0115673_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dbb" + }, + "description": "Protein FAA for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_proteins.faa", + "md5_checksum": "d325906b9b82b3bfc2fe8ed7321a828e", + "file_size_bytes": 3385, + "id": "nmdc:d325906b9b82b3bfc2fe8ed7321a828e", + "name": "gold:Gp0115673_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dd0" + }, + "description": "KO TSV File for gold:Gp0115673", + "url": "https://data.microbiomedata.org/1781_86091/img_annotation/Ga0482254_ko.tsv", + "md5_checksum": "73cac6bcbfa2627ab291bf230ded9748", + "file_size_bytes": 3385, + "id": "nmdc:73cac6bcbfa2627ab291bf230ded9748", + "name": "gold:Gp0115673_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": 
"649b0052ec087f6bbab3471f" + }, + "has_input": [ + "nmdc:06d4964c0822abd6f94ca883c122f7ce", + "nmdc:2e293158750df042be7422826125bef2", + "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5" + ], + "too_short_contig_num": 83787, + "part_of": [ + "nmdc:mga0kpja70" + ], + "binned_contig_num": 890, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:c907101a9eb50d1e522d1fc11b4d3164", + "nmdc:f80fbdbf31ee0ac76353d59e64b789bc", + "nmdc:af15089c0cb19ec9bd65f98e59dc94f1", + "nmdc:70d3f2afd9f32a2bdaa81a6fc547f6fb", + "nmdc:f40d84a4fc0c87d76c144777f9e8a8ea" + ], + "was_informed_by": "gold:Gp0115673", + "input_contig_num": 89806, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0kpja70", + "mags_list": [ + { + "number_of_contig": 67, + "completeness": 12.5, + "bin_name": "bins.1", + "gene_count": 318, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 823, + "completeness": 97.81, + "bin_name": "bins.2", + "gene_count": 5828, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 66.19, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 63 + } + ], + "unbinned_contig_num": 5129, + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:32:43+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c0" + }, + "has_input": [ + "nmdc:06d4964c0822abd6f94ca883c122f7ce" + ], + "part_of": [ + "nmdc:mga0kpja70" + ], + 
"git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:be3b8decbc48f9588daca36ca4c883ab", + "nmdc:106c834bb14367ec6154d1b04f2a1021", + "nmdc:dfe3eed1eee6d6764ae22a2c6b0209e5", + "nmdc:84e3913c75d155fc45f04bc04810063a", + "nmdc:418e74fcbe4b97b8d74cb697a3b3feb4", + "nmdc:2d57dd06178c83c1f9c4bfaecf34b8b4", + "nmdc:42173701162f4fdb727bc4eded48c2a1", + "nmdc:89b8851da4dca184654a76128048e09a", + "nmdc:e0d0721c6051fb0eebd70635882639c1", + "nmdc:e9b0a3709e78dd9dfdba4eff7103c425", + "nmdc:e627abd2dfaee1fbf695de11211c6971", + "nmdc:a04e32711e814e733114531a666606c6" + ], + "was_informed_by": "gold:Gp0115673", + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0kpja70", + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:32:43+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa8" + }, + "has_input": [ + "nmdc:268918f610926421d2af43f175553680" + ], + "part_of": [ + "nmdc:mga0kpja70" + ], + "ctg_logsum": 160283, + "scaf_logsum": 161291, + "gap_pct": 0.0036, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:06d4964c0822abd6f94ca883c122f7ce", + "nmdc:bad916c69afe839097650b0b9526a841", + "nmdc:a187658f262fa495de43707aabcbf480", + "nmdc:c525c04f90889be615025c667908370c", + "nmdc:2e293158750df042be7422826125bef2" + ], + "asm_score": 6.419, + "was_informed_by": "gold:Gp0115673", + "ctg_powsum": 18694, + "scaf_max": 39252, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "scaf_powsum": 18825, + "execution_resource": "NERSC-Cori", + "contigs": 89808, + "name": "Assembly Activity for nmdc:mga0kpja70", + "ctg_max": 39252, + "gc_std": 0.11246, + "contig_bp": 46120517, + "gc_avg": 0.55483, + "started_at_time": "2021-10-11T02:28:36Z", + "scaf_bp": 46122177, + "type": "nmdc:MetagenomeAssembly", + 
"scaffolds": 89660, + "ended_at_time": "2021-10-11T03:32:43+00:00", + "ctg_l50": 493, + "ctg_l90": 286, + "ctg_n50": 19910, + "ctg_n90": 73487, + "scaf_l50": 494, + "scaf_l90": 286, + "scaf_n50": 19797, + "scaf_n90": 73347 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4c" + }, + "id": "nmdc:omprc-11-hk1bje46", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-5h7px351" + ], + "has_output": [ + "jgi:55d817f70d8785342fcf8270" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115673" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c876" + }, + "has_input": [ + "nmdc:3783bc4ce3716b6d299533bc3f6591b6" + ], + "part_of": [ + "nmdc:mga0kpja70" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:268918f610926421d2af43f175553680", + "nmdc:4610980cf3558f5a9830797ead97362a" + ], + "was_informed_by": "gold:Gp0115673", + "input_read_count": 17796788, + "output_read_bases": 2520029380, + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 2687314988, + "name": "Read QC Activity for nmdc:mga0kpja70", + "output_read_count": 16817496, + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": 
"2021-10-11T03:32:43+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf44" + }, + "has_input": [ + "nmdc:268918f610926421d2af43f175553680" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c7b24571b61a33018cf118b5424b787f", + "nmdc:e185734176505343bf4c83c16a0a9fe2", + "nmdc:7c6b0ef44450c747580826a2e218844b", + "nmdc:5b98c377f424d7609f1a09e350cfb837", + "nmdc:b5f7a68a94b356001014d1be024231af", + "nmdc:75bca66cfcdd38331c10edbba03fa0d3", + "nmdc:35bf579641b2ffb3614098d9811a4968", + "nmdc:801b79f5442e5bfaa0d15f76786cfbc0", + "nmdc:a7030fa8e9622e3396c2b96448e90c3b" + ], + "was_informed_by": "gold:Gp0115673", + "id": "nmdc:7ae51c3485db8a27225f08083565b28e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0kpja70", + "started_at_time": "2021-10-11T02:28:36Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:32:43+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2065080622, + "type": "nmdc:DataObject", + "id": "jgi:55d817fa0d8785342fcf8272", + "name": "9387.2.132031.ATGTCA.fastq.gz" + }, + { + "name": "Gp0115671_Filtered Reads", + "description": "Filtered Reads for Gp0115671", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filtered.fastq.gz", + "md5_checksum": "445f37bc3019e9fe3b29a2ac5bcbfc9c", + "id": "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", + "file_size_bytes": 1806996776 + }, + { + "name": "Gp0115671_Filtered Stats", + "description": "Filtered Stats for Gp0115671", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/qa/nmdc_mga0rw1351_filterStats.txt", + "md5_checksum": "24440b4c5534da30eee650b68eccda84", + "id": "nmdc:24440b4c5534da30eee650b68eccda84", + "file_size_bytes": 289 + }, + { + 
"name": "Gp0115671_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report.tsv", + "md5_checksum": "358559c32b69eff51758db66ac01021b", + "id": "nmdc:358559c32b69eff51758db66ac01021b", + "file_size_bytes": 11833 + }, + { + "name": "Gp0115671_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_report_full.tsv", + "md5_checksum": "befbd648249c2871bd27999120e50bf7", + "id": "nmdc:befbd648249c2871bd27999120e50bf7", + "file_size_bytes": 888177 + }, + { + "name": "Gp0115671_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115671", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_gottcha2_krona.html", + "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", + "id": "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "file_size_bytes": 261703 + }, + { + "name": "Gp0115671_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115671", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_classification.tsv", + "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", + "id": "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "file_size_bytes": 1474970402 + }, + { + "name": "Gp0115671_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115671", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_report.tsv", + "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", + "id": "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + 
"file_size_bytes": 255777 + }, + { + "name": "Gp0115671_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115671", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_centrifuge_krona.html", + "md5_checksum": "e0736ff520260ba2097c02b9e767362c", + "id": "nmdc:e0736ff520260ba2097c02b9e767362c", + "file_size_bytes": 2329875 + }, + { + "name": "Gp0115671_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115671", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_classification.tsv", + "md5_checksum": "a00960655f9e80726fdb0fade1bec958", + "id": "nmdc:a00960655f9e80726fdb0fade1bec958", + "file_size_bytes": 1213240496 + }, + { + "name": "Gp0115671_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115671", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_report.tsv", + "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", + "id": "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "file_size_bytes": 659715 + }, + { + "name": "Gp0115671_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115671", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/ReadbasedAnalysis/nmdc_mga0rw1351_kraken2_krona.html", + "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", + "id": "nmdc:e111cd4927f6736e5de6f6e81e7e6d72", + "file_size_bytes": 4010701 + }, + { + "name": "Gp0115671_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115671", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_contigs.fna", + "md5_checksum": 
"0a1ebd847e3bb8f928ef491497f8355b", + "id": "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "file_size_bytes": 58744710 + }, + { + "name": "Gp0115671_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115671", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_scaffolds.fna", + "md5_checksum": "be4cab04a701bce0ed99605109bd5d6f", + "id": "nmdc:be4cab04a701bce0ed99605109bd5d6f", + "file_size_bytes": 58382380 + }, + { + "name": "Gp0115671_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_covstats.txt", + "md5_checksum": "cc4d3160618a82f81518bdc97ce1f5e2", + "id": "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", + "file_size_bytes": 9464710 + }, + { + "name": "Gp0115671_Assembled AGP file", + "description": "Assembled AGP file for Gp0115671", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_assembly.agp", + "md5_checksum": "473ca208ab97399a644c8e5326e765e5", + "id": "nmdc:473ca208ab97399a644c8e5326e765e5", + "file_size_bytes": 8820452 + }, + { + "name": "Gp0115671_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115671", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/assembly/nmdc_mga0rw1351_pairedMapped_sorted.bam", + "md5_checksum": "69371e513bebd1069a0ed26cc2c914cb", + "id": "nmdc:69371e513bebd1069a0ed26cc2c914cb", + "file_size_bytes": 1938214126 + }, + { + "name": "Gp0115671_Protein FAA", + "description": "Protein FAA for Gp0115671", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_proteins.faa", + "md5_checksum": "147b97234576ba123a9f3c63eb249ecf", + "id": 
"nmdc:147b97234576ba123a9f3c63eb249ecf", + "file_size_bytes": 32911597 + }, + { + "name": "Gp0115671_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115671", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_structural_annotation.gff", + "md5_checksum": "3e037f5f744c9f8e4aa355222cc620ae", + "id": "nmdc:3e037f5f744c9f8e4aa355222cc620ae", + "file_size_bytes": 2516 + }, + { + "name": "Gp0115671_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115671", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_functional_annotation.gff", + "md5_checksum": "10d19849864ecdb722335200d0607bbe", + "id": "nmdc:10d19849864ecdb722335200d0607bbe", + "file_size_bytes": 38009425 + }, + { + "name": "Gp0115671_KO TSV file", + "description": "KO TSV file for Gp0115671", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko.tsv", + "md5_checksum": "0ce9fa5958b6445f7be463538e89e9b1", + "id": "nmdc:0ce9fa5958b6445f7be463538e89e9b1", + "file_size_bytes": 4994549 + }, + { + "name": "Gp0115671_EC TSV file", + "description": "EC TSV file for Gp0115671", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ec.tsv", + "md5_checksum": "a3bc059d9350034f835be4e754486c73", + "id": "nmdc:a3bc059d9350034f835be4e754486c73", + "file_size_bytes": 3207987 + }, + { + "name": "Gp0115671_COG GFF file", + "description": "COG GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cog.gff", + "md5_checksum": "da9866461051130a44f0982b1a65c061", + "id": "nmdc:da9866461051130a44f0982b1a65c061", + "file_size_bytes": 21138081 
+ }, + { + "name": "Gp0115671_PFAM GFF file", + "description": "PFAM GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_pfam.gff", + "md5_checksum": "676fff23fb641ee8af8a2b948fc5b46e", + "id": "nmdc:676fff23fb641ee8af8a2b948fc5b46e", + "file_size_bytes": 16269399 + }, + { + "name": "Gp0115671_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_tigrfam.gff", + "md5_checksum": "a4aa56158a292b63078eb029ed1d90a9", + "id": "nmdc:a4aa56158a292b63078eb029ed1d90a9", + "file_size_bytes": 2189740 + }, + { + "name": "Gp0115671_SMART GFF file", + "description": "SMART GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_smart.gff", + "md5_checksum": "6a28f85e8b5addccb429cc7f8964e496", + "id": "nmdc:6a28f85e8b5addccb429cc7f8964e496", + "file_size_bytes": 4669463 + }, + { + "name": "Gp0115671_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_supfam.gff", + "md5_checksum": "d5b21cce7406ab46611c49dc1ab658ed", + "id": "nmdc:d5b21cce7406ab46611c49dc1ab658ed", + "file_size_bytes": 26589549 + }, + { + "name": "Gp0115671_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_cath_funfam.gff", + "md5_checksum": "8ead1ab881fd48527d853b0d0601b4bc", + "id": "nmdc:8ead1ab881fd48527d853b0d0601b4bc", + "file_size_bytes": 20889965 + }, + { + "name": "Gp0115671_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/annotation/nmdc_mga0rw1351_ko_ec.gff", + "md5_checksum": "ad206c1031a6f0a7805034dee03ff889", + "id": "nmdc:ad206c1031a6f0a7805034dee03ff889", + 
"file_size_bytes": 15914575 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115671_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.tooShort.fa", + "md5_checksum": "57fd559aaca7b976f3b38bb1a3ce362b", + "id": "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", + "file_size_bytes": 48167943 + }, + { + "name": "Gp0115671_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_bins.unbinned.fa", + "md5_checksum": "43a900225e93216944b4eec3a01f7db7", + "id": "nmdc:43a900225e93216944b4eec3a01f7db7", + "file_size_bytes": 9124730 + }, + { + "name": "Gp0115671_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115671", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_checkm_qa.out", + "md5_checksum": "cad0e18a4d2c4067a2724f41e449cb86", + "id": "nmdc:cad0e18a4d2c4067a2724f41e449cb86", + "file_size_bytes": 1014 + }, + { + "name": "Gp0115671_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115671", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_hqmq_bin.zip", + "md5_checksum": 
"55577aa26faf185b3b3f4c78711e7715", + "id": "nmdc:55577aa26faf185b3b3f4c78711e7715", + "file_size_bytes": 182 + }, + { + "name": "Gp0115671_metabat2 bins", + "description": "metabat2 bins for Gp0115671", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rw1351/MAGs/nmdc_mga0rw1351_metabat_bin.zip", + "md5_checksum": "c484ee1e530a0c9b47069c0288110e47", + "id": "nmdc:c484ee1e530a0c9b47069c0288110e47", + "file_size_bytes": 444082 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d83" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/mapping_stats.txt", + "file_size_bytes": 8863080, + "type": "nmdc:DataObject", + "id": "nmdc:b422a9fcf9c3fb738a67d9b007e6e063", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d86" + }, + "description": "Assembled contigs fasta for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly_contigs.fna", + "file_size_bytes": 58143080, + "type": "nmdc:DataObject", + "id": "nmdc:cbbbd9da9ae7fc0d7cd3ad507977a0fe", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d88" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1911271746, + "type": "nmdc:DataObject", + "id": "nmdc:0594950317ff722111f4ffd3a11304ab", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d89" + }, + "description": "Assembled scaffold fasta for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly_scaffolds.fna", + "file_size_bytes": 57781170, + "type": "nmdc:DataObject", + "id": "nmdc:84d2fa8698a27a1b5b5e493494863296", + "name": 
"assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8a" + }, + "description": "Assembled AGP file for gold:Gp0115671", + "url": "https://data.microbiomedata.org/data/1781_86095/assembly/assembly.agp", + "file_size_bytes": 7616352, + "type": "nmdc:DataObject", + "id": "nmdc:49e748d2c3a8f4aaeb65019da319287e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159c0" + }, + "id": "nmdc:dd2f65c7b8ae6d5b3348968d354fb744", + "name": "1781_86095.krona.html", + "description": "Gold:Gp0115671 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86095/ReadbasedAnalysis/centrifuge/1781_86095.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159cd" + }, + "id": "nmdc:60800e393e9c757603261909577320b6", + "name": "1781_86095.json", + "description": "Gold:Gp0115671 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86095/ReadbasedAnalysis/1781_86095.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16292" + }, + "id": "nmdc:0f654b06229134dbe8dca13a709b9575", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115671", + "file_size_bytes": 46779370, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16295" + }, + "id": "nmdc:1bf83c5dc0174021cd428b7354033bb8", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115671", + "file_size_bytes": 9883327, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16296" + }, + "id": "nmdc:8c12b5ddb4bda80c1c255b2c887afb34", + "name": "gold:Gp0115671.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115671", + "file_size_bytes": 223419, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16298" + }, + "id": "nmdc:d27e4b42b83c999df80390a378c2c189", + "name": "gold:Gp0115671.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115671", + "file_size_bytes": 405648, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629b" + }, + "id": "nmdc:2de7dcd5c53b16b1f2ea8e6006384dec", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115671", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1781_86095/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d73" + }, + "description": "EC TSV File for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_ec.tsv", + "md5_checksum": "75e88ab163c9d092836f9110768c6a52", + "file_size_bytes": 3385, + "id": "nmdc:75e88ab163c9d092836f9110768c6a52", + "name": "gold:Gp0115671_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d76" + }, + "description": "KO TSV File for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_ko.tsv", + "md5_checksum": "9c6c644e821021661d936d374ee9fc1b", + "file_size_bytes": 3385, + "id": "nmdc:9c6c644e821021661d936d374ee9fc1b", + "name": "gold:Gp0115671_KO TSV 
File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d78" + }, + "description": "Functional annotation GFF file for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_functional_annotation.gff", + "md5_checksum": "8f5a7f2db6790e67282439becd4c04b2", + "file_size_bytes": 3385, + "id": "nmdc:8f5a7f2db6790e67282439becd4c04b2", + "name": "gold:Gp0115671_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7e" + }, + "description": "Protein FAA for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_proteins.faa", + "md5_checksum": "f5a4336c7ac10e908cfe90a61a991c65", + "file_size_bytes": 3385, + "id": "nmdc:f5a4336c7ac10e908cfe90a61a991c65", + "name": "gold:Gp0115671_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d87" + }, + "description": "Structural annotation GFF file for gold:Gp0115671", + "url": "https://data.microbiomedata.org/1781_86095/img_annotation/Ga0482256_structural_annotation.gff", + "md5_checksum": "ad6e88d469fbad7b0684afb933403a6c", + "file_size_bytes": 3385, + "id": "nmdc:ad6e88d469fbad7b0684afb933403a6c", + "name": "gold:Gp0115671_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34733" + }, + "has_input": [ + "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "nmdc:69371e513bebd1069a0ed26cc2c914cb", + "nmdc:10d19849864ecdb722335200d0607bbe" + ], + "too_short_contig_num": 114372, + "part_of": [ + "nmdc:mga0rw1351" + ], + "binned_contig_num": 328, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:57fd559aaca7b976f3b38bb1a3ce362b", + "nmdc:43a900225e93216944b4eec3a01f7db7", + "nmdc:cad0e18a4d2c4067a2724f41e449cb86", + "nmdc:55577aa26faf185b3b3f4c78711e7715", + "nmdc:c484ee1e530a0c9b47069c0288110e47" + ], + "was_informed_by": "gold:Gp0115671", + "input_contig_num": 120326, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0rw1351", + "mags_list": [ + { + "number_of_contig": 173, + "completeness": 26.29, + "bin_name": "bins.1", + "gene_count": 875, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.18, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 155, + "completeness": 24.1, + "bin_name": "bins.2", + "gene_count": 806, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + } + ], + "unbinned_contig_num": 5626, + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:39:05+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d3" + }, + "has_input": [ + "nmdc:0a1ebd847e3bb8f928ef491497f8355b" + ], + "part_of": [ + "nmdc:mga0rw1351" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:147b97234576ba123a9f3c63eb249ecf", + "nmdc:3e037f5f744c9f8e4aa355222cc620ae", + "nmdc:10d19849864ecdb722335200d0607bbe", + "nmdc:0ce9fa5958b6445f7be463538e89e9b1", + "nmdc:a3bc059d9350034f835be4e754486c73", + "nmdc:da9866461051130a44f0982b1a65c061", + 
"nmdc:676fff23fb641ee8af8a2b948fc5b46e", + "nmdc:a4aa56158a292b63078eb029ed1d90a9", + "nmdc:6a28f85e8b5addccb429cc7f8964e496", + "nmdc:d5b21cce7406ab46611c49dc1ab658ed", + "nmdc:8ead1ab881fd48527d853b0d0601b4bc", + "nmdc:ad206c1031a6f0a7805034dee03ff889" + ], + "was_informed_by": "gold:Gp0115671", + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0rw1351", + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:39:05+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc1" + }, + "has_input": [ + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + ], + "part_of": [ + "nmdc:mga0rw1351" + ], + "ctg_logsum": 111611, + "scaf_logsum": 112140, + "gap_pct": 0.00155, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0a1ebd847e3bb8f928ef491497f8355b", + "nmdc:be4cab04a701bce0ed99605109bd5d6f", + "nmdc:cc4d3160618a82f81518bdc97ce1f5e2", + "nmdc:473ca208ab97399a644c8e5326e765e5", + "nmdc:69371e513bebd1069a0ed26cc2c914cb" + ], + "asm_score": 3.588, + "was_informed_by": "gold:Gp0115671", + "ctg_powsum": 12152, + "scaf_max": 16504, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "scaf_powsum": 12215, + "execution_resource": "NERSC-Cori", + "contigs": 120326, + "name": "Assembly Activity for nmdc:mga0rw1351", + "ctg_max": 16504, + "gc_std": 0.11331, + "contig_bp": 54171370, + "gc_avg": 0.54451, + "started_at_time": "2021-10-11T02:27:50Z", + "scaf_bp": 54172210, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 120242, + "ended_at_time": "2021-10-11T03:39:05+00:00", + "ctg_l50": 421, + "ctg_l90": 285, + "ctg_n50": 34725, + "ctg_n90": 101428, + "scaf_l50": 421, + "scaf_l90": 285, + "scaf_n50": 34687, + "scaf_n90": 101345 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4d" + }, + "id": "nmdc:omprc-11-qtje8r57", + 
"name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-wzdqhh45" + ], + "has_output": [ + "jgi:55d817fa0d8785342fcf8272" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115671" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c891" + }, + "has_input": [ + "nmdc:57d2e9b1a32e13f859c8b6e450ac3402" + ], + "part_of": [ + "nmdc:mga0rw1351" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c", + "nmdc:24440b4c5534da30eee650b68eccda84" + ], + "was_informed_by": "gold:Gp0115671", + "input_read_count": 22298982, + "output_read_bases": 3062549086, + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3367146282, + "name": "Read QC Activity for nmdc:mga0rw1351", + "output_read_count": 20445042, + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:39:05+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf56" + }, + "has_input": [ + "nmdc:445f37bc3019e9fe3b29a2ac5bcbfc9c" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:358559c32b69eff51758db66ac01021b", + 
"nmdc:befbd648249c2871bd27999120e50bf7", + "nmdc:cacb8f623a808d0cae094d46f2801dd3", + "nmdc:1b15ffb745e320a9bf0cac7e672e974b", + "nmdc:90b77c7118bf6ec1f99836a50d562a7f", + "nmdc:e0736ff520260ba2097c02b9e767362c", + "nmdc:a00960655f9e80726fdb0fade1bec958", + "nmdc:366bf195f71d2c35a9b47c0f29381e85", + "nmdc:e111cd4927f6736e5de6f6e81e7e6d72" + ], + "was_informed_by": "gold:Gp0115671", + "id": "nmdc:c61259a6fe99bc2482a8619c099e6cc2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0rw1351", + "started_at_time": "2021-10-11T02:27:50Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:39:05+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 3492714581, + "type": "nmdc:DataObject", + "id": "jgi:55d740220d8785342fcf7e35", + "name": "9422.8.132674.GTGAAA.fastq.gz" + }, + { + "name": "Gp0115676_Filtered Reads", + "description": "Filtered Reads for Gp0115676", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filtered.fastq.gz", + "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", + "id": "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", + "file_size_bytes": 3113249122 + }, + { + "name": "Gp0115676_Filtered Stats", + "description": "Filtered Stats for Gp0115676", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/qa/nmdc_mga0w3a067_filterStats.txt", + "md5_checksum": "79815495339053b7935b55dbde02b2ff", + "id": "nmdc:79815495339053b7935b55dbde02b2ff", + "file_size_bytes": 292 + }, + { + "name": "Gp0115676_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report.tsv", + "md5_checksum": "13343b2533892633bcc3655a1ebe788f", + "id": "nmdc:13343b2533892633bcc3655a1ebe788f", + "file_size_bytes": 13659 + }, 
+ { + "name": "Gp0115676_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_report_full.tsv", + "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", + "id": "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "file_size_bytes": 1168924 + }, + { + "name": "Gp0115676_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115676", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_gottcha2_krona.html", + "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", + "id": "nmdc:95a2de8be672fd50bf542215194dc4d4", + "file_size_bytes": 267660 + }, + { + "name": "Gp0115676_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115676", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_classification.tsv", + "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", + "id": "nmdc:6cd0210b345d6908ad8ab683b1a11572", + "file_size_bytes": 2721808152 + }, + { + "name": "Gp0115676_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115676", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_report.tsv", + "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", + "id": "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "file_size_bytes": 263207 + }, + { + "name": "Gp0115676_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115676", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_centrifuge_krona.html", + "md5_checksum": 
"6e1e28773094884d35c04072309e285a", + "id": "nmdc:6e1e28773094884d35c04072309e285a", + "file_size_bytes": 2347912 + }, + { + "name": "Gp0115676_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115676", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_classification.tsv", + "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", + "id": "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "file_size_bytes": 2224468607 + }, + { + "name": "Gp0115676_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115676", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_report.tsv", + "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", + "id": "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "file_size_bytes": 701128 + }, + { + "name": "Gp0115676_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115676", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/ReadbasedAnalysis/nmdc_mga0w3a067_kraken2_krona.html", + "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", + "id": "nmdc:e8602b20781cdbbd84e6dcb92c048a6b", + "file_size_bytes": 4217185 + }, + { + "name": "Gp0115676_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115676", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_contigs.fna", + "md5_checksum": "19987e32391f846db382edabf14ba43e", + "id": "nmdc:19987e32391f846db382edabf14ba43e", + "file_size_bytes": 105010680 + }, + { + "name": "Gp0115676_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115676", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_scaffolds.fna", + "md5_checksum": "1a4c5ace6c1b54e057d282031e8bc2c6", + "id": "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", + "file_size_bytes": 104445982 + }, + { + "name": "Gp0115676_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_covstats.txt", + "md5_checksum": "af7a38646011c9e6d0ad2b1ebd7f47c9", + "id": "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", + "file_size_bytes": 14811778 + }, + { + "name": "Gp0115676_Assembled AGP file", + "description": "Assembled AGP file for Gp0115676", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_assembly.agp", + "md5_checksum": "1b665fb0fbbf40a13122100c927b398b", + "id": "nmdc:1b665fb0fbbf40a13122100c927b398b", + "file_size_bytes": 13854137 + }, + { + "name": "Gp0115676_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115676", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/assembly/nmdc_mga0w3a067_pairedMapped_sorted.bam", + "md5_checksum": "7c1232ff8d861d2e2c111a1dc4a70480", + "id": "nmdc:7c1232ff8d861d2e2c111a1dc4a70480", + "file_size_bytes": 3366223347 + }, + { + "name": "Gp0115676_Protein FAA", + "description": "Protein FAA for Gp0115676", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_proteins.faa", + "md5_checksum": "35adf26b13c97c40147af2f067e0c9be", + "id": "nmdc:35adf26b13c97c40147af2f067e0c9be", + "file_size_bytes": 59120149 + }, + { + "name": "Gp0115676_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115676", + "data_object_type": "Structural Annotation GFF", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_structural_annotation.gff", + "md5_checksum": "3de29d8dede94769e7753f0aaee86691", + "id": "nmdc:3de29d8dede94769e7753f0aaee86691", + "file_size_bytes": 2524 + }, + { + "name": "Gp0115676_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115676", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_functional_annotation.gff", + "md5_checksum": "6fa3d1e5fae636b4199ff57b4776a51c", + "id": "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", + "file_size_bytes": 65284624 + }, + { + "name": "Gp0115676_KO TSV file", + "description": "KO TSV file for Gp0115676", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko.tsv", + "md5_checksum": "b865dcd9976c90dbc8459ec7ccc72d45", + "id": "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", + "file_size_bytes": 9219020 + }, + { + "name": "Gp0115676_EC TSV file", + "description": "EC TSV file for Gp0115676", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ec.tsv", + "md5_checksum": "98b9ea6588dc9ff918298c4a7c567edf", + "id": "nmdc:98b9ea6588dc9ff918298c4a7c567edf", + "file_size_bytes": 5972063 + }, + { + "name": "Gp0115676_COG GFF file", + "description": "COG GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cog.gff", + "md5_checksum": "d8fbe8d24c00eee2ef163e3bb428b718", + "id": "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", + "file_size_bytes": 39290017 + }, + { + "name": "Gp0115676_PFAM GFF file", + "description": "PFAM GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_pfam.gff", + "md5_checksum": "ed68f1e7fd4873f1ea756d0c58a9c550", + "id": 
"nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", + "file_size_bytes": 31343624 + }, + { + "name": "Gp0115676_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_tigrfam.gff", + "md5_checksum": "4d0469ae5b27dd4045d637d2493ccba9", + "id": "nmdc:4d0469ae5b27dd4045d637d2493ccba9", + "file_size_bytes": 4260344 + }, + { + "name": "Gp0115676_SMART GFF file", + "description": "SMART GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_smart.gff", + "md5_checksum": "a893783f6886e31b6bca5b6baede9f66", + "id": "nmdc:a893783f6886e31b6bca5b6baede9f66", + "file_size_bytes": 8240017 + }, + { + "name": "Gp0115676_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_supfam.gff", + "md5_checksum": "2225c723ccf0fd5ea309cfb5ca90d536", + "id": "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", + "file_size_bytes": 48186264 + }, + { + "name": "Gp0115676_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_cath_funfam.gff", + "md5_checksum": "1abd69f8096f98174d95d9a3a13c2a3b", + "id": "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", + "file_size_bytes": 38259823 + }, + { + "name": "Gp0115676_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/annotation/nmdc_mga0w3a067_ko_ec.gff", + "md5_checksum": "83647c3e1ed96fda36f7c119a3e98182", + "id": "nmdc:83647c3e1ed96fda36f7c119a3e98182", + "file_size_bytes": 29337291 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115676_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.tooShort.fa", + "md5_checksum": "71667f3b8ee0cb5acadc541fa6914022", + "id": "nmdc:71667f3b8ee0cb5acadc541fa6914022", + "file_size_bytes": 75793492 + }, + { + "name": "Gp0115676_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115676", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_bins.unbinned.fa", + "md5_checksum": "0141a64077e0f18adc42cb1915a00fa2", + "id": "nmdc:0141a64077e0f18adc42cb1915a00fa2", + "file_size_bytes": 17366889 + }, + { + "name": "Gp0115676_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115676", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_checkm_qa.out", + "md5_checksum": "982b47616dde63a388400fcc57d7c5b0", + "id": "nmdc:982b47616dde63a388400fcc57d7c5b0", + "file_size_bytes": 1700 + }, + { + "name": "Gp0115676_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115676", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_hqmq_bin.zip", + "md5_checksum": "313eb61bc7577e272eca6332e923f9c4", + "id": "nmdc:313eb61bc7577e272eca6332e923f9c4", + "file_size_bytes": 677741 + }, + { + "name": "Gp0115676_metabat2 bins", + "description": "metabat2 bins for Gp0115676", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0w3a067/MAGs/nmdc_mga0w3a067_metabat_bin.zip", + "md5_checksum": "763eb40a8905e9b0d459c45222f1b05e", + "id": "nmdc:763eb40a8905e9b0d459c45222f1b05e", + "file_size_bytes": 2885722 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9e" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/mapping_stats.txt", + "file_size_bytes": 13876163, + "type": "nmdc:DataObject", + "id": "nmdc:96941ca922d1e71c5651c276dae2951e", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da0" + }, + "description": "Assembled scaffold fasta for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/assembly_scaffolds.fna", + "file_size_bytes": 103511507, + "type": "nmdc:DataObject", + "id": "nmdc:f6a39ee8aee7ca6e8d4b3a351af5097e", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da1" + }, + "description": "Assembled AGP file for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/assembly.agp", + "file_size_bytes": 11980587, + "type": "nmdc:DataObject", + "id": "nmdc:665e21a0e5c6dc4de9165db7fc04944b", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da2" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115676", + "url": "https://data.microbiomedata.org/data/1781_86099/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 3317390706, + "type": "nmdc:DataObject", + "id": "nmdc:7b206a8925a1ea97bf5cfbbafd4c1331", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da4" + }, + "description": "Assembled contigs fasta for gold:Gp0115676", + "url": 
"https://data.microbiomedata.org/data/1781_86099/assembly/assembly_contigs.fna", + "file_size_bytes": 104075065, + "type": "nmdc:DataObject", + "id": "nmdc:6525bd7de120f6ed4dd75069d597f261", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159f1" + }, + "id": "nmdc:7bfbfbfea6176042739cd5079cda14bd", + "name": "1781_86099.krona.html", + "description": "Gold:Gp0115676 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86099/ReadbasedAnalysis/centrifuge/1781_86099.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159f5" + }, + "id": "nmdc:e98001eea268f1373182f7b83d43ab1f", + "name": "1781_86099.json", + "description": "Gold:Gp0115676 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86099/ReadbasedAnalysis/1781_86099.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c5" + }, + "id": "nmdc:b80b655f568ca46cf4789674ac6a83cc", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115676", + "file_size_bytes": 1881, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c6" + }, + "id": "nmdc:4c9a69a0c2311a6cdaf5a476cc8c9d42", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115676", + "file_size_bytes": 815, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c7" + }, + "id": "nmdc:956841108fcf5c6634a57da8f2b7fac7", + "name": "bins.unbinned.fa", + 
"description": "unbinned fasta file from metabat2 for gold:Gp0115676", + "file_size_bytes": 19565106, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c8" + }, + "id": "nmdc:e91e2f12beabcf429ee849748b76801f", + "name": "gold:Gp0115676.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 2056638, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c9" + }, + "id": "nmdc:31bc3893618af40d8f63e24e2dad6772", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115676", + "file_size_bytes": 73605331, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ca" + }, + "id": "nmdc:ccc55d14e487d71a93085ff56130b44a", + "name": "gold:Gp0115676.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 3283805, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162cb" + }, + "id": "nmdc:3b5521b99ccb093a2984e693c20cef84", + "name": "gold:Gp0115676.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 642694, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162cc" + }, + "id": "nmdc:b0793c3769f2849c8a6a1496bfc555bd", + "name": "gold:Gp0115676.bins.7.fa", + "description": "metabat2 
binned contig file for gold:Gp0115676", + "file_size_bytes": 537417, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162cd" + }, + "id": "nmdc:bd0dc950e63b986b1585aa25c81c0a52", + "name": "gold:Gp0115676.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 2597982, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d0" + }, + "id": "nmdc:b151fd8de6fd473abe671917580a23d9", + "name": "gold:Gp0115676.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 249502, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d2" + }, + "id": "nmdc:575b0300e32da8f7a051c5019b4ccfee", + "name": "gold:Gp0115676.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115676", + "file_size_bytes": 228256, + "url": "https://data.microbiomedata.org/data/1781_86099/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d92" + }, + "description": "Protein FAA for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_proteins.faa", + "md5_checksum": "5193d8fa7e151b96396afa8d61851af8", + "file_size_bytes": 3385, + "id": "nmdc:5193d8fa7e151b96396afa8d61851af8", + "name": "gold:Gp0115676_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d96" + }, + "description": "KO TSV File 
for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_ko.tsv", + "md5_checksum": "23762ea8dc5ce375c3827aded41ae2c0", + "file_size_bytes": 3385, + "id": "nmdc:23762ea8dc5ce375c3827aded41ae2c0", + "name": "gold:Gp0115676_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d9a" + }, + "description": "EC TSV File for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_ec.tsv", + "md5_checksum": "bc4755bf8b2c0b7c384eb4ffd8e9e017", + "file_size_bytes": 3385, + "id": "nmdc:bc4755bf8b2c0b7c384eb4ffd8e9e017", + "name": "gold:Gp0115676_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da7" + }, + "description": "Structural annotation GFF file for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_structural_annotation.gff", + "md5_checksum": "e3b04bb85be48814ca078ee871a9296b", + "file_size_bytes": 3385, + "id": "nmdc:e3b04bb85be48814ca078ee871a9296b", + "name": "gold:Gp0115676_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da9" + }, + "description": "Functional annotation GFF file for gold:Gp0115676", + "url": "https://data.microbiomedata.org/1781_86099/img_annotation/Ga0482251_functional_annotation.gff", + "md5_checksum": "d429e7a9bb0344196ed7bcca6131e3c0", + "file_size_bytes": 3385, + "id": "nmdc:d429e7a9bb0344196ed7bcca6131e3c0", + "name": "gold:Gp0115676_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3472c" + }, + "has_input": [ + "nmdc:19987e32391f846db382edabf14ba43e", + 
"nmdc:7c1232ff8d861d2e2c111a1dc4a70480", + "nmdc:6fa3d1e5fae636b4199ff57b4776a51c" + ], + "too_short_contig_num": 175121, + "part_of": [ + "nmdc:mga0w3a067" + ], + "binned_contig_num": 1550, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:71667f3b8ee0cb5acadc541fa6914022", + "nmdc:0141a64077e0f18adc42cb1915a00fa2", + "nmdc:982b47616dde63a388400fcc57d7c5b0", + "nmdc:313eb61bc7577e272eca6332e923f9c4", + "nmdc:763eb40a8905e9b0d459c45222f1b05e" + ], + "was_informed_by": "gold:Gp0115676", + "input_contig_num": 187123, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0w3a067", + "mags_list": [ + { + "number_of_contig": 457, + "completeness": 95.14, + "bin_name": "bins.1", + "gene_count": 6260, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 76.52, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 85 + }, + { + "number_of_contig": 24, + "completeness": 4.17, + "bin_name": "bins.2", + "gene_count": 246, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 175, + "completeness": 36.21, + "bin_name": "bins.3", + "gene_count": 937, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 485, + "completeness": 43.26, + "bin_name": "bins.4", + "gene_count": 
2590, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.55, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 29 + }, + { + "number_of_contig": 339, + "completeness": 79.0, + "bin_name": "bins.5", + "gene_count": 2464, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 3.71, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 32 + }, + { + "number_of_contig": 70, + "completeness": 0.0, + "bin_name": "bins.6", + "gene_count": 298, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + } + ], + "unbinned_contig_num": 10452, + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9cb" + }, + "has_input": [ + "nmdc:19987e32391f846db382edabf14ba43e" + ], + "part_of": [ + "nmdc:mga0w3a067" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:35adf26b13c97c40147af2f067e0c9be", + "nmdc:3de29d8dede94769e7753f0aaee86691", + "nmdc:6fa3d1e5fae636b4199ff57b4776a51c", + "nmdc:b865dcd9976c90dbc8459ec7ccc72d45", + "nmdc:98b9ea6588dc9ff918298c4a7c567edf", + "nmdc:d8fbe8d24c00eee2ef163e3bb428b718", + "nmdc:ed68f1e7fd4873f1ea756d0c58a9c550", + "nmdc:4d0469ae5b27dd4045d637d2493ccba9", + "nmdc:a893783f6886e31b6bca5b6baede9f66", + "nmdc:2225c723ccf0fd5ea309cfb5ca90d536", 
+ "nmdc:1abd69f8096f98174d95d9a3a13c2a3b", + "nmdc:83647c3e1ed96fda36f7c119a3e98182" + ], + "was_informed_by": "gold:Gp0115676", + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0w3a067", + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fbe" + }, + "has_input": [ + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" + ], + "part_of": [ + "nmdc:mga0w3a067" + ], + "ctg_logsum": 335229, + "scaf_logsum": 337025, + "gap_pct": 0.00236, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:19987e32391f846db382edabf14ba43e", + "nmdc:1a4c5ace6c1b54e057d282031e8bc2c6", + "nmdc:af7a38646011c9e6d0ad2b1ebd7f47c9", + "nmdc:1b665fb0fbbf40a13122100c927b398b", + "nmdc:7c1232ff8d861d2e2c111a1dc4a70480" + ], + "asm_score": 10.939, + "was_informed_by": "gold:Gp0115676", + "ctg_powsum": 40696, + "scaf_max": 163197, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "scaf_powsum": 40973, + "execution_resource": "NERSC-Cori", + "contigs": 187125, + "name": "Assembly Activity for nmdc:mga0w3a067", + "ctg_max": 163197, + "gc_std": 0.10616, + "contig_bp": 97611209, + "gc_avg": 0.5929, + "started_at_time": "2021-10-11T02:26:37Z", + "scaf_bp": 97613509, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 186895, + "ended_at_time": "2021-10-11T05:40:05+00:00", + "ctg_l50": 499, + "ctg_l90": 288, + "ctg_n50": 42676, + "ctg_n90": 155670, + "scaf_l50": 499, + "scaf_l90": 288, + "scaf_n50": 42593, + "scaf_n90": 155449, + "scaf_l_gt50k": 743033, + "scaf_n_gt50k": 11, + "scaf_pct_gt50k": 0.7611989 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4e" + }, + "id": "nmdc:omprc-11-7ey2jr63", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, 
Washington, USA - GW-RW T4_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-pkgtg048" + ], + "has_output": [ + "jgi:55d740220d8785342fcf7e35" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115676" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c883" + }, + "has_input": [ + "nmdc:5672111f6f33b8aff5f65e69ebb41c5e" + ], + "part_of": [ + "nmdc:mga0w3a067" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08", + "nmdc:79815495339053b7935b55dbde02b2ff" + ], + "was_informed_by": "gold:Gp0115676", + "input_read_count": 39069214, + "output_read_bases": 5550744725, + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "input_read_bases": 5899451314, + "name": "Read QC Activity for nmdc:mga0w3a067", + "output_read_count": 37037822, + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:40:05+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf4c" + }, + "has_input": [ + "nmdc:e777bc518da4bbe0ab7b2959f00e2b08" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:13343b2533892633bcc3655a1ebe788f", + "nmdc:87b36326bee32ad5642e3ffc2f5ac7db", + "nmdc:95a2de8be672fd50bf542215194dc4d4", + 
"nmdc:6cd0210b345d6908ad8ab683b1a11572", + "nmdc:5049a65d2a42d73c5d47373e990b70f7", + "nmdc:6e1e28773094884d35c04072309e285a", + "nmdc:7fa3aba8b1e31ccc00cf56f04f5605ac", + "nmdc:3b3abe337d79d09e9c7ba0a40045ad93", + "nmdc:e8602b20781cdbbd84e6dcb92c048a6b" + ], + "was_informed_by": "gold:Gp0115676", + "id": "nmdc:b2025bb4a2c7616273d414e4093f63ca", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0w3a067", + "started_at_time": "2021-10-11T02:26:37Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:40:05+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 6700067822, + "type": "nmdc:DataObject", + "id": "jgi:55a9caff0d87852b2150891e", + "name": "9289.1.128215.TCCTGAG-TATCCTC.fastq.gz" + }, + { + "name": "Gp0115677_Filtered Reads", + "description": "Filtered Reads for Gp0115677", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filtered.fastq.gz", + "md5_checksum": "63c857b3011dec61a08044d518291f23", + "id": "nmdc:63c857b3011dec61a08044d518291f23", + "file_size_bytes": 5307348388 + }, + { + "name": "Gp0115677_Filtered Stats", + "description": "Filtered Stats for Gp0115677", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/qa/nmdc_mga0zb0766_filterStats.txt", + "md5_checksum": "2a79d7978caecf9b08fb2029fa42c9b3", + "id": "nmdc:2a79d7978caecf9b08fb2029fa42c9b3", + "file_size_bytes": 279 + }, + { + "name": "Gp0115677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report.tsv", + "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", + "id": "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "file_size_bytes": 17895 + }, + { + "name": "Gp0115677_Gottcha2 full TSV report", + "description": 
"Gottcha2 full TSV report for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_report_full.tsv", + "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", + "id": "nmdc:c1730daf5e6017219fd9fc079e42c132", + "file_size_bytes": 1182538 + }, + { + "name": "Gp0115677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_gottcha2_krona.html", + "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", + "id": "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + "file_size_bytes": 276802 + }, + { + "name": "Gp0115677_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115677", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_classification.tsv", + "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", + "id": "nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "file_size_bytes": 4716470614 + }, + { + "name": "Gp0115677_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115677", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_report.tsv", + "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", + "id": "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "file_size_bytes": 267231 + }, + { + "name": "Gp0115677_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115677", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_centrifuge_krona.html", + "md5_checksum": "229017cdb1832bb718d22dc27db44125", + "id": "nmdc:229017cdb1832bb718d22dc27db44125", + 
"file_size_bytes": 2356003 + }, + { + "name": "Gp0115677_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115677", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_classification.tsv", + "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", + "id": "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "file_size_bytes": 3857487871 + }, + { + "name": "Gp0115677_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115677", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_report.tsv", + "md5_checksum": "bdd701b44e67929ec8bbe279697da937", + "id": "nmdc:bdd701b44e67929ec8bbe279697da937", + "file_size_bytes": 708598 + }, + { + "name": "Gp0115677_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115677", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/ReadbasedAnalysis/nmdc_mga0zb0766_kraken2_krona.html", + "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", + "id": "nmdc:d35583a5ed45df5a58bf084fc67bf988", + "file_size_bytes": 4250180 + }, + { + "name": "Gp0115677_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115677", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_contigs.fna", + "md5_checksum": "3d9e14d6f7a854042a7d71def080409b", + "id": "nmdc:3d9e14d6f7a854042a7d71def080409b", + "file_size_bytes": 250747283 + }, + { + "name": "Gp0115677_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115677", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_scaffolds.fna", + "md5_checksum": 
"26d0d64ca7c850f0e04a4c33690bd178", + "id": "nmdc:26d0d64ca7c850f0e04a4c33690bd178", + "file_size_bytes": 249006954 + }, + { + "name": "Gp0115677_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_covstats.txt", + "md5_checksum": "8f8a0622cfe39054bd20f11116c78402", + "id": "nmdc:8f8a0622cfe39054bd20f11116c78402", + "file_size_bytes": 43716675 + }, + { + "name": "Gp0115677_Assembled AGP file", + "description": "Assembled AGP file for Gp0115677", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_assembly.agp", + "md5_checksum": "623aa370c44897cf30844647c2f5bd94", + "id": "nmdc:623aa370c44897cf30844647c2f5bd94", + "file_size_bytes": 41409581 + }, + { + "name": "Gp0115677_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115677", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/assembly/nmdc_mga0zb0766_pairedMapped_sorted.bam", + "md5_checksum": "f4a1cf24281f14a666a1bfc9afc0aab5", + "id": "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", + "file_size_bytes": 5828772757 + }, + { + "name": "Gp0115677_Protein FAA", + "description": "Protein FAA for Gp0115677", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_proteins.faa", + "md5_checksum": "4f9d82516561ee307b1ab4841255aff0", + "id": "nmdc:4f9d82516561ee307b1ab4841255aff0", + "file_size_bytes": 144603933 + }, + { + "name": "Gp0115677_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115677", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_structural_annotation.gff", + "md5_checksum": 
"a658e9045fde900cdc78d0578446b960", + "id": "nmdc:a658e9045fde900cdc78d0578446b960", + "file_size_bytes": 2546 + }, + { + "name": "Gp0115677_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115677", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_functional_annotation.gff", + "md5_checksum": "075c3477b8874aa8d6c4dbc1360a2b38", + "id": "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", + "file_size_bytes": 167984752 + }, + { + "name": "Gp0115677_KO TSV file", + "description": "KO TSV file for Gp0115677", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko.tsv", + "md5_checksum": "9a338a51c6ca2ec4e0da4e15903be407", + "id": "nmdc:9a338a51c6ca2ec4e0da4e15903be407", + "file_size_bytes": 19341535 + }, + { + "name": "Gp0115677_EC TSV file", + "description": "EC TSV file for Gp0115677", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ec.tsv", + "md5_checksum": "0f9e627ace8d9b8420e957bcd033244a", + "id": "nmdc:0f9e627ace8d9b8420e957bcd033244a", + "file_size_bytes": 12533246 + }, + { + "name": "Gp0115677_COG GFF file", + "description": "COG GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cog.gff", + "md5_checksum": "144a997b22098f5fe748d2fa069cdc71", + "id": "nmdc:144a997b22098f5fe748d2fa069cdc71", + "file_size_bytes": 85841510 + }, + { + "name": "Gp0115677_PFAM GFF file", + "description": "PFAM GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_pfam.gff", + "md5_checksum": "82dc44c196f4b6b5552e8360f21f93a0", + "id": "nmdc:82dc44c196f4b6b5552e8360f21f93a0", + "file_size_bytes": 64139943 + }, + { + "name": "Gp0115677_TigrFam GFF file", + 
"description": "TigrFam GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_tigrfam.gff", + "md5_checksum": "9238a5420065e1da9da31c270c90268a", + "id": "nmdc:9238a5420065e1da9da31c270c90268a", + "file_size_bytes": 7585101 + }, + { + "name": "Gp0115677_SMART GFF file", + "description": "SMART GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_smart.gff", + "md5_checksum": "ce31f29ff8fed6d0a973d61157af7220", + "id": "nmdc:ce31f29ff8fed6d0a973d61157af7220", + "file_size_bytes": 18353478 + }, + { + "name": "Gp0115677_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_supfam.gff", + "md5_checksum": "016cbd549e03d896ed746ab91771b21a", + "id": "nmdc:016cbd549e03d896ed746ab91771b21a", + "file_size_bytes": 107179327 + }, + { + "name": "Gp0115677_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_cath_funfam.gff", + "md5_checksum": "7ef0abcd7fba705f6e9e26dcb8b1da8d", + "id": "nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", + "file_size_bytes": 85056001 + }, + { + "name": "Gp0115677_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/annotation/nmdc_mga0zb0766_ko_ec.gff", + "md5_checksum": "c935ce264779684a01c9a7777e506d02", + "id": "nmdc:c935ce264779684a01c9a7777e506d02", + "file_size_bytes": 61547317 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", 
+ "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115677_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.tooShort.fa", + "md5_checksum": "603009bd6294d2318d929a57b5d3e5d3", + "id": "nmdc:603009bd6294d2318d929a57b5d3e5d3", + "file_size_bytes": 215021876 + }, + { + "name": "Gp0115677_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_bins.unbinned.fa", + "md5_checksum": "c5334a4e305f78c294c304c3c0526826", + "id": "nmdc:c5334a4e305f78c294c304c3c0526826", + "file_size_bytes": 26658018 + }, + { + "name": "Gp0115677_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115677", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_checkm_qa.out", + "md5_checksum": "6adacc1ba06e5e451f3636c394c71ae8", + "id": "nmdc:6adacc1ba06e5e451f3636c394c71ae8", + "file_size_bytes": 1859 + }, + { + "name": "Gp0115677_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115677", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_hqmq_bin.zip", + "md5_checksum": "77d4e2a7f358b9ac1d53b69d7e8c45e1", + "id": "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", + "file_size_bytes": 2309404 + }, + { + "name": "Gp0115677_metabat2 bins", + "description": "metabat2 bins for Gp0115677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0zb0766/MAGs/nmdc_mga0zb0766_metabat_bin.zip", + "md5_checksum": "42c3fb9a3906f6b413f99e3276bb7550", + "id": "nmdc:42c3fb9a3906f6b413f99e3276bb7550", 
+ "file_size_bytes": 450699 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da3" + }, + "description": "Assembled contigs fasta for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/assembly_contigs.fna", + "file_size_bytes": 248003503, + "type": "nmdc:DataObject", + "id": "nmdc:9ca27b985234aaed07e3f6659e0416d0", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14da6" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/mapping_stats.txt", + "file_size_bytes": 40972895, + "type": "nmdc:DataObject", + "id": "nmdc:26847612e684ef73baf6d1daf75ba042", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db1" + }, + "description": "Assembled scaffold fasta for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/assembly_scaffolds.fna", + "file_size_bytes": 246291939, + "type": "nmdc:DataObject", + "id": "nmdc:fcac84657291d9e28e15e83d656ce7fd", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db2" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 5769674213, + "type": "nmdc:DataObject", + "id": "nmdc:32366af15429dcf03ef716a44fed367e", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14db4" + }, + "description": "Assembled AGP file for gold:Gp0115677", + "url": "https://data.microbiomedata.org/data/1781_86092/assembly/assembly.agp", + "file_size_bytes": 35864331, + "type": "nmdc:DataObject", + "id": "nmdc:4027ab07eb8931ae9c5a17b480d238b3", + "name": "assembly.agp", 
+ "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159f6" + }, + "id": "nmdc:9921b494b07bde6a76e1c3e9e4da11ce", + "name": "1781_86092.krona.html", + "description": "Gold:Gp0115677 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86092/ReadbasedAnalysis/centrifuge/1781_86092.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159fe" + }, + "id": "nmdc:a99cc1fb22427c72f4d6b67cec82948e", + "name": "1781_86092.json", + "description": "Gold:Gp0115677 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86092/ReadbasedAnalysis/1781_86092.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162ce" + }, + "id": "nmdc:7085e0d349fac196f897eadc405b081a", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115677", + "file_size_bytes": 208193586, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162cf" + }, + "id": "nmdc:20faf8ed13a8cde73b0522cb954acf0e", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115677", + "file_size_bytes": 28797768, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d1" + }, + "id": "nmdc:963a658cc3e72ac4966f260064cf3c4e", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115677", + "file_size_bytes": 832, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b162d3" + }, + "id": "nmdc:d5296a7efb592e30a0f2439b3a6aad40", + "name": "gold:Gp0115677.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 756006, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d4" + }, + "id": "nmdc:e371db754cc99ab772212052997f6e12", + "name": "gold:Gp0115677.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 557597, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d5" + }, + "id": "nmdc:82753f458e12c2c99121f4aec2f62b03", + "name": "gold:Gp0115677.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 314985, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d6" + }, + "id": "nmdc:e795c55b78d5504a3c9da194492bd8f4", + "name": "gold:Gp0115677.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 1087308, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d7" + }, + "id": "nmdc:5f7c596adbc713a159c13ac5e8d88e2f", + "name": "gold:Gp0115677.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 621780, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome 
Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d8" + }, + "id": "nmdc:56ebd3517fb82d228d679991b7b9cfb8", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115677", + "file_size_bytes": 2535, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162d9" + }, + "id": "nmdc:f81840dde4202f7feef24e36df596446", + "name": "gold:Gp0115677.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 291445, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162da" + }, + "id": "nmdc:edae13d8586f04ad81f447fa27adc7a9", + "name": "gold:Gp0115677.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 1196625, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162db" + }, + "id": "nmdc:ff320d478c10b7118c4da20ce49793c9", + "name": "gold:Gp0115677.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 225889, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162dc" + }, + "id": "nmdc:ca9221f7ba635008e04c92c7111633e9", + "name": "gold:Gp0115677.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 642978, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + 
"data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162dd" + }, + "id": "nmdc:2b9b32a7151436cea05cbddc6ec0dded", + "name": "gold:Gp0115677.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 723505, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162e0" + }, + "id": "nmdc:ee41bb68b0e1e5c7d4724b00c79e5649", + "name": "gold:Gp0115677.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115677", + "file_size_bytes": 323769, + "url": "https://data.microbiomedata.org/data/1781_86092/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6c" + }, + "description": "KO TSV File for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_ko.tsv", + "md5_checksum": "34d53203f08e6c25c8f85f6e04d6df24", + "file_size_bytes": 3385, + "id": "nmdc:34d53203f08e6c25c8f85f6e04d6df24", + "name": "gold:Gp0115677_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6f" + }, + "description": "EC TSV File for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_ec.tsv", + "md5_checksum": "8a39e09943350e563b00e23a146c3ec1", + "file_size_bytes": 3385, + "id": "nmdc:8a39e09943350e563b00e23a146c3ec1", + "name": "gold:Gp0115677_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d70" + }, + "description": "Functional annotation GFF file for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_functional_annotation.gff", + 
"md5_checksum": "e7df895e1a7776ba16b6d77fdc9b077d", + "file_size_bytes": 3385, + "id": "nmdc:e7df895e1a7776ba16b6d77fdc9b077d", + "name": "gold:Gp0115677_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d71" + }, + "description": "Protein FAA for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_proteins.faa", + "md5_checksum": "c0365d39cb481d6e0f729b587dac10c8", + "file_size_bytes": 3385, + "id": "nmdc:c0365d39cb481d6e0f729b587dac10c8", + "name": "gold:Gp0115677_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d77" + }, + "description": "Structural annotation GFF file for gold:Gp0115677", + "url": "https://data.microbiomedata.org/1781_86092/img_annotation/Ga0482250_structural_annotation.gff", + "md5_checksum": "bfbd1bd1ad70307dd01b699ecc4ffb2a", + "file_size_bytes": 3385, + "id": "nmdc:bfbd1bd1ad70307dd01b699ecc4ffb2a", + "name": "gold:Gp0115677_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34724" + }, + "has_input": [ + "nmdc:3d9e14d6f7a854042a7d71def080409b", + "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5", + "nmdc:075c3477b8874aa8d6c4dbc1360a2b38" + ], + "too_short_contig_num": 532333, + "part_of": [ + "nmdc:mga0zb0766" + ], + "binned_contig_num": 969, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:603009bd6294d2318d929a57b5d3e5d3", + "nmdc:c5334a4e305f78c294c304c3c0526826", + "nmdc:6adacc1ba06e5e451f3636c394c71ae8", + "nmdc:77d4e2a7f358b9ac1d53b69d7e8c45e1", + "nmdc:42c3fb9a3906f6b413f99e3276bb7550" + ], + "was_informed_by": "gold:Gp0115677", + "input_contig_num": 548756, + 
"id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0zb0766", + "mags_list": [ + { + "number_of_contig": 68, + "completeness": 3.17, + "bin_name": "bins.1", + "gene_count": 329, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 282, + "completeness": 59.56, + "bin_name": "bins.2", + "gene_count": 1735, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 26 + }, + { + "number_of_contig": 3, + "completeness": 54.6, + "bin_name": "bins.3", + "gene_count": 751, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA9983_A", + "num_16s": 1, + "gtdbtk_family": "UBA2163", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Paceibacteria", + "gtdbtk_phylum": "Patescibacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "1-14-0-10-47-16", + "num_t_rna": 22 + }, + { + "number_of_contig": 90, + "completeness": 98.7, + "bin_name": "bins.4", + "gene_count": 3042, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 2, + "num_23s": 2, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 46 + }, + { + "number_of_contig": 325, + "completeness": 73.34, + "bin_name": "bins.5", + "gene_count": 2576, + "bin_quality": "MQ", 
+ "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 1, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 0.91, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 35 + }, + { + "number_of_contig": 199, + "completeness": 49.14, + "bin_name": "bins.6", + "gene_count": 1046, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 2, + "completeness": 24.32, + "bin_name": "bins.7", + "gene_count": 329, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + } + ], + "unbinned_contig_num": 15454, + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:26:42+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c1" + }, + "has_input": [ + "nmdc:3d9e14d6f7a854042a7d71def080409b" + ], + "part_of": [ + "nmdc:mga0zb0766" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:4f9d82516561ee307b1ab4841255aff0", + "nmdc:a658e9045fde900cdc78d0578446b960", + "nmdc:075c3477b8874aa8d6c4dbc1360a2b38", + "nmdc:9a338a51c6ca2ec4e0da4e15903be407", + "nmdc:0f9e627ace8d9b8420e957bcd033244a", + "nmdc:144a997b22098f5fe748d2fa069cdc71", + "nmdc:82dc44c196f4b6b5552e8360f21f93a0", + "nmdc:9238a5420065e1da9da31c270c90268a", + "nmdc:ce31f29ff8fed6d0a973d61157af7220", + "nmdc:016cbd549e03d896ed746ab91771b21a", + 
"nmdc:7ef0abcd7fba705f6e9e26dcb8b1da8d", + "nmdc:c935ce264779684a01c9a7777e506d02" + ], + "was_informed_by": "gold:Gp0115677", + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0zb0766", + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:26:42+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fad" + }, + "has_input": [ + "nmdc:63c857b3011dec61a08044d518291f23" + ], + "part_of": [ + "nmdc:mga0zb0766" + ], + "ctg_logsum": 407938, + "scaf_logsum": 442802, + "gap_pct": 0.02562, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3d9e14d6f7a854042a7d71def080409b", + "nmdc:26d0d64ca7c850f0e04a4c33690bd178", + "nmdc:8f8a0622cfe39054bd20f11116c78402", + "nmdc:623aa370c44897cf30844647c2f5bd94", + "nmdc:f4a1cf24281f14a666a1bfc9afc0aab5" + ], + "asm_score": 13.853, + "was_informed_by": "gold:Gp0115677", + "ctg_powsum": 50872, + "scaf_max": 582605, + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "scaf_powsum": 55815, + "execution_resource": "NERSC-Cori", + "contigs": 548764, + "name": "Assembly Activity for nmdc:mga0zb0766", + "ctg_max": 464697, + "gc_std": 0.11035, + "contig_bp": 229799767, + "gc_avg": 0.55184, + "started_at_time": "2021-10-11T02:24:49Z", + "scaf_bp": 229858665, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 543003, + "ended_at_time": "2021-10-11T06:26:42+00:00", + "ctg_l50": 375, + "ctg_l90": 283, + "ctg_n50": 171281, + "ctg_n90": 471697, + "scaf_l50": 378, + "scaf_l90": 283, + "scaf_n50": 164840, + "scaf_n90": 466121, + "scaf_l_gt50k": 2790937, + "scaf_n_gt50k": 23, + "scaf_pct_gt50k": 1.2141969 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b4f" + }, + "id": "nmdc:omprc-11-qngh7497", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, 
Washington, USA - GW-RW T4_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-8362vs44" + ], + "has_output": [ + "jgi:55a9caff0d87852b2150891e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115677" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87a" + }, + "has_input": [ + "nmdc:80ca2cf2e3edcac29eb62b43f62e25c3" + ], + "part_of": [ + "nmdc:mga0zb0766" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:63c857b3011dec61a08044d518291f23", + "nmdc:2a79d7978caecf9b08fb2029fa42c9b3" + ], + "was_informed_by": "gold:Gp0115677", + "input_read_count": 65434428, + "output_read_bases": 9483843059, + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "input_read_bases": 9880598628, + "name": "Read QC Activity for nmdc:mga0zb0766", + "output_read_count": 64887080, + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:26:42+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf3c" + }, + "has_input": [ + "nmdc:63c857b3011dec61a08044d518291f23" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ba32f20b0cc5143783e00c5d1ba15223", + "nmdc:c1730daf5e6017219fd9fc079e42c132", + "nmdc:55b6c047c48f5bf9fb156f139992e4d8", + 
"nmdc:1c2e2dff881b35a25b4622bbc66c3140", + "nmdc:50f771c7bc17a0b184c2a10a24013f08", + "nmdc:229017cdb1832bb718d22dc27db44125", + "nmdc:49d5d11132bd5a02c3dd077d42a6a16b", + "nmdc:bdd701b44e67929ec8bbe279697da937", + "nmdc:d35583a5ed45df5a58bf084fc67bf988" + ], + "was_informed_by": "gold:Gp0115677", + "id": "nmdc:4fa91d90b1bd5f821a7f09edc8426939", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0zb0766", + "started_at_time": "2021-10-11T02:24:49Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:26:42+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1777604881, + "type": "nmdc:DataObject", + "id": "jgi:55d817f30d8785342fcf826d", + "name": "9387.2.132031.GGCTAC.fastq.gz" + }, + { + "name": "Gp0115675_Filtered Reads", + "description": "Filtered Reads for Gp0115675", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filtered.fastq.gz", + "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", + "id": "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", + "file_size_bytes": 1533239347 + }, + { + "name": "Gp0115675_Filtered Stats", + "description": "Filtered Stats for Gp0115675", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/qa/nmdc_mga0vf2h47_filterStats.txt", + "md5_checksum": "2507e3f107100ce0c72c57191d450818", + "id": "nmdc:2507e3f107100ce0c72c57191d450818", + "file_size_bytes": 287 + }, + { + "name": "Gp0115675_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report.tsv", + "md5_checksum": "60d673988c4f4447feb5985e8501e914", + "id": "nmdc:60d673988c4f4447feb5985e8501e914", + "file_size_bytes": 8921 + }, + { + "name": "Gp0115675_Gottcha2 full TSV report", + "description": "Gottcha2 full 
TSV report for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_report_full.tsv", + "md5_checksum": "a8f93ed13033eb949109b4e83980a893", + "id": "nmdc:a8f93ed13033eb949109b4e83980a893", + "file_size_bytes": 871109 + }, + { + "name": "Gp0115675_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115675", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_gottcha2_krona.html", + "md5_checksum": "31dd6eb616f1e9815778453ab1601195", + "id": "nmdc:31dd6eb616f1e9815778453ab1601195", + "file_size_bytes": 252578 + }, + { + "name": "Gp0115675_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115675", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_classification.tsv", + "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", + "id": "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "file_size_bytes": 1218767711 + }, + { + "name": "Gp0115675_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115675", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_report.tsv", + "md5_checksum": "0aaac507db0e29827e1c87df47324932", + "id": "nmdc:0aaac507db0e29827e1c87df47324932", + "file_size_bytes": 254260 + }, + { + "name": "Gp0115675_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115675", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_centrifuge_krona.html", + "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", + "id": "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "file_size_bytes": 
2324387 + }, + { + "name": "Gp0115675_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115675", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_classification.tsv", + "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", + "id": "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "file_size_bytes": 1001846607 + }, + { + "name": "Gp0115675_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115675", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_report.tsv", + "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", + "id": "nmdc:1ec0247d86889fcef13f39a58a92b066", + "file_size_bytes": 635541 + }, + { + "name": "Gp0115675_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115675", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/ReadbasedAnalysis/nmdc_mga0vf2h47_kraken2_krona.html", + "md5_checksum": "242a1c60f6cb14ba8430375171fda436", + "id": "nmdc:242a1c60f6cb14ba8430375171fda436", + "file_size_bytes": 3968420 + }, + { + "name": "Gp0115675_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115675", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_contigs.fna", + "md5_checksum": "dd5cad9348fc41cb18ac989185fed0b5", + "id": "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "file_size_bytes": 41662357 + }, + { + "name": "Gp0115675_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115675", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_scaffolds.fna", + "md5_checksum": "6d02084941141ac9a1876c621a50aef0", + "id": 
"nmdc:6d02084941141ac9a1876c621a50aef0", + "file_size_bytes": 41417652 + }, + { + "name": "Gp0115675_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_covstats.txt", + "md5_checksum": "cc8faed3494579d793c08ede54cb5b3a", + "id": "nmdc:cc8faed3494579d793c08ede54cb5b3a", + "file_size_bytes": 6338871 + }, + { + "name": "Gp0115675_Assembled AGP file", + "description": "Assembled AGP file for Gp0115675", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_assembly.agp", + "md5_checksum": "8891e46c9766f2b84d45fd6e46078a64", + "id": "nmdc:8891e46c9766f2b84d45fd6e46078a64", + "file_size_bytes": 5901316 + }, + { + "name": "Gp0115675_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115675", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/assembly/nmdc_mga0vf2h47_pairedMapped_sorted.bam", + "md5_checksum": "80470769e7531b46c709d12c65487ffe", + "id": "nmdc:80470769e7531b46c709d12c65487ffe", + "file_size_bytes": 1635169657 + }, + { + "name": "Gp0115675_Protein FAA", + "description": "Protein FAA for Gp0115675", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_proteins.faa", + "md5_checksum": "93ea50ce57263b498b781240c04dbf46", + "id": "nmdc:93ea50ce57263b498b781240c04dbf46", + "file_size_bytes": 23383485 + }, + { + "name": "Gp0115675_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115675", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_structural_annotation.gff", + "md5_checksum": "71195b9bc697bf29cd865718a689eb1b", + "id": 
"nmdc:71195b9bc697bf29cd865718a689eb1b", + "file_size_bytes": 2508 + }, + { + "name": "Gp0115675_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115675", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_functional_annotation.gff", + "md5_checksum": "d8cccd9c5cd237c238e5ba443c477db5", + "id": "nmdc:d8cccd9c5cd237c238e5ba443c477db5", + "file_size_bytes": 26575202 + }, + { + "name": "Gp0115675_KO TSV file", + "description": "KO TSV file for Gp0115675", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko.tsv", + "md5_checksum": "1cb17c4c7681345f53a7f4ef5c319fba", + "id": "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", + "file_size_bytes": 3577030 + }, + { + "name": "Gp0115675_EC TSV file", + "description": "EC TSV file for Gp0115675", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ec.tsv", + "md5_checksum": "17e386be26f52833c463a89733ef2e34", + "id": "nmdc:17e386be26f52833c463a89733ef2e34", + "file_size_bytes": 2294485 + }, + { + "name": "Gp0115675_COG GFF file", + "description": "COG GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cog.gff", + "md5_checksum": "3e9b2fd11f2f5c16f9f25560e3b6fc55", + "id": "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", + "file_size_bytes": 15181628 + }, + { + "name": "Gp0115675_PFAM GFF file", + "description": "PFAM GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_pfam.gff", + "md5_checksum": "b11e36753299e36fa92670cf75165698", + "id": "nmdc:b11e36753299e36fa92670cf75165698", + "file_size_bytes": 11905020 + }, + { + "name": "Gp0115675_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115675", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_tigrfam.gff", + "md5_checksum": "70ac1de5fbc6cc835d5a0d1855f7a28a", + "id": "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", + "file_size_bytes": 1629352 + }, + { + "name": "Gp0115675_SMART GFF file", + "description": "SMART GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_smart.gff", + "md5_checksum": "b9e3eb74fa7fee0fac886f8a436b9ecf", + "id": "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", + "file_size_bytes": 3360419 + }, + { + "name": "Gp0115675_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_supfam.gff", + "md5_checksum": "faa27c2be6dc56e66f739dbffcbb6bef", + "id": "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", + "file_size_bytes": 19134944 + }, + { + "name": "Gp0115675_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_cath_funfam.gff", + "md5_checksum": "b080e9d168c0c1330fda64814afe335b", + "id": "nmdc:b080e9d168c0c1330fda64814afe335b", + "file_size_bytes": 15037016 + }, + { + "name": "Gp0115675_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/annotation/nmdc_mga0vf2h47_ko_ec.gff", + "md5_checksum": "4ea799de0bc051409b7231801eea0129", + "id": "nmdc:4ea799de0bc051409b7231801eea0129", + "file_size_bytes": 11398449 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"file_size_bytes": 0 + }, + { + "name": "Gp0115675_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.tooShort.fa", + "md5_checksum": "826503b4204b77c319c0bb353d69818e", + "id": "nmdc:826503b4204b77c319c0bb353d69818e", + "file_size_bytes": 31246547 + }, + { + "name": "Gp0115675_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_bins.unbinned.fa", + "md5_checksum": "9a02c2954014bb8dcd62800609dd3ec5", + "id": "nmdc:9a02c2954014bb8dcd62800609dd3ec5", + "file_size_bytes": 6258719 + }, + { + "name": "Gp0115675_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115675", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_checkm_qa.out", + "md5_checksum": "d15ed915946e095d045d73f4b4de019d", + "id": "nmdc:d15ed915946e095d045d73f4b4de019d", + "file_size_bytes": 1092 + }, + { + "name": "Gp0115675_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115675", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_hqmq_bin.zip", + "md5_checksum": "8de4404b1a6601bae7d7d5fd51bd131a", + "id": "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", + "file_size_bytes": 182 + }, + { + "name": "Gp0115675_metabat2 bins", + "description": "metabat2 bins for Gp0115675", + "url": "https://data.microbiomedata.org/data/nmdc:mga0vf2h47/MAGs/nmdc_mga0vf2h47_metabat_bin.zip", + "md5_checksum": "55f66520d821205e80dcd303cc2793bc", + "id": "nmdc:55f66520d821205e80dcd303cc2793bc", + "file_size_bytes": 1259160 + }, + { + "_id": { + 
"$oid": "649b003c1ae706d7b5b14d9a" + }, + "description": "Assembled contigs fasta for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly_contigs.fna", + "file_size_bytes": 41258072, + "type": "nmdc:DataObject", + "id": "nmdc:333b8256818eefecf0581f31a45719f9", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9b" + }, + "description": "Assembled AGP file for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly.agp", + "file_size_bytes": 5091186, + "type": "nmdc:DataObject", + "id": "nmdc:a153a87ca330ba427510d800ac847c95", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/mapping_stats.txt", + "file_size_bytes": 5934586, + "type": "nmdc:DataObject", + "id": "nmdc:6eca425a70ac889b1d110b88f7907b74", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d9d" + }, + "description": "Assembled scaffold fasta for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/assembly_scaffolds.fna", + "file_size_bytes": 41014137, + "type": "nmdc:DataObject", + "id": "nmdc:b6558fa3c0fcd24593d86fc5c63ab5b5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14d9f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115675", + "url": "https://data.microbiomedata.org/data/1781_86090/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1614431528, + "type": "nmdc:DataObject", + "id": "nmdc:c7cce636f2a9bd54e8f62742da37f5cc", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": 
{ + "$oid": "649b003d1ae706d7b5b159e6" + }, + "id": "nmdc:f4335a3fd80dc97fbf2ce8bc5b64f0a4", + "name": "1781_86090.krona.html", + "description": "Gold:Gp0115675 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86090/ReadbasedAnalysis/centrifuge/1781_86090.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159ea" + }, + "id": "nmdc:05966e29e6f087d77b9e766b5fb9c64f", + "name": "1781_86090.json", + "description": "Gold:Gp0115675 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86090/ReadbasedAnalysis/1781_86090.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162bd" + }, + "id": "nmdc:360d6a6ecfa44731fe4d69f778f11285", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115675", + "file_size_bytes": 6936846, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c0" + }, + "id": "nmdc:5aa5000cbfc131f8162b1cd1bc37698e", + "name": "gold:Gp0115675.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115675", + "file_size_bytes": 2739890, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c1" + }, + "id": "nmdc:aaaee9ac7ea2ec601b554f01e38e2a4c", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115675", + "file_size_bytes": 1176, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c2" + }, + "id": 
"nmdc:d2c69965d41ba1023d9422e40e3366cc", + "name": "gold:Gp0115675.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115675", + "file_size_bytes": 221316, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c3" + }, + "id": "nmdc:bec36f95050a4825f0e8eec250dec56a", + "name": "gold:Gp0115675.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115675", + "file_size_bytes": 464857, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162c4" + }, + "id": "nmdc:83f09a72cb190961374eae70d64af121", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115675", + "file_size_bytes": 30259643, + "url": "https://data.microbiomedata.org/data/1781_86090/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d66" + }, + "description": "EC TSV File for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_ec.tsv", + "md5_checksum": "b30bdfcd025588bd80ebb3bcdad2cdc8", + "file_size_bytes": 3385, + "id": "nmdc:b30bdfcd025588bd80ebb3bcdad2cdc8", + "name": "gold:Gp0115675_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d69" + }, + "description": "Functional annotation GFF file for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_functional_annotation.gff", + "md5_checksum": "e745ff0c0a95c89393f8789cd8c409e9", + "file_size_bytes": 3385, + "id": "nmdc:e745ff0c0a95c89393f8789cd8c409e9", + "name": "gold:Gp0115675_Functional annotation GFF 
file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d6a" + }, + "description": "KO TSV File for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_ko.tsv", + "md5_checksum": "7ab72f45de20843e167ee1e595bb752d", + "file_size_bytes": 3385, + "id": "nmdc:7ab72f45de20843e167ee1e595bb752d", + "name": "gold:Gp0115675_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dab" + }, + "description": "Structural annotation GFF file for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_structural_annotation.gff", + "md5_checksum": "dcb8211231f718d57e22f8dea1efc6d0", + "file_size_bytes": 3385, + "id": "nmdc:dcb8211231f718d57e22f8dea1efc6d0", + "name": "gold:Gp0115675_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16dc7" + }, + "description": "Protein FAA for gold:Gp0115675", + "url": "https://data.microbiomedata.org/1781_86090/img_annotation/Ga0482252_proteins.faa", + "md5_checksum": "51f3c008db6a106ee14e160f35f7d9f3", + "file_size_bytes": 3385, + "id": "nmdc:51f3c008db6a106ee14e160f35f7d9f3", + "name": "gold:Gp0115675_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34722" + }, + "has_input": [ + "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "nmdc:80470769e7531b46c709d12c65487ffe", + "nmdc:d8cccd9c5cd237c238e5ba443c477db5" + ], + "too_short_contig_num": 76352, + "part_of": [ + "nmdc:mga0vf2h47" + ], + "binned_contig_num": 846, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"nmdc:826503b4204b77c319c0bb353d69818e", + "nmdc:9a02c2954014bb8dcd62800609dd3ec5", + "nmdc:d15ed915946e095d045d73f4b4de019d", + "nmdc:8de4404b1a6601bae7d7d5fd51bd131a", + "nmdc:55f66520d821205e80dcd303cc2793bc" + ], + "was_informed_by": "gold:Gp0115675", + "input_contig_num": 80857, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0vf2h47", + "mags_list": [ + { + "number_of_contig": 579, + "completeness": 73.87, + "bin_name": "bins.1", + "gene_count": 3274, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 25.78, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 37 + }, + { + "number_of_contig": 199, + "completeness": 36.21, + "bin_name": "bins.2", + "gene_count": 1070, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 68, + "completeness": 4.17, + "bin_name": "bins.3", + "gene_count": 480, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 4.17, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 3659, + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:25:21+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c2" + }, + "has_input": [ + "nmdc:dd5cad9348fc41cb18ac989185fed0b5" + ], + "part_of": [ + "nmdc:mga0vf2h47" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:93ea50ce57263b498b781240c04dbf46", + "nmdc:71195b9bc697bf29cd865718a689eb1b", + "nmdc:d8cccd9c5cd237c238e5ba443c477db5", + "nmdc:1cb17c4c7681345f53a7f4ef5c319fba", + "nmdc:17e386be26f52833c463a89733ef2e34", + "nmdc:3e9b2fd11f2f5c16f9f25560e3b6fc55", + "nmdc:b11e36753299e36fa92670cf75165698", + "nmdc:70ac1de5fbc6cc835d5a0d1855f7a28a", + "nmdc:b9e3eb74fa7fee0fac886f8a436b9ecf", + "nmdc:faa27c2be6dc56e66f739dbffcbb6bef", + "nmdc:b080e9d168c0c1330fda64814afe335b", + "nmdc:4ea799de0bc051409b7231801eea0129" + ], + "was_informed_by": "gold:Gp0115675", + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0vf2h47", + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:25:21+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139faf" + }, + "has_input": [ + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + ], + "part_of": [ + "nmdc:mga0vf2h47" + ], + "ctg_logsum": 115425, + "scaf_logsum": 116377, + "gap_pct": 0.00425, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dd5cad9348fc41cb18ac989185fed0b5", + "nmdc:6d02084941141ac9a1876c621a50aef0", + "nmdc:cc8faed3494579d793c08ede54cb5b3a", + "nmdc:8891e46c9766f2b84d45fd6e46078a64", + "nmdc:80470769e7531b46c709d12c65487ffe" + ], + "asm_score": 4.718, + "was_informed_by": "gold:Gp0115675", + "ctg_powsum": 13174, + "scaf_max": 25635, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "scaf_powsum": 13311, + "execution_resource": "NERSC-Cori", + "contigs": 80858, + "name": "Assembly Activity for nmdc:mga0vf2h47", + "ctg_max": 25635, + "gc_std": 0.10716, + "contig_bp": 38571486, + "gc_avg": 0.56103, + "started_at_time": "2021-10-11T02:28:05Z", + "scaf_bp": 38573126, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 80703, 
+ "ended_at_time": "2021-10-11T03:25:21+00:00", + "ctg_l50": 435, + "ctg_l90": 284, + "ctg_n50": 19932, + "ctg_n90": 68422, + "scaf_l50": 436, + "scaf_l90": 284, + "scaf_n50": 19754, + "scaf_n90": 68272 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b50" + }, + "id": "nmdc:omprc-11-jk7zjz92", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-a5d23e19" + ], + "has_output": [ + "jgi:55d817f30d8785342fcf826d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115675" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87d" + }, + "has_input": [ + "nmdc:4a9a0183b794a98c57e5b5ce959a3f65" + ], + "part_of": [ + "nmdc:mga0vf2h47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7", + "nmdc:2507e3f107100ce0c72c57191d450818" + ], + "was_informed_by": "gold:Gp0115675", + "input_read_count": 18827380, + "output_read_bases": 2508839784, + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "input_read_bases": 2842934380, + "name": "Read QC Activity for nmdc:mga0vf2h47", + "output_read_count": 16749572, + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": 
"2021-10-11T03:25:21+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf41" + }, + "has_input": [ + "nmdc:54e3a71218d04224719e0dc8a7fdf9c7" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:60d673988c4f4447feb5985e8501e914", + "nmdc:a8f93ed13033eb949109b4e83980a893", + "nmdc:31dd6eb616f1e9815778453ab1601195", + "nmdc:6d7a930d79f220b06cde8fbf8339e744", + "nmdc:0aaac507db0e29827e1c87df47324932", + "nmdc:6aec8677139ed24ef9cfe0c75b30056f", + "nmdc:d39369f32ada967d7cf52cb503fccf4a", + "nmdc:1ec0247d86889fcef13f39a58a92b066", + "nmdc:242a1c60f6cb14ba8430375171fda436" + ], + "was_informed_by": "gold:Gp0115675", + "id": "nmdc:4cfac32b9cd7a8dd01d49117e1078a79", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0vf2h47", + "started_at_time": "2021-10-11T02:28:05Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:25:21+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 4637325661, + "type": "nmdc:DataObject", + "id": "jgi:55f23d820d8785306f964980", + "name": "9491.1.134352.AGTTCC.fastq.gz" + }, + { + "name": "Gp0115665_Filtered Reads", + "description": "Filtered Reads for Gp0115665", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filtered.fastq.gz", + "md5_checksum": "b0462e18cf9dafc9d2207a58bf085530", + "id": "nmdc:b0462e18cf9dafc9d2207a58bf085530", + "file_size_bytes": 4096192298 + }, + { + "name": "Gp0115665_Filtered Stats", + "description": "Filtered Stats for Gp0115665", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/qa/nmdc_mga06n7k74_filterStats.txt", + "md5_checksum": "f0e1b9004b0e9aafb06c444444a522c7", + "id": "nmdc:f0e1b9004b0e9aafb06c444444a522c7", + "file_size_bytes": 291 + }, + { + 
"name": "Gp0115665_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report.tsv", + "md5_checksum": "432fedddcbacb4e69c0350354ab44080", + "id": "nmdc:432fedddcbacb4e69c0350354ab44080", + "file_size_bytes": 18015 + }, + { + "name": "Gp0115665_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_report_full.tsv", + "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", + "id": "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "file_size_bytes": 1283220 + }, + { + "name": "Gp0115665_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115665", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_gottcha2_krona.html", + "md5_checksum": "e3d7339ba5c7677be13854f391462474", + "id": "nmdc:e3d7339ba5c7677be13854f391462474", + "file_size_bytes": 281366 + }, + { + "name": "Gp0115665_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115665", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_classification.tsv", + "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", + "id": "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "file_size_bytes": 3481369185 + }, + { + "name": "Gp0115665_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115665", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_report.tsv", + "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", + "id": "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + 
"file_size_bytes": 263480 + }, + { + "name": "Gp0115665_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115665", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_centrifuge_krona.html", + "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", + "id": "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "file_size_bytes": 2347079 + }, + { + "name": "Gp0115665_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115665", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_classification.tsv", + "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", + "id": "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "file_size_bytes": 2866138771 + }, + { + "name": "Gp0115665_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115665", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_report.tsv", + "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", + "id": "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "file_size_bytes": 728030 + }, + { + "name": "Gp0115665_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115665", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/ReadbasedAnalysis/nmdc_mga06n7k74_kraken2_krona.html", + "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", + "id": "nmdc:9aa0ec113eb8dd22e7f574216d1760b2", + "file_size_bytes": 4374689 + }, + { + "name": "Gp0115665_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115665", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_contigs.fna", + "md5_checksum": 
"9704e757dc537a7f06c6f83fc633cf64", + "id": "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "file_size_bytes": 185880663 + }, + { + "name": "Gp0115665_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115665", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_scaffolds.fna", + "md5_checksum": "2674db4e7e6171864fa47f0b3b5a9603", + "id": "nmdc:2674db4e7e6171864fa47f0b3b5a9603", + "file_size_bytes": 184819604 + }, + { + "name": "Gp0115665_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_covstats.txt", + "md5_checksum": "ab6c496a5e3ab895fee3812fd992e1e7", + "id": "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", + "file_size_bytes": 27961807 + }, + { + "name": "Gp0115665_Assembled AGP file", + "description": "Assembled AGP file for Gp0115665", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_assembly.agp", + "md5_checksum": "5a1240fa0a6bf92c95e852c0352e5839", + "id": "nmdc:5a1240fa0a6bf92c95e852c0352e5839", + "file_size_bytes": 26248242 + }, + { + "name": "Gp0115665_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115665", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/assembly/nmdc_mga06n7k74_pairedMapped_sorted.bam", + "md5_checksum": "e28c85b50e0b654626e655755165aff5", + "id": "nmdc:e28c85b50e0b654626e655755165aff5", + "file_size_bytes": 4460978045 + }, + { + "name": "Gp0115665_Protein FAA", + "description": "Protein FAA for Gp0115665", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_proteins.faa", + "md5_checksum": "2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "id": 
"nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "file_size_bytes": 100719814 + }, + { + "name": "Gp0115665_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115665", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_structural_annotation.gff", + "md5_checksum": "6c55ce2e0d6e74d217d850b273c4f0c4", + "id": "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", + "file_size_bytes": 2534 + }, + { + "name": "Gp0115665_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115665", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_functional_annotation.gff", + "md5_checksum": "b3add25cdb76a537e70617ac6a1d1fc5", + "id": "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", + "file_size_bytes": 110405026 + }, + { + "name": "Gp0115665_KO TSV file", + "description": "KO TSV file for Gp0115665", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko.tsv", + "md5_checksum": "b782707ae2cf5676596ca99800deea26", + "id": "nmdc:b782707ae2cf5676596ca99800deea26", + "file_size_bytes": 12963636 + }, + { + "name": "Gp0115665_EC TSV file", + "description": "EC TSV file for Gp0115665", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ec.tsv", + "md5_checksum": "6a8565bf52f70efa03c755a9f0b82d7d", + "id": "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", + "file_size_bytes": 8371381 + }, + { + "name": "Gp0115665_COG GFF file", + "description": "COG GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cog.gff", + "md5_checksum": "f5d79b4c69825e0b66153e7582cb489b", + "id": "nmdc:f5d79b4c69825e0b66153e7582cb489b", + "file_size_bytes": 
56948501 + }, + { + "name": "Gp0115665_PFAM GFF file", + "description": "PFAM GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_pfam.gff", + "md5_checksum": "f66a0eaa9432ef5a2dd390214f47eed5", + "id": "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", + "file_size_bytes": 45618277 + }, + { + "name": "Gp0115665_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_tigrfam.gff", + "md5_checksum": "26cc0a40aab6bfc64d24afa760b43102", + "id": "nmdc:26cc0a40aab6bfc64d24afa760b43102", + "file_size_bytes": 5245489 + }, + { + "name": "Gp0115665_SMART GFF file", + "description": "SMART GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_smart.gff", + "md5_checksum": "83785a6e8f7658dc2354b9bad1b86d01", + "id": "nmdc:83785a6e8f7658dc2354b9bad1b86d01", + "file_size_bytes": 15993417 + }, + { + "name": "Gp0115665_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_supfam.gff", + "md5_checksum": "0f03207aa38d1aec8afdbf2bec1e4990", + "id": "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", + "file_size_bytes": 76926960 + }, + { + "name": "Gp0115665_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_cath_funfam.gff", + "md5_checksum": "4876eed2bee3b3b7b2ac827857410be6", + "id": "nmdc:4876eed2bee3b3b7b2ac827857410be6", + "file_size_bytes": 61571084 + }, + { + "name": "Gp0115665_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/annotation/nmdc_mga06n7k74_ko_ec.gff", + "md5_checksum": "bb5b62735a896d189c9a274c6e091bab", + "id": "nmdc:bb5b62735a896d189c9a274c6e091bab", + 
"file_size_bytes": 41244685 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115665_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.tooShort.fa", + "md5_checksum": "79794b0497c1a4a292778ddb94504f7a", + "id": "nmdc:79794b0497c1a4a292778ddb94504f7a", + "file_size_bytes": 146322768 + }, + { + "name": "Gp0115665_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_bins.unbinned.fa", + "md5_checksum": "e26dc245e491a521a94fbb9ab1b4293d", + "id": "nmdc:e26dc245e491a521a94fbb9ab1b4293d", + "file_size_bytes": 30116585 + }, + { + "name": "Gp0115665_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115665", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_checkm_qa.out", + "md5_checksum": "45cb473694eb3cfa8abc7768e87ef303", + "id": "nmdc:45cb473694eb3cfa8abc7768e87ef303", + "file_size_bytes": 1700 + }, + { + "name": "Gp0115665_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115665", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_hqmq_bin.zip", + "md5_checksum": 
"e344d87dbac42a645fd3c7d5b9d0a1a5", + "id": "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", + "file_size_bytes": 2294379 + }, + { + "name": "Gp0115665_metabat2 bins", + "description": "metabat2 bins for Gp0115665", + "url": "https://data.microbiomedata.org/data/nmdc:mga06n7k74/MAGs/nmdc_mga06n7k74_metabat_bin.zip", + "md5_checksum": "1098bd9921c6ab8f52aca786e3b7bf1d", + "id": "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d", + "file_size_bytes": 534425 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d68" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/mapping_stats.txt", + "file_size_bytes": 26201542, + "type": "nmdc:DataObject", + "id": "nmdc:0b1c4ab81deba76f53eb5b266566cc4e", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d69" + }, + "description": "Assembled scaffold fasta for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly_scaffolds.fna", + "file_size_bytes": 183060964, + "type": "nmdc:DataObject", + "id": "nmdc:4a666a393dc8497e61a35c6842a369be", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6a" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4399182435, + "type": "nmdc:DataObject", + "id": "nmdc:292511a07ffb1791b7546b4db9843a07", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d6b" + }, + "description": "Assembled AGP file for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly.agp", + "file_size_bytes": 22724422, + "type": "nmdc:DataObject", + "id": "nmdc:a54a1ab841896539024d2748e1b66131", + "name": 
"assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d70" + }, + "description": "Assembled contigs fasta for gold:Gp0115665", + "url": "https://data.microbiomedata.org/data/1781_86094/assembly/assembly_contigs.fna", + "file_size_bytes": 184120398, + "type": "nmdc:DataObject", + "id": "nmdc:2235febcd5329a40beb86d8d8411e0c1", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b1598e" + }, + "id": "nmdc:79966acbb43ba7d6f0044b54b7c44a6b", + "name": "1781_86094.krona.html", + "description": "Gold:Gp0115665 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86094/ReadbasedAnalysis/centrifuge/1781_86094.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159a4" + }, + "id": "nmdc:7a86a0c36f9ef12596ff3796cd9277d9", + "name": "1781_86094.json", + "description": "Gold:Gp0115665 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86094/ReadbasedAnalysis/1781_86094.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625e" + }, + "id": "nmdc:f9db08f4e245f63eede42dedcdbb4def", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115665", + "file_size_bytes": 2145, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16260" + }, + "id": "nmdc:97218a09f8e0949fea768a5c616191b0", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115665", + "file_size_bytes": 33177668, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { 
+ "$oid": "649b003f1ae706d7b5b16262" + }, + "id": "nmdc:7ecd6e3a6a8cb6fe9365b57becf82216", + "name": "gold:Gp0115665.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 1121208, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16263" + }, + "id": "nmdc:7596d700873fd3e46d0f78284fe0c0f5", + "name": "gold:Gp0115665.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 298154, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16264" + }, + "id": "nmdc:b44f6446e22ea134f1f306f0412f0f5f", + "name": "gold:Gp0115665.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 591265, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16265" + }, + "id": "nmdc:fcb189e3ab45040fe52458c90dd29f6c", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115665", + "file_size_bytes": 142092787, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16266" + }, + "id": "nmdc:ee01c153a5253dfe9e8de1bbbf9480fe", + "name": "gold:Gp0115665.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 1178929, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + 
"_id": { + "$oid": "649b003f1ae706d7b5b16268" + }, + "id": "nmdc:e08dcba67c8844436094e8c77b0f16f9", + "name": "gold:Gp0115665.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 964743, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16269" + }, + "id": "nmdc:54d4d93e1b2c00805f59308f47c950dd", + "name": "gold:Gp0115665.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 377968, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626a" + }, + "id": "nmdc:b7d7c5484e7902cd4fd2e5b946467326", + "name": "gold:Gp0115665.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 221952, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626c" + }, + "id": "nmdc:ec4e307b82b0fc7da2a123df702d2202", + "name": "gold:Gp0115665.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 1185129, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1626f" + }, + "id": "nmdc:43913ff26d7b2fd7315e4468948fccac", + "name": "gold:Gp0115665.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115665", + "file_size_bytes": 205826, + "url": "https://data.microbiomedata.org/data/1781_86094/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + 
"data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d72" + }, + "description": "Protein FAA for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_proteins.faa", + "md5_checksum": "1b5b79d300bb60afffec76da4cda7f14", + "file_size_bytes": 3385, + "id": "nmdc:1b5b79d300bb60afffec76da4cda7f14", + "name": "gold:Gp0115665_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d74" + }, + "description": "KO TSV File for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_ko.tsv", + "md5_checksum": "e28746f79f2d58d71fd5f42dff8b6dd5", + "file_size_bytes": 3385, + "id": "nmdc:e28746f79f2d58d71fd5f42dff8b6dd5", + "name": "gold:Gp0115665_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d79" + }, + "description": "Functional annotation GFF file for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_functional_annotation.gff", + "md5_checksum": "dceabe03f9758a72038b9824794337e1", + "file_size_bytes": 3385, + "id": "nmdc:dceabe03f9758a72038b9824794337e1", + "name": "gold:Gp0115665_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7a" + }, + "description": "Structural annotation GFF file for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_structural_annotation.gff", + "md5_checksum": "431860b46c896880c1d8d779fb2645ec", + "file_size_bytes": 3385, + "id": "nmdc:431860b46c896880c1d8d779fb2645ec", + "name": "gold:Gp0115665_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b00401ae706d7b5b16dc6" + }, + "description": "EC TSV File for gold:Gp0115665", + "url": "https://data.microbiomedata.org/1781_86094/img_annotation/Ga0482262_ec.tsv", + "md5_checksum": "b4a623a8d9418c04567b5712889fcdfd", + "file_size_bytes": 3385, + "id": "nmdc:b4a623a8d9418c04567b5712889fcdfd", + "name": "gold:Gp0115665_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34738" + }, + "has_input": [ + "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "nmdc:e28c85b50e0b654626e655755165aff5", + "nmdc:b3add25cdb76a537e70617ac6a1d1fc5" + ], + "too_short_contig_num": 331533, + "part_of": [ + "nmdc:mga06n7k74" + ], + "binned_contig_num": 1636, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:79794b0497c1a4a292778ddb94504f7a", + "nmdc:e26dc245e491a521a94fbb9ab1b4293d", + "nmdc:45cb473694eb3cfa8abc7768e87ef303", + "nmdc:e344d87dbac42a645fd3c7d5b9d0a1a5", + "nmdc:1098bd9921c6ab8f52aca786e3b7bf1d" + ], + "was_informed_by": "gold:Gp0115665", + "input_contig_num": 352053, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga06n7k74", + "mags_list": [ + { + "number_of_contig": 211, + "completeness": 44.36, + "bin_name": "bins.1", + "gene_count": 1029, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.49, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 564, + "completeness": 79.11, + "bin_name": "bins.2", + "gene_count": 4164, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", 
+ "contamination": 3.11, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Aquabacterium", + "num_t_rna": 33 + }, + { + "number_of_contig": 646, + "completeness": 72.48, + "bin_name": "bins.3", + "gene_count": 4108, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.62, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Rhizobacter", + "num_t_rna": 28 + }, + { + "number_of_contig": 67, + "completeness": 1.97, + "bin_name": "bins.4", + "gene_count": 257, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 64, + "completeness": 7.47, + "bin_name": "bins.5", + "gene_count": 259, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 84, + "completeness": 3.88, + "bin_name": "bins.6", + "gene_count": 313, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + } + ], + "unbinned_contig_num": 18884, + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:19:29+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + 
"_id": { + "$oid": "649b005bbf2caae0415ef9d2" + }, + "has_input": [ + "nmdc:9704e757dc537a7f06c6f83fc633cf64" + ], + "part_of": [ + "nmdc:mga06n7k74" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2d23b05bda1c60f2ef6d54c8fe5fb5e7", + "nmdc:6c55ce2e0d6e74d217d850b273c4f0c4", + "nmdc:b3add25cdb76a537e70617ac6a1d1fc5", + "nmdc:b782707ae2cf5676596ca99800deea26", + "nmdc:6a8565bf52f70efa03c755a9f0b82d7d", + "nmdc:f5d79b4c69825e0b66153e7582cb489b", + "nmdc:f66a0eaa9432ef5a2dd390214f47eed5", + "nmdc:26cc0a40aab6bfc64d24afa760b43102", + "nmdc:83785a6e8f7658dc2354b9bad1b86d01", + "nmdc:0f03207aa38d1aec8afdbf2bec1e4990", + "nmdc:4876eed2bee3b3b7b2ac827857410be6", + "nmdc:bb5b62735a896d189c9a274c6e091bab" + ], + "was_informed_by": "gold:Gp0115665", + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga06n7k74", + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:19:29+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fc2" + }, + "has_input": [ + "nmdc:b0462e18cf9dafc9d2207a58bf085530" + ], + "part_of": [ + "nmdc:mga06n7k74" + ], + "ctg_logsum": 427633, + "scaf_logsum": 429769, + "gap_pct": 0.00206, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9704e757dc537a7f06c6f83fc633cf64", + "nmdc:2674db4e7e6171864fa47f0b3b5a9603", + "nmdc:ab6c496a5e3ab895fee3812fd992e1e7", + "nmdc:5a1240fa0a6bf92c95e852c0352e5839", + "nmdc:e28c85b50e0b654626e655755165aff5" + ], + "asm_score": 5.768, + "was_informed_by": "gold:Gp0115665", + "ctg_powsum": 48025, + "scaf_max": 44931, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "scaf_powsum": 48321, + "execution_resource": "NERSC-Cori", + "contigs": 352055, + "name": "Assembly Activity for nmdc:mga06n7k74", + "ctg_max": 44931, + "gc_std": 0.13027, + 
"contig_bp": 172051088, + "gc_avg": 0.51918, + "started_at_time": "2021-10-11T02:28:54Z", + "scaf_bp": 172054628, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 351728, + "ended_at_time": "2021-10-11T06:19:29+00:00", + "ctg_l50": 468, + "ctg_l90": 289, + "ctg_n50": 95561, + "ctg_n90": 294969, + "scaf_l50": 468, + "scaf_l90": 289, + "scaf_n50": 95446, + "scaf_n90": 294658 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b51" + }, + "id": "nmdc:omprc-11-2jt0jk84", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qjtgh002" + ], + "has_output": [ + "jgi:55f23d820d8785306f964980" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115665" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88e" + }, + "has_input": [ + "nmdc:0d39aafcd16496457fbb3be0f785b67f" + ], + "part_of": [ + "nmdc:mga06n7k74" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b0462e18cf9dafc9d2207a58bf085530", + "nmdc:f0e1b9004b0e9aafb06c444444a522c7" + ], + "was_informed_by": "gold:Gp0115665", + "input_read_count": 50719572, + "output_read_bases": 7175148255, + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + "input_read_bases": 7658655372, + "name": "Read QC Activity for 
nmdc:mga06n7k74", + "output_read_count": 47896142, + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:19:29+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf57" + }, + "has_input": [ + "nmdc:b0462e18cf9dafc9d2207a58bf085530" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:432fedddcbacb4e69c0350354ab44080", + "nmdc:50b9a4c83b2ec0d1dd683cb8814ed5ad", + "nmdc:e3d7339ba5c7677be13854f391462474", + "nmdc:7bf922ee2f9fc298c031e2ff7d5abe0d", + "nmdc:33a20a77c3dc5b4feb102d66dfbfbe11", + "nmdc:30bdf0aedf771221ca3f7f18ff4e0067", + "nmdc:8e21ac30de17de0d1051d7d223d0aa0f", + "nmdc:64459bec7843953a70f8ea2b09a7e9de", + "nmdc:9aa0ec113eb8dd22e7f574216d1760b2" + ], + "was_informed_by": "gold:Gp0115665", + "id": "nmdc:ad03f306fbd2bb1f6302eedfc1cde9b2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga06n7k74", + "started_at_time": "2021-10-11T02:28:54Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:19:29+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1988838112, + "type": "nmdc:DataObject", + "id": "jgi:55d817fe0d8785342fcf8276", + "name": "9387.2.132031.GTAGAG.fastq.gz" + }, + { + "name": "Gp0115669_Filtered Reads", + "description": "Filtered Reads for Gp0115669", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filtered.fastq.gz", + "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", + "id": "nmdc:6eef104db92b99c9741b26c667d75cd9", + "file_size_bytes": 1806935637 + }, + { + "name": "Gp0115669_Filtered Stats", + "description": "Filtered Stats for Gp0115669", + "data_object_type": "QC Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0k85x37/qa/nmdc_mga0k85x37_filterStats.txt", + "md5_checksum": "58fde3e96dbb28af9133bede850a2653", + "id": "nmdc:58fde3e96dbb28af9133bede850a2653", + "file_size_bytes": 286 + }, + { + "name": "Gp0115669_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report.tsv", + "md5_checksum": "05933784d02331b60b2531e2025cd3b7", + "id": "nmdc:05933784d02331b60b2531e2025cd3b7", + "file_size_bytes": 11362 + }, + { + "name": "Gp0115669_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_report_full.tsv", + "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", + "id": "nmdc:50fc279637cb7048aaaeec9b223d0286", + "file_size_bytes": 909325 + }, + { + "name": "Gp0115669_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115669", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_gottcha2_krona.html", + "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", + "id": "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "file_size_bytes": 261412 + }, + { + "name": "Gp0115669_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115669", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_classification.tsv", + "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", + "id": "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "file_size_bytes": 1481087410 + }, + { + "name": "Gp0115669_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115669", + "data_object_type": "Centrifuge Classification Report", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_report.tsv", + "md5_checksum": "de45d70cc01749e9b5691dc24674545d", + "id": "nmdc:de45d70cc01749e9b5691dc24674545d", + "file_size_bytes": 256139 + }, + { + "name": "Gp0115669_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115669", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_centrifuge_krona.html", + "md5_checksum": "534f97f3792b74385c4da305196a1b1d", + "id": "nmdc:534f97f3792b74385c4da305196a1b1d", + "file_size_bytes": 2323658 + }, + { + "name": "Gp0115669_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115669", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_classification.tsv", + "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", + "id": "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "file_size_bytes": 1220980345 + }, + { + "name": "Gp0115669_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115669", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_report.tsv", + "md5_checksum": "07b6457a094fab96563168ed287dc59f", + "id": "nmdc:07b6457a094fab96563168ed287dc59f", + "file_size_bytes": 651795 + }, + { + "name": "Gp0115669_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115669", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/ReadbasedAnalysis/nmdc_mga0k85x37_kraken2_krona.html", + "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", + "id": "nmdc:164a1bc50e8d6509446ae2877be8231c", + "file_size_bytes": 3963303 + }, + { + "name": "Gp0115669_Assembled contigs 
fasta", + "description": "Assembled contigs fasta for Gp0115669", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_contigs.fna", + "md5_checksum": "03eb095e55df50d639fab237d06c14ac", + "id": "nmdc:03eb095e55df50d639fab237d06c14ac", + "file_size_bytes": 58951440 + }, + { + "name": "Gp0115669_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115669", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_scaffolds.fna", + "md5_checksum": "569cb5da239e82dce1b40bfa7e2fd518", + "id": "nmdc:569cb5da239e82dce1b40bfa7e2fd518", + "file_size_bytes": 58607757 + }, + { + "name": "Gp0115669_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_covstats.txt", + "md5_checksum": "b77ef3014c80797cc88509adf02be002", + "id": "nmdc:b77ef3014c80797cc88509adf02be002", + "file_size_bytes": 8978635 + }, + { + "name": "Gp0115669_Assembled AGP file", + "description": "Assembled AGP file for Gp0115669", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_assembly.agp", + "md5_checksum": "62d08517e0ba0f991f2d8bbd66061d78", + "id": "nmdc:62d08517e0ba0f991f2d8bbd66061d78", + "file_size_bytes": 8358006 + }, + { + "name": "Gp0115669_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0115669", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/assembly/nmdc_mga0k85x37_pairedMapped_sorted.bam", + "md5_checksum": "568b82cb6038fec5df04c30cbd874098", + "id": "nmdc:568b82cb6038fec5df04c30cbd874098", + "file_size_bytes": 1940308720 + }, + { + "name": "Gp0115669_Protein FAA", + "description": "Protein FAA for Gp0115669", + 
"data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_proteins.faa", + "md5_checksum": "8a5f288604c61556ff3e827725864fd1", + "id": "nmdc:8a5f288604c61556ff3e827725864fd1", + "file_size_bytes": 32524652 + }, + { + "name": "Gp0115669_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115669", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_structural_annotation.gff", + "md5_checksum": "0180998d6f3a3021638f04d9c0b35019", + "id": "nmdc:0180998d6f3a3021638f04d9c0b35019", + "file_size_bytes": 2514 + }, + { + "name": "Gp0115669_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115669", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_functional_annotation.gff", + "md5_checksum": "950b8c4ebd1da50e2ca079273540f3af", + "id": "nmdc:950b8c4ebd1da50e2ca079273540f3af", + "file_size_bytes": 36685287 + }, + { + "name": "Gp0115669_KO TSV file", + "description": "KO TSV file for Gp0115669", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko.tsv", + "md5_checksum": "96ec49c6124cf4f8f3e7da3525348477", + "id": "nmdc:96ec49c6124cf4f8f3e7da3525348477", + "file_size_bytes": 4815732 + }, + { + "name": "Gp0115669_EC TSV file", + "description": "EC TSV file for Gp0115669", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ec.tsv", + "md5_checksum": "12ca374a58bf899e42ed2c191a239e71", + "id": "nmdc:12ca374a58bf899e42ed2c191a239e71", + "file_size_bytes": 3090911 + }, + { + "name": "Gp0115669_COG GFF file", + "description": "COG GFF file for Gp0115669", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cog.gff", + "md5_checksum": "b8ae2993aa29c8e04c00580dfdb82650", + "id": "nmdc:b8ae2993aa29c8e04c00580dfdb82650", + "file_size_bytes": 20357759 + }, + { + "name": "Gp0115669_PFAM GFF file", + "description": "PFAM GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_pfam.gff", + "md5_checksum": "7901c83b5a41e54854c96ab0b081ebd6", + "id": "nmdc:7901c83b5a41e54854c96ab0b081ebd6", + "file_size_bytes": 15876941 + }, + { + "name": "Gp0115669_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_tigrfam.gff", + "md5_checksum": "762fe35b733dd82f89f5dce44fa54ed1", + "id": "nmdc:762fe35b733dd82f89f5dce44fa54ed1", + "file_size_bytes": 2104873 + }, + { + "name": "Gp0115669_SMART GFF file", + "description": "SMART GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_smart.gff", + "md5_checksum": "661b70d6f41a44fcc1913b101f79d86a", + "id": "nmdc:661b70d6f41a44fcc1913b101f79d86a", + "file_size_bytes": 4523437 + }, + { + "name": "Gp0115669_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_supfam.gff", + "md5_checksum": "e1843a865023d75edd3139c14b8c355e", + "id": "nmdc:e1843a865023d75edd3139c14b8c355e", + "file_size_bytes": 25872277 + }, + { + "name": "Gp0115669_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_cath_funfam.gff", + "md5_checksum": "a21449989b0b0884901602528b3f423e", + "id": "nmdc:a21449989b0b0884901602528b3f423e", + "file_size_bytes": 20254021 + }, + { + "name": "Gp0115669_KO_EC GFF file", + "description": "KO_EC GFF file for 
Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/annotation/nmdc_mga0k85x37_ko_ec.gff", + "md5_checksum": "7f52547663f4eeea33de1e437012981e", + "id": "nmdc:7f52547663f4eeea33de1e437012981e", + "file_size_bytes": 15397038 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115669_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.tooShort.fa", + "md5_checksum": "420b015f88d0b88ab582805f39ed2b47", + "id": "nmdc:420b015f88d0b88ab582805f39ed2b47", + "file_size_bytes": 44979790 + }, + { + "name": "Gp0115669_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_bins.unbinned.fa", + "md5_checksum": "ee8a556be3a57008c1c05ff9fe83437e", + "id": "nmdc:ee8a556be3a57008c1c05ff9fe83437e", + "file_size_bytes": 10530111 + }, + { + "name": "Gp0115669_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0115669", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_checkm_qa.out", + "md5_checksum": "6fd5dfbd1500a60620194b5b9a4aab8a", + "id": "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", + "file_size_bytes": 1190 + }, + { + "name": "Gp0115669_high-quality and medium-quality bins", + "description": "high-quality 
and medium-quality bins for Gp0115669", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_hqmq_bin.zip", + "md5_checksum": "6a7eb248822ec0994ddeffe8b5aae7b1", + "id": "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", + "file_size_bytes": 681479 + }, + { + "name": "Gp0115669_metabat2 bins", + "description": "metabat2 bins for Gp0115669", + "url": "https://data.microbiomedata.org/data/nmdc:mga0k85x37/MAGs/nmdc_mga0k85x37_metabat_bin.zip", + "md5_checksum": "6a80769f6812a45615890cc2b03e9abf", + "id": "nmdc:6a80769f6812a45615890cc2b03e9abf", + "file_size_bytes": 359752 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d78" + }, + "description": "Assembled contigs fasta for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly_contigs.fna", + "file_size_bytes": 58380875, + "type": "nmdc:DataObject", + "id": "nmdc:17cff5e222ad522c357863eb39418117", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7a" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/mapping_stats.txt", + "file_size_bytes": 8408070, + "type": "nmdc:DataObject", + "id": "nmdc:3f087e100be127e3b95dae0eeff2cb95", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7c" + }, + "description": "Assembled scaffold fasta for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly_scaffolds.fna", + "file_size_bytes": 58037702, + "type": "nmdc:DataObject", + "id": "nmdc:72840aa9e6a9a5b8e1ca113008cf44b1", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d7f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115669", + "url": 
"https://data.microbiomedata.org/data/1781_86097/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1913779393, + "type": "nmdc:DataObject", + "id": "nmdc:3eea7321716d25a836521bbd70da488b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d81" + }, + "description": "Assembled AGP file for gold:Gp0115669", + "url": "https://data.microbiomedata.org/data/1781_86097/assembly/assembly.agp", + "file_size_bytes": 7215836, + "type": "nmdc:DataObject", + "id": "nmdc:bcab2f9486464ccf89e94e63626cfc5c", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159af" + }, + "id": "nmdc:398b32d16246f98d91f1a6952a26feba", + "name": "1781_86097.krona.html", + "description": "Gold:Gp0115669 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86097/ReadbasedAnalysis/centrifuge/1781_86097.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159b3" + }, + "id": "nmdc:9b7edfd57bb79efa110f07d6a03bb2f4", + "name": "1781_86097.json", + "description": "Gold:Gp0115669 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86097/ReadbasedAnalysis/1781_86097.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627b" + }, + "id": "nmdc:f0f1a3e612de5a76d0e0517864378138", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115669", + "file_size_bytes": 43682660, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627c" + }, + "id": "nmdc:2b7abbacb5fbbb936d9421e78e9116c9", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for 
gold:Gp0115669", + "file_size_bytes": 11095337, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627d" + }, + "id": "nmdc:ed20a53339faee3206f7eacf9031fa26", + "name": "gold:Gp0115669.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115669", + "file_size_bytes": 2141471, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627e" + }, + "id": "nmdc:45d197f727234e5dd5756bc48f88bf0f", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115669", + "file_size_bytes": 4804, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1627f" + }, + "id": "nmdc:c397d43fa00df5c21b4865775bea17ba", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115669", + "file_size_bytes": 996, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16282" + }, + "id": "nmdc:8602045050811243d163714135d5dce5", + "name": "gold:Gp0115669.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115669", + "file_size_bytes": 659826, + "url": "https://data.microbiomedata.org/data/1781_86097/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d7f" + }, + "description": "Protein FAA for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_proteins.faa", + 
"md5_checksum": "6de20d427454895dce6caeb7b9543c11", + "file_size_bytes": 3385, + "id": "nmdc:6de20d427454895dce6caeb7b9543c11", + "name": "gold:Gp0115669_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d80" + }, + "description": "EC TSV File for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_ec.tsv", + "md5_checksum": "e74cb5e168717574193a15d5ac04a01f", + "file_size_bytes": 3385, + "id": "nmdc:e74cb5e168717574193a15d5ac04a01f", + "name": "gold:Gp0115669_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d82" + }, + "description": "Functional annotation GFF file for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_functional_annotation.gff", + "md5_checksum": "4f7a6e682f6f13b7ea73511265fdd2a9", + "file_size_bytes": 3385, + "id": "nmdc:4f7a6e682f6f13b7ea73511265fdd2a9", + "name": "gold:Gp0115669_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d83" + }, + "description": "Structural annotation GFF file for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_structural_annotation.gff", + "md5_checksum": "9c68523f458ee1f8ec395e1442b1f508", + "file_size_bytes": 3385, + "id": "nmdc:9c68523f458ee1f8ec395e1442b1f508", + "name": "gold:Gp0115669_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d8c" + }, + "description": "KO TSV File for gold:Gp0115669", + "url": "https://data.microbiomedata.org/1781_86097/img_annotation/Ga0482258_ko.tsv", + "md5_checksum": "0bc9b55e2d8f3c45b18725845815bfde", + "file_size_bytes": 3385, + 
"id": "nmdc:0bc9b55e2d8f3c45b18725845815bfde", + "name": "gold:Gp0115669_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34735" + }, + "has_input": [ + "nmdc:03eb095e55df50d639fab237d06c14ac", + "nmdc:568b82cb6038fec5df04c30cbd874098", + "nmdc:950b8c4ebd1da50e2ca079273540f3af" + ], + "too_short_contig_num": 107191, + "part_of": [ + "nmdc:mga0k85x37" + ], + "binned_contig_num": 651, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:420b015f88d0b88ab582805f39ed2b47", + "nmdc:ee8a556be3a57008c1c05ff9fe83437e", + "nmdc:6fd5dfbd1500a60620194b5b9a4aab8a", + "nmdc:6a7eb248822ec0994ddeffe8b5aae7b1", + "nmdc:6a80769f6812a45615890cc2b03e9abf" + ], + "was_informed_by": "gold:Gp0115669", + "input_contig_num": 114113, + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0k85x37", + "mags_list": [ + { + "number_of_contig": 48, + "completeness": 13.04, + "bin_name": "bins.1", + "gene_count": 245, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 379, + "completeness": 72.42, + "bin_name": "bins.2", + "gene_count": 2513, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.85, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 32 + }, + { + "number_of_contig": 224, + "completeness": 29.36, + 
"bin_name": "bins.3", + "gene_count": 1148, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.43, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + } + ], + "unbinned_contig_num": 6271, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d0" + }, + "has_input": [ + "nmdc:03eb095e55df50d639fab237d06c14ac" + ], + "part_of": [ + "nmdc:mga0k85x37" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8a5f288604c61556ff3e827725864fd1", + "nmdc:0180998d6f3a3021638f04d9c0b35019", + "nmdc:950b8c4ebd1da50e2ca079273540f3af", + "nmdc:96ec49c6124cf4f8f3e7da3525348477", + "nmdc:12ca374a58bf899e42ed2c191a239e71", + "nmdc:b8ae2993aa29c8e04c00580dfdb82650", + "nmdc:7901c83b5a41e54854c96ab0b081ebd6", + "nmdc:762fe35b733dd82f89f5dce44fa54ed1", + "nmdc:661b70d6f41a44fcc1913b101f79d86a", + "nmdc:e1843a865023d75edd3139c14b8c355e", + "nmdc:a21449989b0b0884901602528b3f423e", + "nmdc:7f52547663f4eeea33de1e437012981e" + ], + "was_informed_by": "gold:Gp0115669", + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0k85x37", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fba" + }, + "has_input": [ + "nmdc:6eef104db92b99c9741b26c667d75cd9" + ], + "part_of": [ + "nmdc:mga0k85x37" + ], + "ctg_logsum": 151663, + "scaf_logsum": 152336, + "gap_pct": 0.00222, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:03eb095e55df50d639fab237d06c14ac", + "nmdc:569cb5da239e82dce1b40bfa7e2fd518", + "nmdc:b77ef3014c80797cc88509adf02be002", + "nmdc:62d08517e0ba0f991f2d8bbd66061d78", + "nmdc:568b82cb6038fec5df04c30cbd874098" + ], + "asm_score": 4.733, + "was_informed_by": "gold:Gp0115669", + "ctg_powsum": 17017, + "scaf_max": 20100, + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "scaf_powsum": 17101, + "execution_resource": "NERSC-Cori", + "contigs": 114114, + "name": "Assembly Activity for nmdc:mga0k85x37", + "ctg_max": 20100, + "gc_std": 0.11871, + "contig_bp": 54567489, + "gc_avg": 0.55923, + "started_at_time": "2021-10-11T02:28:43Z", + "scaf_bp": 54568699, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 114011, + "ended_at_time": "2021-10-11T04:20:07+00:00", + "ctg_l50": 451, + "ctg_l90": 285, + "ctg_n50": 29019, + "ctg_n90": 94816, + "scaf_l50": 451, + "scaf_l90": 285, + "scaf_n50": 28976, + "scaf_n90": 94720 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b52" + }, + "id": "nmdc:omprc-11-hqmmwn16", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-47nxfg85" + ], + "has_output": [ + "jgi:55d817fe0d8785342fcf8276" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115669" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88d" + }, + 
"has_input": [ + "nmdc:f18b96b7d225d2f64f7b29015150113f" + ], + "part_of": [ + "nmdc:mga0k85x37" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6eef104db92b99c9741b26c667d75cd9", + "nmdc:58fde3e96dbb28af9133bede850a2653" + ], + "was_informed_by": "gold:Gp0115669", + "input_read_count": 20957834, + "output_read_bases": 3065138996, + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3164632934, + "name": "Read QC Activity for nmdc:mga0k85x37", + "output_read_count": 20454422, + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf51" + }, + "has_input": [ + "nmdc:6eef104db92b99c9741b26c667d75cd9" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05933784d02331b60b2531e2025cd3b7", + "nmdc:50fc279637cb7048aaaeec9b223d0286", + "nmdc:c3add9c5d34e3ca719096ba3ba9b1c08", + "nmdc:2777a04ec7e23aff356bb4f2733e55b7", + "nmdc:de45d70cc01749e9b5691dc24674545d", + "nmdc:534f97f3792b74385c4da305196a1b1d", + "nmdc:fc3e489df923ec344ac0cce7316f49d6", + "nmdc:07b6457a094fab96563168ed287dc59f", + "nmdc:164a1bc50e8d6509446ae2877be8231c" + ], + "was_informed_by": "gold:Gp0115669", + "id": "nmdc:ed7745adccd65c2dd20dfa24c2922db2", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0k85x37", + "started_at_time": "2021-10-11T02:28:43Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:20:07+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 3054717241, + "type": "nmdc:DataObject", + "id": "jgi:55d7402b0d8785342fcf7e3c", + "name": "9422.8.132674.GAGTGG.fastq.gz" + }, + { + "name": "Gp0115672_Filtered Reads", + "description": 
"Filtered Reads for Gp0115672", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filtered.fastq.gz", + "md5_checksum": "eb516fb673793f5161fb634fc19de310", + "id": "nmdc:eb516fb673793f5161fb634fc19de310", + "file_size_bytes": 2704299418 + }, + { + "name": "Gp0115672_Filtered Stats", + "description": "Filtered Stats for Gp0115672", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/qa/nmdc_mga0cwhj53_filterStats.txt", + "md5_checksum": "f4b68d1bd25f8d2fa8986aeef5fbec3f", + "id": "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f", + "file_size_bytes": 290 + }, + { + "name": "Gp0115672_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report.tsv", + "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", + "id": "nmdc:5a9326e2e450663a5ed8c97389136b25", + "file_size_bytes": 15806 + }, + { + "name": "Gp0115672_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_report_full.tsv", + "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", + "id": "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "file_size_bytes": 1142479 + }, + { + "name": "Gp0115672_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115672", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_gottcha2_krona.html", + "md5_checksum": "39a46887587926c9b81e126bb1036005", + "id": "nmdc:39a46887587926c9b81e126bb1036005", + "file_size_bytes": 273611 + }, + { + "name": "Gp0115672_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115672", + "data_object_type": "Centrifuge 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_classification.tsv", + "md5_checksum": "b8dde2c047141d9097317c86f723eded", + "id": "nmdc:b8dde2c047141d9097317c86f723eded", + "file_size_bytes": 2436637487 + }, + { + "name": "Gp0115672_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115672", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_report.tsv", + "md5_checksum": "d530342b37f0785f92650e9650f31d6a", + "id": "nmdc:d530342b37f0785f92650e9650f31d6a", + "file_size_bytes": 261520 + }, + { + "name": "Gp0115672_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115672", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_centrifuge_krona.html", + "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", + "id": "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "file_size_bytes": 2342832 + }, + { + "name": "Gp0115672_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115672", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_classification.tsv", + "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", + "id": "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "file_size_bytes": 1993150715 + }, + { + "name": "Gp0115672_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115672", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_report.tsv", + "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", + "id": "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "file_size_bytes": 693572 + }, 
+ { + "name": "Gp0115672_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115672", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/ReadbasedAnalysis/nmdc_mga0cwhj53_kraken2_krona.html", + "md5_checksum": "3266e79813577aae1d4377c62e73332c", + "id": "nmdc:3266e79813577aae1d4377c62e73332c", + "file_size_bytes": 4177114 + }, + { + "name": "Gp0115672_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115672", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_contigs.fna", + "md5_checksum": "6f762f7b079f8c2633ef674a8264879f", + "id": "nmdc:6f762f7b079f8c2633ef674a8264879f", + "file_size_bytes": 129321165 + }, + { + "name": "Gp0115672_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115672", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_scaffolds.fna", + "md5_checksum": "26cc1c91f5f5e79d50041ff4623398b5", + "id": "nmdc:26cc1c91f5f5e79d50041ff4623398b5", + "file_size_bytes": 128655263 + }, + { + "name": "Gp0115672_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_covstats.txt", + "md5_checksum": "bd9d5497c4e2e0ea61df1f3f239107f7", + "id": "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", + "file_size_bytes": 17496249 + }, + { + "name": "Gp0115672_Assembled AGP file", + "description": "Assembled AGP file for Gp0115672", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_assembly.agp", + "md5_checksum": "362a9857666fe2f4e90bf6a818f551cc", + "id": "nmdc:362a9857666fe2f4e90bf6a818f551cc", + "file_size_bytes": 16401188 + }, + { + "name": "Gp0115672_Metagenome Alignment BAM 
file", + "description": "Metagenome Alignment BAM file for Gp0115672", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/assembly/nmdc_mga0cwhj53_pairedMapped_sorted.bam", + "md5_checksum": "afd1d03b38bc5deb9c196264bcea8795", + "id": "nmdc:afd1d03b38bc5deb9c196264bcea8795", + "file_size_bytes": 2952467259 + }, + { + "name": "Gp0115672_Protein FAA", + "description": "Protein FAA for Gp0115672", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_proteins.faa", + "md5_checksum": "84e3590be0f59007275fdf459d464f74", + "id": "nmdc:84e3590be0f59007275fdf459d464f74", + "file_size_bytes": 71651089 + }, + { + "name": "Gp0115672_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115672", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_structural_annotation.gff", + "md5_checksum": "7dd630b842f587768235714e8a95f377", + "id": "nmdc:7dd630b842f587768235714e8a95f377", + "file_size_bytes": 2534 + }, + { + "name": "Gp0115672_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115672", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_functional_annotation.gff", + "md5_checksum": "38d776837c2208b557e2e4e5428c879d", + "id": "nmdc:38d776837c2208b557e2e4e5428c879d", + "file_size_bytes": 78213025 + }, + { + "name": "Gp0115672_KO TSV file", + "description": "KO TSV file for Gp0115672", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko.tsv", + "md5_checksum": "e38cb3355892042cb02580c26c083cd9", + "id": "nmdc:e38cb3355892042cb02580c26c083cd9", + "file_size_bytes": 10621211 + }, + { + "name": 
"Gp0115672_EC TSV file", + "description": "EC TSV file for Gp0115672", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ec.tsv", + "md5_checksum": "d55119e8f094efa075c44b22e8b2f689", + "id": "nmdc:d55119e8f094efa075c44b22e8b2f689", + "file_size_bytes": 6814564 + }, + { + "name": "Gp0115672_COG GFF file", + "description": "COG GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cog.gff", + "md5_checksum": "02a9ad5732172f04d1da83d145f63226", + "id": "nmdc:02a9ad5732172f04d1da83d145f63226", + "file_size_bytes": 45617917 + }, + { + "name": "Gp0115672_PFAM GFF file", + "description": "PFAM GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_pfam.gff", + "md5_checksum": "73811b72087e57f23db32f4a0ca4fb9c", + "id": "nmdc:73811b72087e57f23db32f4a0ca4fb9c", + "file_size_bytes": 37040943 + }, + { + "name": "Gp0115672_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_tigrfam.gff", + "md5_checksum": "dfc18c0f97e80c14ca6ca1bc2ba7a809", + "id": "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", + "file_size_bytes": 5380314 + }, + { + "name": "Gp0115672_SMART GFF file", + "description": "SMART GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_smart.gff", + "md5_checksum": "5a843529ffac8227515c5ea399ee4815", + "id": "nmdc:5a843529ffac8227515c5ea399ee4815", + "file_size_bytes": 10141642 + }, + { + "name": "Gp0115672_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_supfam.gff", + "md5_checksum": "82ac29a9999c6bc097cb0f35e4177e35", + "id": "nmdc:82ac29a9999c6bc097cb0f35e4177e35", + 
"file_size_bytes": 56808220 + }, + { + "name": "Gp0115672_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_cath_funfam.gff", + "md5_checksum": "5b0e8395559ef0d8a341ae0e132e60f6", + "id": "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", + "file_size_bytes": 45632833 + }, + { + "name": "Gp0115672_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/annotation/nmdc_mga0cwhj53_ko_ec.gff", + "md5_checksum": "1e74c3df751a59a34e5c0d87f4a37563", + "id": "nmdc:1e74c3df751a59a34e5c0d87f4a37563", + "file_size_bytes": 33782864 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115672_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.tooShort.fa", + "md5_checksum": "2b6e0195e34697039eff38b51026be24", + "id": "nmdc:2b6e0195e34697039eff38b51026be24", + "file_size_bytes": 91055942 + }, + { + "name": "Gp0115672_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_bins.unbinned.fa", + "md5_checksum": "f02d361fbef7549e2289bf4da623787d", + "id": "nmdc:f02d361fbef7549e2289bf4da623787d", + "file_size_bytes": 23202832 + }, + { + "name": "Gp0115672_metabat2 bin checkm quality assessment 
result", + "description": "metabat2 bin checkm quality assessment result for Gp0115672", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_checkm_qa.out", + "md5_checksum": "2de282e5507477269238ead458f11ac0", + "id": "nmdc:2de282e5507477269238ead458f11ac0", + "file_size_bytes": 2040 + }, + { + "name": "Gp0115672_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115672", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_hqmq_bin.zip", + "md5_checksum": "3abae1a573f9f0ac6da47e1ab9b9a723", + "id": "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", + "file_size_bytes": 1815861 + }, + { + "name": "Gp0115672_metabat2 bins", + "description": "metabat2 bins for Gp0115672", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cwhj53/MAGs/nmdc_mga0cwhj53_metabat_bin.zip", + "md5_checksum": "4d315d8dac1d9605d110ff2298b10229", + "id": "nmdc:4d315d8dac1d9605d110ff2298b10229", + "file_size_bytes": 2757900 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8b" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/mapping_stats.txt", + "file_size_bytes": 16391024, + "type": "nmdc:DataObject", + "id": "nmdc:b5be8aa1d11106aabbcf86f4a31e558b", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8d" + }, + "description": "Assembled contigs fasta for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly_contigs.fna", + "file_size_bytes": 128215940, + "type": "nmdc:DataObject", + "id": "nmdc:f74d007a0d55515291e2ab3ecd50461f", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8e" + }, + "description": "Assembled 
AGP file for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly.agp", + "file_size_bytes": 14188798, + "type": "nmdc:DataObject", + "id": "nmdc:39b43fc42da1d32ab929d57555ff63ee", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d8f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2905683228, + "type": "nmdc:DataObject", + "id": "nmdc:c01fcbe10ff6779259fbe584b123b82d", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d92" + }, + "description": "Assembled scaffold fasta for gold:Gp0115672", + "url": "https://data.microbiomedata.org/data/1781_86103/assembly/assembly_scaffolds.fna", + "file_size_bytes": 127550998, + "type": "nmdc:DataObject", + "id": "nmdc:49c6eb2c5d792edf921a7226b03351bf", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159ca" + }, + "id": "nmdc:bc647f348d91e409e4125941b495ff13", + "name": "1781_86103.krona.html", + "description": "Gold:Gp0115672 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86103/ReadbasedAnalysis/centrifuge/1781_86103.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b159cf" + }, + "id": "nmdc:986355b49a83d2548afbc1792128513e", + "name": "1781_86103.json", + "description": "Gold:Gp0115672 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86103/ReadbasedAnalysis/1781_86103.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16297" + }, + "id": "nmdc:0c7691992c142a735412ded115a1debd", + "name": 
"bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115672", + "file_size_bytes": 88459668, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629c" + }, + "id": "nmdc:521024b7b73f146c2b00dba84fb2d303", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115672", + "file_size_bytes": 24826673, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629d" + }, + "id": "nmdc:d6bee3893d2f63b687bc6078ce48dc2e", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0115672", + "file_size_bytes": 815, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629e" + }, + "id": "nmdc:82cfd339c167866e4b67cc4b12d7478d", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115672", + "file_size_bytes": 2394, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1629f" + }, + "id": "nmdc:a939ca808857119835c8340b2a79d302", + "name": "gold:Gp0115672.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 2103943, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a2" + }, + "id": "nmdc:e3e116dfc8712b6e35a071845657d1d1", + "name": "gold:Gp0115672.bins.5.fa", + 
"description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 374860, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a3" + }, + "id": "nmdc:e8a46f36b4956575ad78e022e604a89a", + "name": "gold:Gp0115672.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 689749, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a4" + }, + "id": "nmdc:561b7c7429a778107b65ece41a39bbb8", + "name": "gold:Gp0115672.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 313467, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a5" + }, + "id": "nmdc:1daea4e61ec3220e37e2c86742d9ba90", + "name": "gold:Gp0115672.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 1599533, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a6" + }, + "id": "nmdc:45fa952f6821c80c16e77c526d6506c0", + "name": "gold:Gp0115672.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 471200, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a7" + }, + "id": "nmdc:48a3756c08657061dbbd1b3fbd92b52d", + 
"name": "gold:Gp0115672.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 232940, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a8" + }, + "id": "nmdc:28fcb3eb2cf1d63d9623996a438b3cae", + "name": "gold:Gp0115672.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 834579, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162a9" + }, + "id": "nmdc:97a872124142327afa9e896d56b3c263", + "name": "gold:Gp0115672.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 943843, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b162aa" + }, + "id": "nmdc:20f94f48572c63758d65c10c19dc3a44", + "name": "gold:Gp0115672.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0115672", + "file_size_bytes": 5938384, + "url": "https://data.microbiomedata.org/data/1781_86103/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d9e" + }, + "description": "EC TSV File for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_ec.tsv", + "md5_checksum": "e029f10a29dd5e9d81dce82c2211fdee", + "file_size_bytes": 3385, + "id": "nmdc:e029f10a29dd5e9d81dce82c2211fdee", + "name": "gold:Gp0115672_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b00401ae706d7b5b16d9f" + }, + "description": "KO TSV File for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_ko.tsv", + "md5_checksum": "f6230d3d3eadab80074ecfe59a623c10", + "file_size_bytes": 3385, + "id": "nmdc:f6230d3d3eadab80074ecfe59a623c10", + "name": "gold:Gp0115672_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da0" + }, + "description": "Functional annotation GFF file for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_functional_annotation.gff", + "md5_checksum": "5c1afd4ffb1b1594807fbd0901da7a88", + "file_size_bytes": 3385, + "id": "nmdc:5c1afd4ffb1b1594807fbd0901da7a88", + "name": "gold:Gp0115672_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da1" + }, + "description": "Protein FAA for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_proteins.faa", + "md5_checksum": "b0687d58e2803a41864c9d830977402b", + "file_size_bytes": 3385, + "id": "nmdc:b0687d58e2803a41864c9d830977402b", + "name": "gold:Gp0115672_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da2" + }, + "description": "Structural annotation GFF file for gold:Gp0115672", + "url": "https://data.microbiomedata.org/1781_86103/img_annotation/Ga0482255_structural_annotation.gff", + "md5_checksum": "644d67586f9337bf4d12ff5859d4cd54", + "file_size_bytes": 3385, + "id": "nmdc:644d67586f9337bf4d12ff5859d4cd54", + "name": "gold:Gp0115672_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34737" + }, + "has_input": [ + 
"nmdc:6f762f7b079f8c2633ef674a8264879f", + "nmdc:afd1d03b38bc5deb9c196264bcea8795", + "nmdc:38d776837c2208b557e2e4e5428c879d" + ], + "too_short_contig_num": 206294, + "part_of": [ + "nmdc:mga0cwhj53" + ], + "binned_contig_num": 1785, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2b6e0195e34697039eff38b51026be24", + "nmdc:f02d361fbef7549e2289bf4da623787d", + "nmdc:2de282e5507477269238ead458f11ac0", + "nmdc:3abae1a573f9f0ac6da47e1ab9b9a723", + "nmdc:4d315d8dac1d9605d110ff2298b10229" + ], + "was_informed_by": "gold:Gp0115672", + "input_contig_num": 221045, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0cwhj53", + "mags_list": [ + { + "number_of_contig": 316, + "completeness": 61.03, + "bin_name": "bins.1", + "gene_count": 2148, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Sphingomonadales", + "num_16s": 0, + "gtdbtk_family": "Sphingomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.85, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Novosphingobium", + "num_t_rna": 19 + }, + { + "number_of_contig": 130, + "completeness": 34.64, + "bin_name": "bins.2", + "gene_count": 675, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 201, + "completeness": 19.13, + "bin_name": "bins.3", + "gene_count": 1000, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + 
"gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 256, + "completeness": 75.9, + "bin_name": "bins.4", + "gene_count": 2131, + "bin_quality": "MQ", + "gtdbtk_species": "UBA5335 sp002862435", + "gtdbtk_order": "UBA5335", + "num_16s": 0, + "gtdbtk_family": "UBA5335", + "gtdbtk_domain": "Bacteria", + "contamination": 1.52, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5335", + "num_t_rna": 22 + }, + { + "number_of_contig": 254, + "completeness": 100.0, + "bin_name": "bins.5", + "gene_count": 6188, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 95.83, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 86 + }, + { + "number_of_contig": 106, + "completeness": 7.24, + "bin_name": "bins.6", + "gene_count": 524, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 306, + "completeness": 65.74, + "bin_name": "bins.7", + "gene_count": 2357, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA11222", + "num_16s": 0, + "gtdbtk_family": "UBA11222", + "gtdbtk_domain": "Bacteria", + "contamination": 2.3, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA11222", + "num_t_rna": 29 + }, + { + "number_of_contig": 216, + "completeness": 47.34, + "bin_name": "bins.8", + "gene_count": 1203, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": 
"", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + } + ], + "unbinned_contig_num": 12966, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d1" + }, + "has_input": [ + "nmdc:6f762f7b079f8c2633ef674a8264879f" + ], + "part_of": [ + "nmdc:mga0cwhj53" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:84e3590be0f59007275fdf459d464f74", + "nmdc:7dd630b842f587768235714e8a95f377", + "nmdc:38d776837c2208b557e2e4e5428c879d", + "nmdc:e38cb3355892042cb02580c26c083cd9", + "nmdc:d55119e8f094efa075c44b22e8b2f689", + "nmdc:02a9ad5732172f04d1da83d145f63226", + "nmdc:73811b72087e57f23db32f4a0ca4fb9c", + "nmdc:dfc18c0f97e80c14ca6ca1bc2ba7a809", + "nmdc:5a843529ffac8227515c5ea399ee4815", + "nmdc:82ac29a9999c6bc097cb0f35e4177e35", + "nmdc:5b0e8395559ef0d8a341ae0e132e60f6", + "nmdc:1e74c3df751a59a34e5c0d87f4a37563" + ], + "was_informed_by": "gold:Gp0115672", + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0cwhj53", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fbf" + }, + "has_input": [ + "nmdc:eb516fb673793f5161fb634fc19de310" + ], + "part_of": [ + "nmdc:mga0cwhj53" + ], + "ctg_logsum": 447149, + "scaf_logsum": 448446, + "gap_pct": 0.0019, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6f762f7b079f8c2633ef674a8264879f", + "nmdc:26cc1c91f5f5e79d50041ff4623398b5", + "nmdc:bd9d5497c4e2e0ea61df1f3f239107f7", + "nmdc:362a9857666fe2f4e90bf6a818f551cc", + "nmdc:afd1d03b38bc5deb9c196264bcea8795" + ], + "asm_score": 13.127, + 
"was_informed_by": "gold:Gp0115672", + "ctg_powsum": 55923, + "scaf_max": 157008, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "scaf_powsum": 56113, + "execution_resource": "NERSC-Cori", + "contigs": 221046, + "name": "Assembly Activity for nmdc:mga0cwhj53", + "ctg_max": 157008, + "gc_std": 0.10619, + "contig_bp": 120471215, + "gc_avg": 0.56196, + "started_at_time": "2021-10-11T02:28:16Z", + "scaf_bp": 120473505, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 220853, + "ended_at_time": "2021-10-11T05:56:20+00:00", + "ctg_l50": 528, + "ctg_l90": 293, + "ctg_n50": 48327, + "ctg_n90": 178881, + "scaf_l50": 529, + "scaf_l90": 293, + "scaf_n50": 48077, + "scaf_n90": 178708, + "scaf_l_gt50k": 2147966, + "scaf_n_gt50k": 28, + "scaf_pct_gt50k": 1.7829365 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b53" + }, + "id": "nmdc:omprc-11-qsxwf517", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-sdhyr752" + ], + "has_output": [ + "jgi:55d7402b0d8785342fcf7e3c" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115672" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c889" + }, + "has_input": [ + "nmdc:1f6998a48aec6f4008a92d2b8e17d314" + ], + "part_of": [ + "nmdc:mga0cwhj53" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:eb516fb673793f5161fb634fc19de310", + "nmdc:f4b68d1bd25f8d2fa8986aeef5fbec3f" + ], + "was_informed_by": "gold:Gp0115672", + "input_read_count": 34522052, + "output_read_bases": 5012430912, + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "input_read_bases": 5212829852, + "name": "Read QC Activity for nmdc:mga0cwhj53", + "output_read_count": 33454554, + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf52" + }, + "has_input": [ + "nmdc:eb516fb673793f5161fb634fc19de310" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5a9326e2e450663a5ed8c97389136b25", + "nmdc:6044f2e33e0dd3e951484e9c50ae10f4", + "nmdc:39a46887587926c9b81e126bb1036005", + "nmdc:b8dde2c047141d9097317c86f723eded", + "nmdc:d530342b37f0785f92650e9650f31d6a", + "nmdc:6672aa851b5d39d7381211232b4f6cb2", + "nmdc:61e3c875231ae8999b5aa1dbf7d55cca", + "nmdc:3049835ed4e3533acce49e9cc60b03fc", + "nmdc:3266e79813577aae1d4377c62e73332c" + ], + "was_informed_by": "gold:Gp0115672", + "id": "nmdc:50eb8825777d1294abac150521e5c2db", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cwhj53", + "started_at_time": "2021-10-11T02:28:16Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2619328583, + "type": "nmdc:DataObject", + "id": "jgi:574fe0a17ded5e3df1ee148a", + "name": "10533.3.165334.ACCATCC-TGGATGG.fastq.gz" + }, + { + "name": "Gp0127640_Filtered Reads", + "description": "Filtered Reads for Gp0127640", + "data_object_type": "Filtered Sequencing Reads", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filtered.fastq.gz", + "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", + "id": "nmdc:534c94e20d292a6bf09c0a42b550b4c2", + "file_size_bytes": 2416846292 + }, + { + "name": "Gp0127640_Filtered Stats", + "description": "Filtered Stats for Gp0127640", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/qa/nmdc_mga06rnc11_filterStats.txt", + "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", + "id": "nmdc:db5ccad12d6ddb46947fbd815aae7f9a", + "file_size_bytes": 285 + }, + { + "name": "Gp0127640_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report.tsv", + "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", + "id": "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "file_size_bytes": 3824 + }, + { + "name": "Gp0127640_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_report_full.tsv", + "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", + "id": "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "file_size_bytes": 850491 + }, + { + "name": "Gp0127640_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127640", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_gottcha2_krona.html", + "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", + "id": "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "file_size_bytes": 236151 + }, + { + "name": "Gp0127640_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127640", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_classification.tsv", + "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", + "id": "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "file_size_bytes": 2057333090 + }, + { + "name": "Gp0127640_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127640", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_report.tsv", + "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", + "id": "nmdc:61f1f6d57fd4d445682e25ec34901721", + "file_size_bytes": 256577 + }, + { + "name": "Gp0127640_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127640", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_centrifuge_krona.html", + "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", + "id": "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "file_size_bytes": 2334984 + }, + { + "name": "Gp0127640_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127640", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_classification.tsv", + "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", + "id": "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "file_size_bytes": 1658481192 + }, + { + "name": "Gp0127640_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127640", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_report.tsv", + "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", + "id": "nmdc:e2939606fc9ff1c0046b333e1740f258", + "file_size_bytes": 653129 + }, + { + "name": "Gp0127640_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127640", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/ReadbasedAnalysis/nmdc_mga06rnc11_kraken2_krona.html", + "md5_checksum": "d47144fd7ec0608e7677550d9589c889", + "id": "nmdc:d47144fd7ec0608e7677550d9589c889", + "file_size_bytes": 3977820 + }, + { + "name": "Gp0127640_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127640", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_contigs.fna", + "md5_checksum": "b85a322271c7f93ef295141d12cb2dbc", + "id": "nmdc:b85a322271c7f93ef295141d12cb2dbc", + "file_size_bytes": 44243651 + }, + { + "name": "Gp0127640_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127640", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_scaffolds.fna", + "md5_checksum": "794445b3fedfaec8af9b70b167bc6852", + "id": "nmdc:794445b3fedfaec8af9b70b167bc6852", + "file_size_bytes": 43923338 + }, + { + "name": "Gp0127640_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_covstats.txt", + "md5_checksum": "d389ae4f8a92c21423fc77aa054ba985", + "id": "nmdc:d389ae4f8a92c21423fc77aa054ba985", + "file_size_bytes": 8365383 + }, + { + "name": "Gp0127640_Assembled AGP file", + "description": "Assembled AGP file for Gp0127640", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_assembly.agp", + "md5_checksum": "765541c2865f6047d5e2e8e7299908e4", + "id": "nmdc:765541c2865f6047d5e2e8e7299908e4", + "file_size_bytes": 7782777 + }, + { + "name": "Gp0127640_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0127640", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/assembly/nmdc_mga06rnc11_pairedMapped_sorted.bam", + "md5_checksum": "78b554dd52492c3d1e401d0c9198b89b", + "id": "nmdc:78b554dd52492c3d1e401d0c9198b89b", + "file_size_bytes": 2578128724 + }, + { + "name": "Gp0127640_Protein FAA", + "description": "Protein FAA for Gp0127640", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_proteins.faa", + "md5_checksum": "13e64b02d230f76008e42256a48d1cec", + "id": "nmdc:13e64b02d230f76008e42256a48d1cec", + "file_size_bytes": 26637626 + }, + { + "name": "Gp0127640_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127640", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_structural_annotation.gff", + "md5_checksum": "7babb0c9f662679659b7b1bee469f073", + "id": "nmdc:7babb0c9f662679659b7b1bee469f073", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127640_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127640", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_functional_annotation.gff", + "md5_checksum": "e84b1e43d546c9793c3a4d9eaa8cee86", + "id": "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", + "file_size_bytes": 32184781 + }, + { + "name": "Gp0127640_KO TSV file", + "description": "KO TSV file for Gp0127640", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko.tsv", + "md5_checksum": "2e3e5b7ffa39e533db8ed1d925426f50", + "id": "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", + "file_size_bytes": 3620933 + }, + { + "name": "Gp0127640_EC TSV file", + 
"description": "EC TSV file for Gp0127640", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ec.tsv", + "md5_checksum": "62e46d35a6aff3a52b39c6bb04dc6161", + "id": "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", + "file_size_bytes": 2390086 + }, + { + "name": "Gp0127640_COG GFF file", + "description": "COG GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cog.gff", + "md5_checksum": "93fa7de9c74cfcff99bb74e27fa94674", + "id": "nmdc:93fa7de9c74cfcff99bb74e27fa94674", + "file_size_bytes": 17898567 + }, + { + "name": "Gp0127640_PFAM GFF file", + "description": "PFAM GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_pfam.gff", + "md5_checksum": "63bad86a6d7fb23b5a4683ae36820622", + "id": "nmdc:63bad86a6d7fb23b5a4683ae36820622", + "file_size_bytes": 12585366 + }, + { + "name": "Gp0127640_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_tigrfam.gff", + "md5_checksum": "d6b80bb748b4d6fbe52c15300ad2137b", + "id": "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", + "file_size_bytes": 1170952 + }, + { + "name": "Gp0127640_SMART GFF file", + "description": "SMART GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_smart.gff", + "md5_checksum": "46722961c280df725d15489e82502031", + "id": "nmdc:46722961c280df725d15489e82502031", + "file_size_bytes": 3891425 + }, + { + "name": "Gp0127640_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_supfam.gff", + "md5_checksum": "6f1a0029cb25f1433de1d7c241bc7553", + "id": "nmdc:6f1a0029cb25f1433de1d7c241bc7553", + "file_size_bytes": 22543435 + }, + { + 
"name": "Gp0127640_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_cath_funfam.gff", + "md5_checksum": "6d2839963f616d810e66435b3bbe018a", + "id": "nmdc:6d2839963f616d810e66435b3bbe018a", + "file_size_bytes": 16572925 + }, + { + "name": "Gp0127640_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/annotation/nmdc_mga06rnc11_ko_ec.gff", + "md5_checksum": "efbf36ca49c40ad0367ecd23c012b29b", + "id": "nmdc:efbf36ca49c40ad0367ecd23c012b29b", + "file_size_bytes": 11571776 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127640_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.tooShort.fa", + "md5_checksum": "ce395376d0bc7121e4dc5efc774d5e74", + "id": "nmdc:ce395376d0bc7121e4dc5efc774d5e74", + "file_size_bytes": 40358420 + }, + { + "name": "Gp0127640_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_bins.unbinned.fa", + "md5_checksum": "a16cbb06b91ebfb45f5a010effc1cfde", + "id": "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", + "file_size_bytes": 2755747 + }, + { + "name": "Gp0127640_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin 
checkm quality assessment result for Gp0127640", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_checkm_qa.out", + "md5_checksum": "97ae130ca2f75c66b8cbd60c4d35463a", + "id": "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", + "file_size_bytes": 760 + }, + { + "name": "Gp0127640_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127640", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_hqmq_bin.zip", + "md5_checksum": "5945311235c6195ad409ab30e2b72c0c", + "id": "nmdc:5945311235c6195ad409ab30e2b72c0c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127640_metabat2 bins", + "description": "metabat2 bins for Gp0127640", + "url": "https://data.microbiomedata.org/data/nmdc:mga06rnc11/MAGs/nmdc_mga06rnc11_metabat_bin.zip", + "md5_checksum": "d1cf2992bd60e25032eedeb09858d14b", + "id": "nmdc:d1cf2992bd60e25032eedeb09858d14b", + "file_size_bytes": 345388 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e92" + }, + "description": "Assembled scaffold fasta for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly_scaffolds.fna", + "file_size_bytes": 43496758, + "type": "nmdc:DataObject", + "id": "nmdc:aa1bb1c144d1bca4e8aeeb2c9d640d75", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e93" + }, + "description": "Assembled AGP file for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly.agp", + "file_size_bytes": 6929297, + "type": "nmdc:DataObject", + "id": "nmdc:c4688faca5539c65da5223b1468045be", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e94" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127640", + "url": 
"https://data.microbiomedata.org/data/1781_100342/assembly/mapping_stats.txt", + "file_size_bytes": 7938723, + "type": "nmdc:DataObject", + "id": "nmdc:82ea1e2021fb7f53d998452af137427c", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e96" + }, + "description": "Assembled contigs fasta for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/assembly_contigs.fna", + "file_size_bytes": 43816991, + "type": "nmdc:DataObject", + "id": "nmdc:e2d5ce50f49731a49740d9f61f630550", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e98" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127640", + "url": "https://data.microbiomedata.org/data/1781_100342/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2545520278, + "type": "nmdc:DataObject", + "id": "nmdc:68a7046814acf2ffe580fa8ce70e8a06", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b7b" + }, + "id": "nmdc:252bb7818bcf5f8a50bf88d1fd0a297c", + "name": "1781_100342.krona.html", + "description": "Gold:Gp0127640 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100342/ReadbasedAnalysis/centrifuge/1781_100342.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b82" + }, + "id": "nmdc:0bf64f8fcce67bacdf9e484f8ea2268e", + "name": "1781_100342.json", + "description": "Gold:Gp0127640 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100342/ReadbasedAnalysis/1781_100342.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660c" + }, + "id": "nmdc:e9110de20a054251e14eddda17e204a6", + "name": "bins.tooShort.fa", + 
"description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127640", + "file_size_bytes": 39101595, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660d" + }, + "id": "nmdc:21e6cb23babaec38d6e8d431893c23a3", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127640", + "file_size_bytes": 3314124, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660e" + }, + "id": "nmdc:363ee3fe300a57198050ef502d613d92", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127640", + "file_size_bytes": 918, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660f" + }, + "id": "nmdc:59a345dc3bc08ee0f1837d41a276654f", + "name": "gold:Gp0127640.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127640", + "file_size_bytes": 275145, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16626" + }, + "id": "nmdc:5356fae3a74ea20c0344e57c8ef11166", + "name": "gold:Gp0127640.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127640", + "file_size_bytes": 277293, + "url": "https://data.microbiomedata.org/data/1781_100342/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d11" + }, + "description": "EC TSV File for gold:Gp0127640", + "url": 
"https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_ec.tsv", + "md5_checksum": "e90b16891cff9bd5b0034cc6c89f8080", + "file_size_bytes": 3385, + "id": "nmdc:e90b16891cff9bd5b0034cc6c89f8080", + "name": "gold:Gp0127640_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d14" + }, + "description": "Functional annotation GFF file for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_functional_annotation.gff", + "md5_checksum": "86b6734c5eb64c0cae6e95fa7f062123", + "file_size_bytes": 3385, + "id": "nmdc:86b6734c5eb64c0cae6e95fa7f062123", + "name": "gold:Gp0127640_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d15" + }, + "description": "KO TSV File for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_ko.tsv", + "md5_checksum": "4950dc66d2b5a3c325454fb106d6b726", + "file_size_bytes": 3385, + "id": "nmdc:4950dc66d2b5a3c325454fb106d6b726", + "name": "gold:Gp0127640_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1c" + }, + "description": "Protein FAA for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_proteins.faa", + "md5_checksum": "1fbb7302a6ad581085d561e9fd3ed802", + "file_size_bytes": 3385, + "id": "nmdc:1fbb7302a6ad581085d561e9fd3ed802", + "name": "gold:Gp0127640_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d22" + }, + "description": "Structural annotation GFF file for gold:Gp0127640", + "url": "https://data.microbiomedata.org/1781_100342/img_annotation/Ga0482230_structural_annotation.gff", + 
"md5_checksum": "812cf8b77747ff65cfd237158535d310", + "file_size_bytes": 3385, + "id": "nmdc:812cf8b77747ff65cfd237158535d310", + "name": "gold:Gp0127640_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3471a" + }, + "has_input": [ + "nmdc:b85a322271c7f93ef295141d12cb2dbc", + "nmdc:78b554dd52492c3d1e401d0c9198b89b", + "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86" + ], + "too_short_contig_num": 104867, + "part_of": [ + "nmdc:mga06rnc11" + ], + "binned_contig_num": 213, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ce395376d0bc7121e4dc5efc774d5e74", + "nmdc:a16cbb06b91ebfb45f5a010effc1cfde", + "nmdc:97ae130ca2f75c66b8cbd60c4d35463a", + "nmdc:5945311235c6195ad409ab30e2b72c0c", + "nmdc:d1cf2992bd60e25032eedeb09858d14b" + ], + "was_informed_by": "gold:Gp0127640", + "input_contig_num": 106665, + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga06rnc11", + "mags_list": [ + { + "number_of_contig": 213, + "completeness": 48.94, + "bin_name": "bins.1", + "gene_count": 1422, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 30 + } + ], + "unbinned_contig_num": 1585, + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:33:17+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9bb" + }, + "has_input": [ + "nmdc:b85a322271c7f93ef295141d12cb2dbc" + ], + "part_of": [ + "nmdc:mga06rnc11" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:13e64b02d230f76008e42256a48d1cec", + "nmdc:7babb0c9f662679659b7b1bee469f073", + "nmdc:e84b1e43d546c9793c3a4d9eaa8cee86", + "nmdc:2e3e5b7ffa39e533db8ed1d925426f50", + "nmdc:62e46d35a6aff3a52b39c6bb04dc6161", + "nmdc:93fa7de9c74cfcff99bb74e27fa94674", + "nmdc:63bad86a6d7fb23b5a4683ae36820622", + "nmdc:d6b80bb748b4d6fbe52c15300ad2137b", + "nmdc:46722961c280df725d15489e82502031", + "nmdc:6f1a0029cb25f1433de1d7c241bc7553", + "nmdc:6d2839963f616d810e66435b3bbe018a", + "nmdc:efbf36ca49c40ad0367ecd23c012b29b" + ], + "was_informed_by": "gold:Gp0127640", + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga06rnc11", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:33:17+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa6" + }, + "has_input": [ + "nmdc:534c94e20d292a6bf09c0a42b550b4c2" + ], + "part_of": [ + "nmdc:mga06rnc11" + ], + "ctg_logsum": 42879, + "scaf_logsum": 42987, + "gap_pct": 0.0005, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b85a322271c7f93ef295141d12cb2dbc", + "nmdc:794445b3fedfaec8af9b70b167bc6852", + "nmdc:d389ae4f8a92c21423fc77aa054ba985", + "nmdc:765541c2865f6047d5e2e8e7299908e4", + "nmdc:78b554dd52492c3d1e401d0c9198b89b" + ], + "asm_score": 5.471, + "was_informed_by": "gold:Gp0127640", + "ctg_powsum": 4901.253, + "scaf_max": 27880, + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "scaf_powsum": 4913.296, + "execution_resource": "NERSC-Cori", + "contigs": 106665, + "name": "Assembly Activity for nmdc:mga06rnc11", + "ctg_max": 27880, + "gc_std": 0.10189, + "contig_bp": 40331509, + "gc_avg": 0.58648, + "started_at_time": "2021-10-11T02:24:27Z", + "scaf_bp": 40331709, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 
106645, + "ended_at_time": "2021-10-11T04:33:17+00:00", + "ctg_l50": 336, + "ctg_l90": 282, + "ctg_n50": 38543, + "ctg_n90": 94525, + "scaf_l50": 336, + "scaf_l90": 282, + "scaf_n50": 38534, + "scaf_n90": 94506 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b54" + }, + "id": "nmdc:omprc-11-932jcd76", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-pvcgp635" + ], + "has_output": [ + "jgi:574fe0a17ded5e3df1ee148a" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127640" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c875" + }, + "has_input": [ + "nmdc:0094fcbe3a051a8000b8823c8db540f8" + ], + "part_of": [ + "nmdc:mga06rnc11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:534c94e20d292a6bf09c0a42b550b4c2", + "nmdc:db5ccad12d6ddb46947fbd815aae7f9a" + ], + "was_informed_by": "gold:Gp0127640", + "input_read_count": 28754670, + "output_read_bases": 4186416440, + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4341955170, + "name": "Read QC Activity for nmdc:mga06rnc11", + "output_read_count": 27981268, + "started_at_time": "2021-10-11T02:24:27Z", + 
"type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:33:17+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf3a" + }, + "has_input": [ + "nmdc:534c94e20d292a6bf09c0a42b550b4c2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e79b2eba131ed6df71a56f47b1b901f", + "nmdc:bc82dcb8151fc20c22be71b6531a1fb2", + "nmdc:d5e45563875efca0653ba2dd47ee3d68", + "nmdc:bf5aa70f6ff14da2ef1393124ec29c4d", + "nmdc:61f1f6d57fd4d445682e25ec34901721", + "nmdc:7c31728fc2a51c8d202f9f74b1919886", + "nmdc:f36c2b28e63d21ca4d9e84035450c8e1", + "nmdc:e2939606fc9ff1c0046b333e1740f258", + "nmdc:d47144fd7ec0608e7677550d9589c889" + ], + "was_informed_by": "gold:Gp0127640", + "id": "nmdc:414c4647eddd8081308d92da2d59815e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga06rnc11", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:33:17+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2061929348, + "type": "nmdc:DataObject", + "id": "jgi:574fde697ded5e3df1ee140a", + "name": "10533.1.165310.GCTACGT-AACGTAG.fastq.gz" + }, + { + "name": "Gp0127641_Filtered Reads", + "description": "Filtered Reads for Gp0127641", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filtered.fastq.gz", + "md5_checksum": "a2700afe93abad6f004a3701348622a2", + "id": "nmdc:a2700afe93abad6f004a3701348622a2", + "file_size_bytes": 1787020792 + }, + { + "name": "Gp0127641_Filtered Stats", + "description": "Filtered Stats for Gp0127641", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/qa/nmdc_mga0822t33_filterStats.txt", + "md5_checksum": "aaa9a8a3d8e147116953394a8755742d", + "id": 
"nmdc:aaa9a8a3d8e147116953394a8755742d", + "file_size_bytes": 289 + }, + { + "name": "Gp0127641_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report.tsv", + "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", + "id": "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "file_size_bytes": 3331 + }, + { + "name": "Gp0127641_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_report_full.tsv", + "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", + "id": "nmdc:a42312841b816448d8bd5d3adfa65f58", + "file_size_bytes": 761359 + }, + { + "name": "Gp0127641_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127641", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_gottcha2_krona.html", + "md5_checksum": "f473f4a99336a49105d2722888ae0510", + "id": "nmdc:f473f4a99336a49105d2722888ae0510", + "file_size_bytes": 236161 + }, + { + "name": "Gp0127641_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127641", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_classification.tsv", + "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", + "id": "nmdc:ae51ea50660f44fa3b317a45f3015556", + "file_size_bytes": 1635953327 + }, + { + "name": "Gp0127641_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127641", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_report.tsv", + "md5_checksum": 
"ef39b44a90c8525e93f45e500b3ae934", + "id": "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "file_size_bytes": 255166 + }, + { + "name": "Gp0127641_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127641", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_centrifuge_krona.html", + "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", + "id": "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "file_size_bytes": 2332521 + }, + { + "name": "Gp0127641_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127641", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_classification.tsv", + "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", + "id": "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "file_size_bytes": 1307934195 + }, + { + "name": "Gp0127641_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127641", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_report.tsv", + "md5_checksum": "dc193d1a1693589003f992c820606bab", + "id": "nmdc:dc193d1a1693589003f992c820606bab", + "file_size_bytes": 635050 + }, + { + "name": "Gp0127641_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127641", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/ReadbasedAnalysis/nmdc_mga0822t33_kraken2_krona.html", + "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", + "id": "nmdc:2f36b41c419efa1b1dfb6a9576b965ee", + "file_size_bytes": 3964515 + }, + { + "name": "Gp0127641_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127641", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_contigs.fna", + "md5_checksum": "18f0d53f503c855c0093677df58366e0", + "id": "nmdc:18f0d53f503c855c0093677df58366e0", + "file_size_bytes": 102384540 + }, + { + "name": "Gp0127641_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127641", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_scaffolds.fna", + "md5_checksum": "2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "id": "nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "file_size_bytes": 101806869 + }, + { + "name": "Gp0127641_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_covstats.txt", + "md5_checksum": "04ad2128f72c26a4fa2d0ee7b1709ee9", + "id": "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", + "file_size_bytes": 15204446 + }, + { + "name": "Gp0127641_Assembled AGP file", + "description": "Assembled AGP file for Gp0127641", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_assembly.agp", + "md5_checksum": "b89858508c524a03011cd5191f7589fa", + "id": "nmdc:b89858508c524a03011cd5191f7589fa", + "file_size_bytes": 14206204 + }, + { + "name": "Gp0127641_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127641", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/assembly/nmdc_mga0822t33_pairedMapped_sorted.bam", + "md5_checksum": "6974d394df454501e0515b31a2415367", + "id": "nmdc:6974d394df454501e0515b31a2415367", + "file_size_bytes": 1967753614 + }, + { + "name": "Gp0127641_Protein FAA", + "description": "Protein FAA for Gp0127641", + "data_object_type": "Annotation Amino Acid FASTA", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_proteins.faa", + "md5_checksum": "f33a2a1789f5e913c3ef0dd0440a4877", + "id": "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", + "file_size_bytes": 57768168 + }, + { + "name": "Gp0127641_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127641", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_structural_annotation.gff", + "md5_checksum": "9aba4a0c78cb073609b129c4bb65fe2d", + "id": "nmdc:9aba4a0c78cb073609b129c4bb65fe2d", + "file_size_bytes": 2522 + }, + { + "name": "Gp0127641_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127641", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_functional_annotation.gff", + "md5_checksum": "2477ce1de68bdb1322eec1ffad5c74ac", + "id": "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", + "file_size_bytes": 65167139 + }, + { + "name": "Gp0127641_KO TSV file", + "description": "KO TSV file for Gp0127641", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko.tsv", + "md5_checksum": "65768fea44cbd0183b286ab8f9883394", + "id": "nmdc:65768fea44cbd0183b286ab8f9883394", + "file_size_bytes": 7266122 + }, + { + "name": "Gp0127641_EC TSV file", + "description": "EC TSV file for Gp0127641", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ec.tsv", + "md5_checksum": "b8ac75e77d2bc2607877e33ab692c43b", + "id": "nmdc:b8ac75e77d2bc2607877e33ab692c43b", + "file_size_bytes": 4793386 + }, + { + "name": "Gp0127641_COG GFF file", + "description": "COG GFF file for Gp0127641", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cog.gff", + "md5_checksum": "31018e605b1569eb64006f2108b9d7d4", + "id": "nmdc:31018e605b1569eb64006f2108b9d7d4", + "file_size_bytes": 38184948 + }, + { + "name": "Gp0127641_PFAM GFF file", + "description": "PFAM GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_pfam.gff", + "md5_checksum": "c7ee9f693971a7686d8ff701fddbcb4a", + "id": "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", + "file_size_bytes": 28867184 + }, + { + "name": "Gp0127641_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_tigrfam.gff", + "md5_checksum": "5c0d5f63853ca572d8d73cac9a36c8d7", + "id": "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", + "file_size_bytes": 3122581 + }, + { + "name": "Gp0127641_SMART GFF file", + "description": "SMART GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_smart.gff", + "md5_checksum": "058c5e17eeeea69b2bf0b1b3c2838aea", + "id": "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", + "file_size_bytes": 8368877 + }, + { + "name": "Gp0127641_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_supfam.gff", + "md5_checksum": "b836f94d526c1936d080a4aa7c0646c9", + "id": "nmdc:b836f94d526c1936d080a4aa7c0646c9", + "file_size_bytes": 47986944 + }, + { + "name": "Gp0127641_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_cath_funfam.gff", + "md5_checksum": "0100d09c52d0c243b5ae45d95e6a22dc", + "id": "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", + "file_size_bytes": 36349993 + }, + { + "name": "Gp0127641_KO_EC GFF file", + "description": "KO_EC GFF file for 
Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/annotation/nmdc_mga0822t33_ko_ec.gff", + "md5_checksum": "64b87140003d1a5a3d9ac939be55e57d", + "id": "nmdc:64b87140003d1a5a3d9ac939be55e57d", + "file_size_bytes": 23113010 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127641_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.tooShort.fa", + "md5_checksum": "024b6771e169aeaf57a3b10acc6045a1", + "id": "nmdc:024b6771e169aeaf57a3b10acc6045a1", + "file_size_bytes": 80852741 + }, + { + "name": "Gp0127641_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_bins.unbinned.fa", + "md5_checksum": "545cd253ad26116236dec9937b32d8ef", + "id": "nmdc:545cd253ad26116236dec9937b32d8ef", + "file_size_bytes": 19497941 + }, + { + "name": "Gp0127641_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127641", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_checkm_qa.out", + "md5_checksum": "1785cfe7cf0546dc8702193921a2f566", + "id": "nmdc:1785cfe7cf0546dc8702193921a2f566", + "file_size_bytes": 936 + }, + { + "name": "Gp0127641_high-quality and medium-quality bins", + "description": "high-quality 
and medium-quality bins for Gp0127641", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_hqmq_bin.zip", + "md5_checksum": "0a2a5650358b51ffcd3bbcfc874ac5c9", + "id": "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", + "file_size_bytes": 182 + }, + { + "name": "Gp0127641_metabat2 bins", + "description": "metabat2 bins for Gp0127641", + "url": "https://data.microbiomedata.org/data/nmdc:mga0822t33/MAGs/nmdc_mga0822t33_metabat_bin.zip", + "md5_checksum": "8f6b89831cabcd1dc7aa5e26d87f5063", + "id": "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063", + "file_size_bytes": 625863 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e95" + }, + "description": "Assembled contigs fasta for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/assembly_contigs.fna", + "file_size_bytes": 101616916, + "type": "nmdc:DataObject", + "id": "nmdc:a707d24e95ee536650d1cc70bbf997d8", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e97" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1939284551, + "type": "nmdc:DataObject", + "id": "nmdc:af117c2397f282c3f1d319c499d72b01", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e99" + }, + "description": "Assembled AGP file for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/assembly.agp", + "file_size_bytes": 12669908, + "type": "nmdc:DataObject", + "id": "nmdc:662fa061e9042db360dd7981f6068505", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9a" + }, + "description": "Assembled scaffold fasta for gold:Gp0127641", + "url": 
"https://data.microbiomedata.org/data/1781_100343/assembly/assembly_scaffolds.fna", + "file_size_bytes": 101039761, + "type": "nmdc:DataObject", + "id": "nmdc:3c5870bf66d9acb165352c67638b29c8", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9e" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127641", + "url": "https://data.microbiomedata.org/data/1781_100343/assembly/mapping_stats.txt", + "file_size_bytes": 14436822, + "type": "nmdc:DataObject", + "id": "nmdc:bb3f818e2f6299570c76a7ea96fcf7e4", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b84" + }, + "id": "nmdc:f6cd4b98b207dc9f70dcfa063d4afb92", + "name": "1781_100343.krona.html", + "description": "Gold:Gp0127641 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100343/ReadbasedAnalysis/centrifuge/1781_100343.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b8a" + }, + "id": "nmdc:48c8c09803af12f6092d895de5a1eff9", + "name": "1781_100343.json", + "description": "Gold:Gp0127641 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100343/ReadbasedAnalysis/1781_100343.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16610" + }, + "id": "nmdc:ede01d68f85f6183407fe751475b2350", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127641", + "file_size_bytes": 78611268, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16611" + }, + "id": "nmdc:af8a167fb92c9470eaa77ed8617d454d", + "name": "checkm_qa.out", + "description": 
"metabat2 bin checkm quality assessment result for gold:Gp0127641", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16614" + }, + "id": "nmdc:55b42e3f671bfeab937473ef45b55b4b", + "name": "gold:Gp0127641.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127641", + "file_size_bytes": 434241, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661a" + }, + "id": "nmdc:9e790d1073c174456b4d98661bf92d81", + "name": "gold:Gp0127641.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127641", + "file_size_bytes": 740186, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16628" + }, + "id": "nmdc:5d6128d308651aad814210c9a3a28f3b", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127641", + "file_size_bytes": 20215397, + "url": "https://data.microbiomedata.org/data/1781_100343/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d17" + }, + "description": "Structural annotation GFF file for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_structural_annotation.gff", + "md5_checksum": "a33ac2dc640b7088767a99517f22421f", + "file_size_bytes": 3385, + "id": "nmdc:a33ac2dc640b7088767a99517f22421f", + "name": "gold:Gp0127641_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d18" + }, + 
"description": "KO TSV File for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_ko.tsv", + "md5_checksum": "4ec0cbf7d166057c3d2904b2dd2f6b15", + "file_size_bytes": 3385, + "id": "nmdc:4ec0cbf7d166057c3d2904b2dd2f6b15", + "name": "gold:Gp0127641_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2d" + }, + "description": "Protein FAA for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_proteins.faa", + "md5_checksum": "d10b0c9b0d5e646d09c570eb2e08b793", + "file_size_bytes": 3385, + "id": "nmdc:d10b0c9b0d5e646d09c570eb2e08b793", + "name": "gold:Gp0127641_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d31" + }, + "description": "EC TSV File for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_ec.tsv", + "md5_checksum": "71306193abf043865cafa413b3ca9c1e", + "file_size_bytes": 3385, + "id": "nmdc:71306193abf043865cafa413b3ca9c1e", + "name": "gold:Gp0127641_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d33" + }, + "description": "Functional annotation GFF file for gold:Gp0127641", + "url": "https://data.microbiomedata.org/1781_100343/img_annotation/Ga0482229_functional_annotation.gff", + "md5_checksum": "11d4524c896f4fd678ff05a0547b6b52", + "file_size_bytes": 3385, + "id": "nmdc:11d4524c896f4fd678ff05a0547b6b52", + "name": "gold:Gp0127641_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34719" + }, + "has_input": [ + "nmdc:18f0d53f503c855c0093677df58366e0", + "nmdc:6974d394df454501e0515b31a2415367", + 
"nmdc:2477ce1de68bdb1322eec1ffad5c74ac" + ], + "too_short_contig_num": 179152, + "part_of": [ + "nmdc:mga0822t33" + ], + "binned_contig_num": 464, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:024b6771e169aeaf57a3b10acc6045a1", + "nmdc:545cd253ad26116236dec9937b32d8ef", + "nmdc:1785cfe7cf0546dc8702193921a2f566", + "nmdc:0a2a5650358b51ffcd3bbcfc874ac5c9", + "nmdc:8f6b89831cabcd1dc7aa5e26d87f5063" + ], + "was_informed_by": "gold:Gp0127641", + "input_contig_num": 191906, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0822t33", + "mags_list": [ + { + "number_of_contig": 142, + "completeness": 24.43, + "bin_name": "bins.1", + "gene_count": 832, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + }, + { + "number_of_contig": 322, + "completeness": 46.21, + "bin_name": "bins.2", + "gene_count": 1652, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 21 + } + ], + "unbinned_contig_num": 12290, + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b9" + }, + "has_input": [ + "nmdc:18f0d53f503c855c0093677df58366e0" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f33a2a1789f5e913c3ef0dd0440a4877", + 
"nmdc:9aba4a0c78cb073609b129c4bb65fe2d", + "nmdc:2477ce1de68bdb1322eec1ffad5c74ac", + "nmdc:65768fea44cbd0183b286ab8f9883394", + "nmdc:b8ac75e77d2bc2607877e33ab692c43b", + "nmdc:31018e605b1569eb64006f2108b9d7d4", + "nmdc:c7ee9f693971a7686d8ff701fddbcb4a", + "nmdc:5c0d5f63853ca572d8d73cac9a36c8d7", + "nmdc:058c5e17eeeea69b2bf0b1b3c2838aea", + "nmdc:b836f94d526c1936d080a4aa7c0646c9", + "nmdc:0100d09c52d0c243b5ae45d95e6a22dc", + "nmdc:64b87140003d1a5a3d9ac939be55e57d" + ], + "was_informed_by": "gold:Gp0127641", + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0822t33", + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa2" + }, + "has_input": [ + "nmdc:a2700afe93abad6f004a3701348622a2" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "ctg_logsum": 224925, + "scaf_logsum": 225846, + "gap_pct": 0.00137, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:18f0d53f503c855c0093677df58366e0", + "nmdc:2fe3e02d47d8e1d66ccb15c0e42bf1e0", + "nmdc:04ad2128f72c26a4fa2d0ee7b1709ee9", + "nmdc:b89858508c524a03011cd5191f7589fa", + "nmdc:6974d394df454501e0515b31a2415367" + ], + "asm_score": 3.367, + "was_informed_by": "gold:Gp0127641", + "ctg_powsum": 24264, + "scaf_max": 18020, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "scaf_powsum": 24365, + "execution_resource": "NERSC-Cori", + "contigs": 191907, + "name": "Assembly Activity for nmdc:mga0822t33", + "ctg_max": 18020, + "gc_std": 0.10192, + "contig_bp": 94878155, + "gc_avg": 0.61857, + "started_at_time": "2021-10-11T02:27:18Z", + "scaf_bp": 94879455, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 191777, + "ended_at_time": "2021-10-11T04:05:47+00:00", + "ctg_l50": 489, + "ctg_l90": 290, + "ctg_n50": 53038, + "ctg_n90": 
159679, + "scaf_l50": 489, + "scaf_l90": 290, + "scaf_n50": 53021, + "scaf_n90": 159560 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b55" + }, + "id": "nmdc:omprc-11-p0jdew93", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-fgtanh42" + ], + "has_output": [ + "jgi:574fde697ded5e3df1ee140a" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127641" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c872" + }, + "has_input": [ + "nmdc:c59690f54a7afb65869c9c683e3eef7f" + ], + "part_of": [ + "nmdc:mga0822t33" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a2700afe93abad6f004a3701348622a2", + "nmdc:aaa9a8a3d8e147116953394a8755742d" + ], + "was_informed_by": "gold:Gp0127641", + "input_read_count": 24261468, + "output_read_bases": 3340338011, + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3663481668, + "name": "Read QC Activity for nmdc:mga0822t33", + "output_read_count": 22362924, + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ], + 
"read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf37" + }, + "has_input": [ + "nmdc:a2700afe93abad6f004a3701348622a2" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0d021c80bfd39c8293a8b355b8ff3605", + "nmdc:a42312841b816448d8bd5d3adfa65f58", + "nmdc:f473f4a99336a49105d2722888ae0510", + "nmdc:ae51ea50660f44fa3b317a45f3015556", + "nmdc:ef39b44a90c8525e93f45e500b3ae934", + "nmdc:e2653a4ce3f34c235ad7b01e87dd1016", + "nmdc:869730c4d81163e0c238dd4ae27ebd9e", + "nmdc:dc193d1a1693589003f992c820606bab", + "nmdc:2f36b41c419efa1b1dfb6a9576b965ee" + ], + "was_informed_by": "gold:Gp0127641", + "id": "nmdc:363fe7a0dd914e046b274fea70625c52", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0822t33", + "started_at_time": "2021-10-11T02:27:18Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:05:47+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2168673471, + "type": "nmdc:DataObject", + "id": "jgi:574fde6c7ded5e3df1ee140c", + "name": "10533.1.165310.TCCGAGT-AACTCGG.fastq.gz" + }, + { + "name": "Gp0127643_Filtered Reads", + "description": "Filtered Reads for Gp0127643", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filtered.fastq.gz", + "md5_checksum": "2ef23543e3064ca73c3034713d87c026", + "id": "nmdc:2ef23543e3064ca73c3034713d87c026", + "file_size_bytes": 1891088172 + }, + { + "name": "Gp0127643_Filtered Stats", + "description": "Filtered Stats for Gp0127643", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/qa/nmdc_mga0evc178_filterStats.txt", + "md5_checksum": "87b172ead58a37be8d199c0acfc96759", + "id": "nmdc:87b172ead58a37be8d199c0acfc96759", + "file_size_bytes": 289 + }, + { + "name": "Gp0127643_Gottcha2 TSV 
report", + "description": "Gottcha2 TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report.tsv", + "md5_checksum": "e8f825653e5736e29b73de55bd11a270", + "id": "nmdc:e8f825653e5736e29b73de55bd11a270", + "file_size_bytes": 1326 + }, + { + "name": "Gp0127643_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_report_full.tsv", + "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", + "id": "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "file_size_bytes": 664131 + }, + { + "name": "Gp0127643_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127643", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_gottcha2_krona.html", + "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", + "id": "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "file_size_bytes": 229630 + }, + { + "name": "Gp0127643_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127643", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_classification.tsv", + "md5_checksum": "c9074b2e05765afd68463dc301b87995", + "id": "nmdc:c9074b2e05765afd68463dc301b87995", + "file_size_bytes": 1726867547 + }, + { + "name": "Gp0127643_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127643", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_report.tsv", + "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", + "id": "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "file_size_bytes": 254021 + }, + { + 
"name": "Gp0127643_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127643", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_centrifuge_krona.html", + "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", + "id": "nmdc:6465fe59472b111ead1f0414ccf39f62", + "file_size_bytes": 2331702 + }, + { + "name": "Gp0127643_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127643", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_classification.tsv", + "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", + "id": "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "file_size_bytes": 1376409913 + }, + { + "name": "Gp0127643_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127643", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_report.tsv", + "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", + "id": "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "file_size_bytes": 640506 + }, + { + "name": "Gp0127643_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127643", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/ReadbasedAnalysis/nmdc_mga0evc178_kraken2_krona.html", + "md5_checksum": "f98bae155bced880c058ecde7d539c18", + "id": "nmdc:f98bae155bced880c058ecde7d539c18", + "file_size_bytes": 3998448 + }, + { + "name": "Gp0127643_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127643", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_contigs.fna", + "md5_checksum": "a3a85f9f946ff34f28dfd4b5f8590f23", + 
"id": "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "file_size_bytes": 112772885 + }, + { + "name": "Gp0127643_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127643", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_scaffolds.fna", + "md5_checksum": "001fd34d98a73eee6be5a41004e67469", + "id": "nmdc:001fd34d98a73eee6be5a41004e67469", + "file_size_bytes": 112143079 + }, + { + "name": "Gp0127643_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_covstats.txt", + "md5_checksum": "9b45294f72cb55b2f039366d33183fa3", + "id": "nmdc:9b45294f72cb55b2f039366d33183fa3", + "file_size_bytes": 16563197 + }, + { + "name": "Gp0127643_Assembled AGP file", + "description": "Assembled AGP file for Gp0127643", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_assembly.agp", + "md5_checksum": "b2ec4f5a3f02869684bdfaf065d75c54", + "id": "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", + "file_size_bytes": 15493398 + }, + { + "name": "Gp0127643_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127643", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/assembly/nmdc_mga0evc178_pairedMapped_sorted.bam", + "md5_checksum": "fa61e18d49a2012f115d970f0a195986", + "id": "nmdc:fa61e18d49a2012f115d970f0a195986", + "file_size_bytes": 2085429752 + }, + { + "name": "Gp0127643_Protein FAA", + "description": "Protein FAA for Gp0127643", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_proteins.faa", + "md5_checksum": "b2cd0d1a024094fd4e308c21d439ed5f", + "id": "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", + 
"file_size_bytes": 63917762 + }, + { + "name": "Gp0127643_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127643", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_structural_annotation.gff", + "md5_checksum": "6151bacd37618698c28b00151b4998f8", + "id": "nmdc:6151bacd37618698c28b00151b4998f8", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127643_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127643", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_functional_annotation.gff", + "md5_checksum": "744277086ab01222a91233536d5e8976", + "id": "nmdc:744277086ab01222a91233536d5e8976", + "file_size_bytes": 71811800 + }, + { + "name": "Gp0127643_KO TSV file", + "description": "KO TSV file for Gp0127643", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko.tsv", + "md5_checksum": "9c8a359c69bcb1179241f9a3c727fa23", + "id": "nmdc:9c8a359c69bcb1179241f9a3c727fa23", + "file_size_bytes": 7959243 + }, + { + "name": "Gp0127643_EC TSV file", + "description": "EC TSV file for Gp0127643", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ec.tsv", + "md5_checksum": "027b72af172d078f88471d932cf6d473", + "id": "nmdc:027b72af172d078f88471d932cf6d473", + "file_size_bytes": 5202338 + }, + { + "name": "Gp0127643_COG GFF file", + "description": "COG GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cog.gff", + "md5_checksum": "ff24990735aa002e828ff7204a456ad2", + "id": "nmdc:ff24990735aa002e828ff7204a456ad2", + "file_size_bytes": 41649279 + }, + { + "name": "Gp0127643_PFAM GFF 
file", + "description": "PFAM GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_pfam.gff", + "md5_checksum": "e884ad501d1bb3bcf006f0999020ce0f", + "id": "nmdc:e884ad501d1bb3bcf006f0999020ce0f", + "file_size_bytes": 31529168 + }, + { + "name": "Gp0127643_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_tigrfam.gff", + "md5_checksum": "8321f818f53371491a7a80ef7e063ca6", + "id": "nmdc:8321f818f53371491a7a80ef7e063ca6", + "file_size_bytes": 3378599 + }, + { + "name": "Gp0127643_SMART GFF file", + "description": "SMART GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_smart.gff", + "md5_checksum": "6f799842fe74ebff7942a026dbf9b1bf", + "id": "nmdc:6f799842fe74ebff7942a026dbf9b1bf", + "file_size_bytes": 9132037 + }, + { + "name": "Gp0127643_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_supfam.gff", + "md5_checksum": "8ee84a629a5899c25e0fbd0f07084530", + "id": "nmdc:8ee84a629a5899c25e0fbd0f07084530", + "file_size_bytes": 52720037 + }, + { + "name": "Gp0127643_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_cath_funfam.gff", + "md5_checksum": "6697cdb0b1dcf83e7ecb8fcefa0703ef", + "id": "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", + "file_size_bytes": 39643020 + }, + { + "name": "Gp0127643_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/annotation/nmdc_mga0evc178_ko_ec.gff", + "md5_checksum": "d2990b0bd86e50209dcada6fa6b09510", + "id": "nmdc:d2990b0bd86e50209dcada6fa6b09510", + "file_size_bytes": 25272687 + }, + { + "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127643_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.tooShort.fa", + "md5_checksum": "ed8acb6d21b14da131350d9c52aa7041", + "id": "nmdc:ed8acb6d21b14da131350d9c52aa7041", + "file_size_bytes": 87917684 + }, + { + "name": "Gp0127643_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_bins.unbinned.fa", + "md5_checksum": "d81e3cc17fa762a717dcf324a0aa3d45", + "id": "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", + "file_size_bytes": 22746526 + }, + { + "name": "Gp0127643_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127643", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_checkm_qa.out", + "md5_checksum": "bd388cba93a77cde2f5791fa0f580865", + "id": "nmdc:bd388cba93a77cde2f5791fa0f580865", + "file_size_bytes": 785 + }, + { + "name": "Gp0127643_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127643", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_hqmq_bin.zip", + "md5_checksum": "30695aca02693c6aba316db3e9f565a8", + "id": 
"nmdc:30695aca02693c6aba316db3e9f565a8", + "file_size_bytes": 182 + }, + { + "name": "Gp0127643_metabat2 bins", + "description": "metabat2 bins for Gp0127643", + "url": "https://data.microbiomedata.org/data/nmdc:mga0evc178/MAGs/nmdc_mga0evc178_metabat_bin.zip", + "md5_checksum": "79de6d81848956e1c06a811bc9bdab81", + "id": "nmdc:79de6d81848956e1c06a811bc9bdab81", + "file_size_bytes": 614113 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea0" + }, + "description": "Assembled AGP file for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/assembly.agp", + "file_size_bytes": 13820270, + "type": "nmdc:DataObject", + "id": "nmdc:b0ff6dcafcb9bed83c5290e6f974dbf0", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea1" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2057080151, + "type": "nmdc:DataObject", + "id": "nmdc:372822daf5aee3e4a9b1f8e621dbd3f5", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea3" + }, + "description": "Assembled scaffold fasta for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/assembly_scaffolds.fna", + "file_size_bytes": 111307907, + "type": "nmdc:DataObject", + "id": "nmdc:7b778a5f68bdd7a7deeb51a98df7ac3d", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea4" + }, + "description": "Assembled contigs fasta for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/assembly_contigs.fna", + "file_size_bytes": 111937017, + "type": "nmdc:DataObject", + "id": "nmdc:e087926bf099d6b56eaa8ed38dc9587c", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + 
}, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea5" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127643", + "url": "https://data.microbiomedata.org/data/1781_100345/assembly/mapping_stats.txt", + "file_size_bytes": 15727329, + "type": "nmdc:DataObject", + "id": "nmdc:3e82935d61f88ddbd5c4d0be5f3a4974", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b98" + }, + "id": "nmdc:f562a2cbd61dd314aa652b5a7962a453", + "name": "1781_100345.krona.html", + "description": "Gold:Gp0127643 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100345/ReadbasedAnalysis/centrifuge/1781_100345.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b9a" + }, + "id": "nmdc:60405949438243714571490c6faab9f5", + "name": "1781_100345.json", + "description": "Gold:Gp0127643 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100345/ReadbasedAnalysis/1781_100345.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16617" + }, + "id": "nmdc:843af81eb17f23a12d17e72a36922a7a", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127643", + "file_size_bytes": 22959128, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661b" + }, + "id": "nmdc:00606078cd171aa99d50f89abea30559", + "name": "gold:Gp0127643.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127643", + "file_size_bytes": 232512, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b1661c" + }, + "id": "nmdc:ea805619c536992228a7e6ad5e3ee57a", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127643", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661d" + }, + "id": "nmdc:4c1506844b7b4f668c62e266dc7180da", + "name": "gold:Gp0127643.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127643", + "file_size_bytes": 1495841, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662a" + }, + "id": "nmdc:7c45113f19fcf47e76d2408c9e4aa2af", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127643", + "file_size_bytes": 85480222, + "url": "https://data.microbiomedata.org/data/1781_100345/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d16" + }, + "description": "KO TSV File for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_ko.tsv", + "md5_checksum": "b7b422e726f82668cd9c2ea9f0786f41", + "file_size_bytes": 3385, + "id": "nmdc:b7b422e726f82668cd9c2ea9f0786f41", + "name": "gold:Gp0127643_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d19" + }, + "description": "Functional annotation GFF file for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_functional_annotation.gff", + "md5_checksum": "f8df0729f51da70739b75a2458e32020", + "file_size_bytes": 3385, + "id": "nmdc:f8df0729f51da70739b75a2458e32020", + "name": 
"gold:Gp0127643_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1a" + }, + "description": "Protein FAA for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_proteins.faa", + "md5_checksum": "7434bd60874fc6d05530ee0652a9e18f", + "file_size_bytes": 3385, + "id": "nmdc:7434bd60874fc6d05530ee0652a9e18f", + "name": "gold:Gp0127643_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1b" + }, + "description": "Structural annotation GFF file for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_structural_annotation.gff", + "md5_checksum": "d897fea88896a93843966962f6bbb7be", + "file_size_bytes": 3385, + "id": "nmdc:d897fea88896a93843966962f6bbb7be", + "name": "gold:Gp0127643_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d50" + }, + "description": "EC TSV File for gold:Gp0127643", + "url": "https://data.microbiomedata.org/1781_100345/img_annotation/Ga0482227_ec.tsv", + "md5_checksum": "0b7fc1ad662f267eaa604075f9968b7c", + "file_size_bytes": 3385, + "id": "nmdc:0b7fc1ad662f267eaa604075f9968b7c", + "name": "gold:Gp0127643_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3471e" + }, + "has_input": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "nmdc:fa61e18d49a2012f115d970f0a195986", + "nmdc:744277086ab01222a91233536d5e8976" + ], + "too_short_contig_num": 194066, + "part_of": [ + "nmdc:mga0evc178" + ], + "binned_contig_num": 470, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:ed8acb6d21b14da131350d9c52aa7041", + "nmdc:d81e3cc17fa762a717dcf324a0aa3d45", + "nmdc:bd388cba93a77cde2f5791fa0f580865", + "nmdc:30695aca02693c6aba316db3e9f565a8", + "nmdc:79de6d81848956e1c06a811bc9bdab81" + ], + "was_informed_by": "gold:Gp0127643", + "input_contig_num": 208967, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0evc178", + "mags_list": [ + { + "number_of_contig": 470, + "completeness": 30.73, + "bin_name": "bins.1", + "gene_count": 2501, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.71, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 + } + ], + "unbinned_contig_num": 14431, + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9bd" + }, + "has_input": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b2cd0d1a024094fd4e308c21d439ed5f", + "nmdc:6151bacd37618698c28b00151b4998f8", + "nmdc:744277086ab01222a91233536d5e8976", + "nmdc:9c8a359c69bcb1179241f9a3c727fa23", + "nmdc:027b72af172d078f88471d932cf6d473", + "nmdc:ff24990735aa002e828ff7204a456ad2", + "nmdc:e884ad501d1bb3bcf006f0999020ce0f", + "nmdc:8321f818f53371491a7a80ef7e063ca6", + "nmdc:6f799842fe74ebff7942a026dbf9b1bf", + "nmdc:8ee84a629a5899c25e0fbd0f07084530", + "nmdc:6697cdb0b1dcf83e7ecb8fcefa0703ef", + "nmdc:d2990b0bd86e50209dcada6fa6b09510" + ], + "was_informed_by": "gold:Gp0127643", + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": 
"Annotation Activity for nmdc:mga0evc178", + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa9" + }, + "has_input": [ + "nmdc:2ef23543e3064ca73c3034713d87c026" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "ctg_logsum": 258957, + "scaf_logsum": 260132, + "gap_pct": 0.00166, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a3a85f9f946ff34f28dfd4b5f8590f23", + "nmdc:001fd34d98a73eee6be5a41004e67469", + "nmdc:9b45294f72cb55b2f039366d33183fa3", + "nmdc:b2ec4f5a3f02869684bdfaf065d75c54", + "nmdc:fa61e18d49a2012f115d970f0a195986" + ], + "asm_score": 3.329, + "was_informed_by": "gold:Gp0127643", + "ctg_powsum": 27868, + "scaf_max": 12873, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "scaf_powsum": 27998, + "execution_resource": "NERSC-Cori", + "contigs": 208967, + "name": "Assembly Activity for nmdc:mga0evc178", + "ctg_max": 12873, + "gc_std": 0.09438, + "contig_bp": 104567589, + "gc_avg": 0.63102, + "started_at_time": "2021-10-11T02:27:00Z", + "scaf_bp": 104569329, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 208793, + "ended_at_time": "2021-10-11T04:04:16+00:00", + "ctg_l50": 497, + "ctg_l90": 292, + "ctg_n50": 57164, + "ctg_n90": 172414, + "scaf_l50": 498, + "scaf_l90": 292, + "scaf_n50": 56935, + "scaf_n90": 172256 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b56" + }, + "id": "nmdc:omprc-11-dtsr6z90", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-g079t498" + ], + "has_output": [ + "jgi:574fde6c7ded5e3df1ee140c" 
+ ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127643" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c879" + }, + "has_input": [ + "nmdc:8b553dbdd47b90ed7f55d5747822f5d5" + ], + "part_of": [ + "nmdc:mga0evc178" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2ef23543e3064ca73c3034713d87c026", + "nmdc:87b172ead58a37be8d199c0acfc96759" + ], + "was_informed_by": "gold:Gp0127643", + "input_read_count": 25305566, + "output_read_bases": 3510483777, + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3821140466, + "name": "Read QC Activity for nmdc:mga0evc178", + "output_read_count": 23508042, + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf36" + }, + "has_input": [ + "nmdc:2ef23543e3064ca73c3034713d87c026" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e8f825653e5736e29b73de55bd11a270", + "nmdc:99bb1311b220e9a03da619fe5fb58f0f", + "nmdc:5c97bc15d4d5999f140664b3b2777c6d", + "nmdc:c9074b2e05765afd68463dc301b87995", + "nmdc:ed2c05d1702a9a811b8a98de748bc82a", + "nmdc:6465fe59472b111ead1f0414ccf39f62", + "nmdc:9855ca52bce074c34dcebfd154fa94ff", + "nmdc:ed8059f366d60112deb41a0c307bc6fc", + "nmdc:f98bae155bced880c058ecde7d539c18" 
+ ], + "was_informed_by": "gold:Gp0127643", + "id": "nmdc:30cd614596fd2a4b87220523f8a6ff30", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0evc178", + "started_at_time": "2021-10-11T02:27:00Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:04:16+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "data_object_type": "Metagenome Raw Reads", + "url": "https://data.microbiomedata.org/data/raw/10533.3.165334.AGTCTCA-GTGAGAC.fastq.gz", + "file_size_bytes": 939616475, + "type": "nmdc:DataObject", + "id": "jgi:574fe0a87ded5e3df1ee148e", + "name": "10533.3.165334.AGTCTCA-GTGAGAC.fastq.gz" + }, + { + "name": "Gp0127644_Filtered Reads", + "description": "Filtered Reads for Gp0127644", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filtered.fastq.gz", + "md5_checksum": "98da35678c59689ce738b2a6bc708692", + "id": "nmdc:98da35678c59689ce738b2a6bc708692", + "file_size_bytes": 694199131 + }, + { + "name": "Gp0127644_Filtered Stats", + "description": "Filtered Stats for Gp0127644", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/qa/nmdc_mga0bpf635_filterStats.txt", + "md5_checksum": "ff08ea52254e0cc1011c56656505b27b", + "id": "nmdc:ff08ea52254e0cc1011c56656505b27b", + "file_size_bytes": 280 + }, + { + "name": "gold:Gp0452677_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for gold:Gp0452677", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_report.tsv", + "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", + "id": "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "file_size_bytes": 109 + }, + { + "name": "Gp0127644_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127644", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_gottcha2_report_full.tsv", + "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", + "id": "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "file_size_bytes": 426075 + }, + { + "name": "gold:Gp0452677_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for gold:Gp0452677", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fbpz25/ReadbasedAnalysis/nmdc_mga0fbpz25_gottcha2_krona.html", + "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", + "id": "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "file_size_bytes": 226638 + }, + { + "name": "Gp0127644_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127644", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_classification.tsv", + "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", + "id": "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "file_size_bytes": 610862986 + }, + { + "name": "Gp0127644_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127644", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_report.tsv", + "md5_checksum": "9baa708296f62334e099cf61711b5e16", + "id": "nmdc:9baa708296f62334e099cf61711b5e16", + "file_size_bytes": 243322 + }, + { + "name": "Gp0127644_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127644", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_centrifuge_krona.html", + "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", + "id": "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "file_size_bytes": 2294995 + }, + { + "name": 
"Gp0127644_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127644", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_classification.tsv", + "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", + "id": "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "file_size_bytes": 487178087 + }, + { + "name": "Gp0127644_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127644", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_report.tsv", + "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", + "id": "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "file_size_bytes": 557688 + }, + { + "name": "Gp0127644_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127644", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/ReadbasedAnalysis/nmdc_mga0bpf635_kraken2_krona.html", + "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", + "id": "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb", + "file_size_bytes": 3567307 + }, + { + "name": "Gp0127644_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127644", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_contigs.fna", + "md5_checksum": "16f77f4aaed29f3acc31646e1ce06b2d", + "id": "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + "file_size_bytes": 21881611 + }, + { + "name": "Gp0127644_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127644", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_scaffolds.fna", + "md5_checksum": "b6afa25cadc614083204383bbad06f48", + "id": 
"nmdc:b6afa25cadc614083204383bbad06f48", + "file_size_bytes": 21742982 + }, + { + "name": "Gp0127644_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_covstats.txt", + "md5_checksum": "87b1ea13d41499eeb5eb67932db01423", + "id": "nmdc:87b1ea13d41499eeb5eb67932db01423", + "file_size_bytes": 3612085 + }, + { + "name": "Gp0127644_Assembled AGP file", + "description": "Assembled AGP file for Gp0127644", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_assembly.agp", + "md5_checksum": "72a38c353753abcb6d046385bf2950f6", + "id": "nmdc:72a38c353753abcb6d046385bf2950f6", + "file_size_bytes": 3350598 + }, + { + "name": "Gp0127644_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127644", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/assembly/nmdc_mga0bpf635_pairedMapped_sorted.bam", + "md5_checksum": "ecf498b9aa15f9d000845ffdfa7eb521", + "id": "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", + "file_size_bytes": 746781339 + }, + { + "name": "Gp0127644_Protein FAA", + "description": "Protein FAA for Gp0127644", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_proteins.faa", + "md5_checksum": "9d960cad4d88795aba8bb1acbe415fc9", + "id": "nmdc:9d960cad4d88795aba8bb1acbe415fc9", + "file_size_bytes": 12848136 + }, + { + "name": "Gp0127644_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127644", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_structural_annotation.gff", + "md5_checksum": "cb5d98ee6e459ce1cc2d14295424eef1", + "id": 
"nmdc:cb5d98ee6e459ce1cc2d14295424eef1", + "file_size_bytes": 2488 + }, + { + "name": "Gp0127644_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127644", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_functional_annotation.gff", + "md5_checksum": "349838000a53b6655a5b12edf6351c50", + "id": "nmdc:349838000a53b6655a5b12edf6351c50", + "file_size_bytes": 15112193 + }, + { + "name": "Gp0127644_KO TSV file", + "description": "KO TSV file for Gp0127644", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko.tsv", + "md5_checksum": "7bb072409221978dbea8ff5cb0bdba1e", + "id": "nmdc:7bb072409221978dbea8ff5cb0bdba1e", + "file_size_bytes": 1814299 + }, + { + "name": "Gp0127644_EC TSV file", + "description": "EC TSV file for Gp0127644", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ec.tsv", + "md5_checksum": "3d69ade973d1652bd6f061b2122ffe36", + "id": "nmdc:3d69ade973d1652bd6f061b2122ffe36", + "file_size_bytes": 1233948 + }, + { + "name": "Gp0127644_COG GFF file", + "description": "COG GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cog.gff", + "md5_checksum": "2a9b9a21fe5fb84219e0be5f153665be", + "id": "nmdc:2a9b9a21fe5fb84219e0be5f153665be", + "file_size_bytes": 9028987 + }, + { + "name": "Gp0127644_PFAM GFF file", + "description": "PFAM GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_pfam.gff", + "md5_checksum": "83e64b9fc9406a72d18e8dd4742bac1a", + "id": "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", + "file_size_bytes": 6574998 + }, + { + "name": "Gp0127644_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127644", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_tigrfam.gff", + "md5_checksum": "cdc4cc8629b7c61f1708f654aaaa9932", + "id": "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", + "file_size_bytes": 783908 + }, + { + "name": "Gp0127644_SMART GFF file", + "description": "SMART GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_smart.gff", + "md5_checksum": "f8d79375a2bf82f257e0015efeee6f26", + "id": "nmdc:f8d79375a2bf82f257e0015efeee6f26", + "file_size_bytes": 2030043 + }, + { + "name": "Gp0127644_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_supfam.gff", + "md5_checksum": "c9b4806132d19e740822b1a84bc4f07d", + "id": "nmdc:c9b4806132d19e740822b1a84bc4f07d", + "file_size_bytes": 11227652 + }, + { + "name": "Gp0127644_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_cath_funfam.gff", + "md5_checksum": "e304e10eb60423c23486e140594d1a7b", + "id": "nmdc:e304e10eb60423c23486e140594d1a7b", + "file_size_bytes": 8555821 + }, + { + "name": "Gp0127644_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/annotation/nmdc_mga0bpf635_ko_ec.gff", + "md5_checksum": "9b78f0ac527ee7287ae532a896582948", + "id": "nmdc:9b78f0ac527ee7287ae532a896582948", + "file_size_bytes": 5791094 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + 
"file_size_bytes": 0 + }, + { + "name": "Gp0127644_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.tooShort.fa", + "md5_checksum": "4857d71459f50147c8ae97ffce40caa5", + "id": "nmdc:4857d71459f50147c8ae97ffce40caa5", + "file_size_bytes": 18310651 + }, + { + "name": "Gp0127644_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_bins.unbinned.fa", + "md5_checksum": "65522bf77241109a74354d0e294597f9", + "id": "nmdc:65522bf77241109a74354d0e294597f9", + "file_size_bytes": 2858628 + }, + { + "name": "Gp0127644_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127644", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_checkm_qa.out", + "md5_checksum": "30d6c9fb23abb0849991fad01e0393f1", + "id": "nmdc:30d6c9fb23abb0849991fad01e0393f1", + "file_size_bytes": 760 + }, + { + "name": "Gp0127644_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127644", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_hqmq_bin.zip", + "md5_checksum": "a76c8c9034b877334a75e7c0b7c2c830", + "id": "nmdc:a76c8c9034b877334a75e7c0b7c2c830", + "file_size_bytes": 182 + }, + { + "name": "Gp0127644_metabat2 bins", + "description": "metabat2 bins for Gp0127644", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bpf635/MAGs/nmdc_mga0bpf635_metabat_bin.zip", + "md5_checksum": "9d712c5924d6d0ee6d7305918e69302d", + "id": "nmdc:9d712c5924d6d0ee6d7305918e69302d", + "file_size_bytes": 218004 + }, + { + "_id": { + "$oid": 
"649b003d1ae706d7b5b14ea2" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/mapping_stats.txt", + "file_size_bytes": 3427545, + "type": "nmdc:DataObject", + "id": "nmdc:8a13cc4cdcd17eff35bdd65c4ffba887", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea7" + }, + "description": "Assembled contigs fasta for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly_contigs.fna", + "file_size_bytes": 21697071, + "type": "nmdc:DataObject", + "id": "nmdc:f40e4315c5285ac27f850a924b9f0d19", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebe" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 737012011, + "type": "nmdc:DataObject", + "id": "nmdc:0d039fa249c3d84d8f41ba5302cdbf44", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec1" + }, + "description": "Assembled AGP file for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly.agp", + "file_size_bytes": 2981406, + "type": "nmdc:DataObject", + "id": "nmdc:0cd8988c1aa59aed46dc245a4fc85fae", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec2" + }, + "description": "Assembled scaffold fasta for gold:Gp0127644", + "url": "https://data.microbiomedata.org/data/1781_100346/assembly/assembly_scaffolds.fna", + "file_size_bytes": 21558498, + "type": "nmdc:DataObject", + "id": "nmdc:72856cd0c04a3d82033e4eeb78036c79", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + 
"$oid": "649b003d1ae706d7b5b15ba0" + }, + "id": "nmdc:ad532c16f1d8772ef78f2b4977e13fbd", + "name": "1781_100346.krona.html", + "description": "Gold:Gp0127644 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100346/ReadbasedAnalysis/centrifuge/1781_100346.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ba4" + }, + "id": "nmdc:92cdb6d9a145d9ae65275474604499cc", + "name": "1781_100346.json", + "description": "Gold:Gp0127644 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100346/ReadbasedAnalysis/1781_100346.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661e" + }, + "id": "nmdc:db639c1a9c06584736a3a8551fd080c4", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127644", + "file_size_bytes": 2937035, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1661f" + }, + "id": "nmdc:fa322b3ff5e9a665ddc2a40878a19292", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127644", + "file_size_bytes": 17744229, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16620" + }, + "id": "nmdc:5506404345e9af51ae1ef526737952eb", + "name": "gold:Gp0127644.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127644", + "file_size_bytes": 291225, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16621" + }, + "id": 
"nmdc:0d447336e66ce46fe603146e03f77994", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127644", + "file_size_bytes": 918, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16623" + }, + "id": "nmdc:e199232899faf46559a302f30bd9e0c8", + "name": "gold:Gp0127644.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127644", + "file_size_bytes": 320491, + "url": "https://data.microbiomedata.org/data/1781_100346/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1d" + }, + "description": "Functional annotation GFF file for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_functional_annotation.gff", + "md5_checksum": "0626957517790befa95e8fefad58be0c", + "file_size_bytes": 3385, + "id": "nmdc:0626957517790befa95e8fefad58be0c", + "name": "gold:Gp0127644_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1e" + }, + "description": "Protein FAA for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_proteins.faa", + "md5_checksum": "2c7d55cbee1f35793da90275740d3651", + "file_size_bytes": 3385, + "id": "nmdc:2c7d55cbee1f35793da90275740d3651", + "name": "gold:Gp0127644_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d1f" + }, + "description": "Structural annotation GFF file for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_structural_annotation.gff", + "md5_checksum": "0973e6d47848f6677ced2a8d463670fa", 
+ "file_size_bytes": 3385, + "id": "nmdc:0973e6d47848f6677ced2a8d463670fa", + "name": "gold:Gp0127644_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d20" + }, + "description": "EC TSV File for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_ec.tsv", + "md5_checksum": "03c32c8ae757623520f6211ff641c40a", + "file_size_bytes": 3385, + "id": "nmdc:03c32c8ae757623520f6211ff641c40a", + "name": "gold:Gp0127644_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d21" + }, + "description": "KO TSV File for gold:Gp0127644", + "url": "https://data.microbiomedata.org/1781_100346/img_annotation/Ga0482226_ko.tsv", + "md5_checksum": "3eced892c4712a2b13e805a978ec0819", + "file_size_bytes": 3385, + "id": "nmdc:3eced892c4712a2b13e805a978ec0819", + "name": "gold:Gp0127644_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34714" + }, + "has_input": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + "nmdc:ecf498b9aa15f9d000845ffdfa7eb521", + "nmdc:349838000a53b6655a5b12edf6351c50" + ], + "too_short_contig_num": 44192, + "part_of": [ + "nmdc:mga0bpf635" + ], + "binned_contig_num": 157, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:4857d71459f50147c8ae97ffce40caa5", + "nmdc:65522bf77241109a74354d0e294597f9", + "nmdc:30d6c9fb23abb0849991fad01e0393f1", + "nmdc:a76c8c9034b877334a75e7c0b7c2c830", + "nmdc:9d712c5924d6d0ee6d7305918e69302d" + ], + "was_informed_by": "gold:Gp0127644", + "input_contig_num": 46135, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis 
Activity for nmdc:mga0bpf635", + "mags_list": [ + { + "number_of_contig": 157, + "completeness": 39.0, + "bin_name": "bins.1", + "gene_count": 891, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + } + ], + "unbinned_contig_num": 1786, + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b4" + }, + "has_input": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9d960cad4d88795aba8bb1acbe415fc9", + "nmdc:cb5d98ee6e459ce1cc2d14295424eef1", + "nmdc:349838000a53b6655a5b12edf6351c50", + "nmdc:7bb072409221978dbea8ff5cb0bdba1e", + "nmdc:3d69ade973d1652bd6f061b2122ffe36", + "nmdc:2a9b9a21fe5fb84219e0be5f153665be", + "nmdc:83e64b9fc9406a72d18e8dd4742bac1a", + "nmdc:cdc4cc8629b7c61f1708f654aaaa9932", + "nmdc:f8d79375a2bf82f257e0015efeee6f26", + "nmdc:c9b4806132d19e740822b1a84bc4f07d", + "nmdc:e304e10eb60423c23486e140594d1a7b", + "nmdc:9b78f0ac527ee7287ae532a896582948" + ], + "was_informed_by": "gold:Gp0127644", + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0bpf635", + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa1" + }, + "has_input": [ + "nmdc:98da35678c59689ce738b2a6bc708692" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "ctg_logsum": 37962, + "scaf_logsum": 38062, + "gap_pct": 
0.00069, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:16f77f4aaed29f3acc31646e1ce06b2d", + "nmdc:b6afa25cadc614083204383bbad06f48", + "nmdc:87b1ea13d41499eeb5eb67932db01423", + "nmdc:72a38c353753abcb6d046385bf2950f6", + "nmdc:ecf498b9aa15f9d000845ffdfa7eb521" + ], + "asm_score": 3.712, + "was_informed_by": "gold:Gp0127644", + "ctg_powsum": 4162.045, + "scaf_max": 11252, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "scaf_powsum": 4172.955, + "execution_resource": "NERSC-Cori", + "contigs": 46135, + "name": "Assembly Activity for nmdc:mga0bpf635", + "ctg_max": 11252, + "gc_std": 0.09328, + "contig_bp": 20152503, + "gc_avg": 0.6086, + "started_at_time": "2021-10-11T02:26:47Z", + "scaf_bp": 20152643, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 46121, + "ended_at_time": "2021-10-11T02:55:00+00:00", + "ctg_l50": 394, + "ctg_l90": 285, + "ctg_n50": 14034, + "ctg_n90": 39639, + "scaf_l50": 395, + "scaf_l90": 285, + "scaf_n50": 13959, + "scaf_n90": 39626 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b57" + }, + "id": "nmdc:omprc-11-hwadfm25", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-n80sx618" + ], + "has_output": [ + "jgi:574fe0a87ded5e3df1ee148e" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + 
"type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127644" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86b" + }, + "has_input": [ + "nmdc:a1d8fff4b02719c4d0f9c442cf052f69" + ], + "part_of": [ + "nmdc:mga0bpf635" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:98da35678c59689ce738b2a6bc708692", + "nmdc:ff08ea52254e0cc1011c56656505b27b" + ], + "was_informed_by": "gold:Gp0127644", + "input_read_count": 11431762, + "output_read_bases": 1245433047, + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "input_read_bases": 1726196062, + "name": "Read QC Activity for nmdc:mga0bpf635", + "output_read_count": 8322164, + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf38" + }, + "has_input": [ + "nmdc:98da35678c59689ce738b2a6bc708692" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:dc2e21becda8d6b010a95897cf97ae90", + "nmdc:0dd334c92557f3a8ac8c78b437c75eaf", + "nmdc:425873a08e598b0ca2987ff7b9b5da1f", + "nmdc:b0f2449065b52935ddba8abd6ae6bc88", + "nmdc:9baa708296f62334e099cf61711b5e16", + "nmdc:f2a43278b06876cae5d4e8cdef17cfe1", + "nmdc:f1a811dbc523f9a27dbc004b8a66f0cb", + "nmdc:8983fa1acb03f2905bbec3a6ccee2854", + "nmdc:a07c6c5fb68d1a56e39d93e8745b96cb" + ], + "was_informed_by": "gold:Gp0127644", + "id": "nmdc:9990ae6d30bab3988f258ae8224acd89", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bpf635", + "started_at_time": "2021-10-11T02:26:47Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T02:55:00+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + 
"file_size_bytes": 1941323184, + "type": "nmdc:DataObject", + "id": "jgi:574fde667ded5e3df1ee1407", + "name": "10533.1.165310.TGTGCGT-AACGCAC.fastq.gz" + }, + { + "name": "Gp0127639_Filtered Reads", + "description": "Filtered Reads for Gp0127639", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filtered.fastq.gz", + "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", + "id": "nmdc:833077b40372c6daa20beaed04ed0ae1", + "file_size_bytes": 1585232805 + }, + { + "name": "Gp0127639_Filtered Stats", + "description": "Filtered Stats for Gp0127639", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/qa/nmdc_mga09wpw60_filterStats.txt", + "md5_checksum": "b68178eebde030fad0850797adbb2624", + "id": "nmdc:b68178eebde030fad0850797adbb2624", + "file_size_bytes": 289 + }, + { + "name": "Gp0127639_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report.tsv", + "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", + "id": "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "file_size_bytes": 648 + }, + { + "name": "Gp0127639_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_report_full.tsv", + "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", + "id": "nmdc:82f072d1931154fbc722531d3d0dc41c", + "file_size_bytes": 588644 + }, + { + "name": "Gp0127639_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127639", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_gottcha2_krona.html", + "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", + "id": 
"nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "file_size_bytes": 228175 + }, + { + "name": "Gp0127639_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127639", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_classification.tsv", + "md5_checksum": "81281fef2c0778516a84b3a672cc0230", + "id": "nmdc:81281fef2c0778516a84b3a672cc0230", + "file_size_bytes": 1468498728 + }, + { + "name": "Gp0127639_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127639", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_report.tsv", + "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", + "id": "nmdc:86ae054ba9def1126579c8f76db8a07a", + "file_size_bytes": 251338 + }, + { + "name": "Gp0127639_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127639", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_centrifuge_krona.html", + "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", + "id": "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "file_size_bytes": 2322720 + }, + { + "name": "Gp0127639_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127639", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_classification.tsv", + "md5_checksum": "848fc10ed4365047cb139a4b40303808", + "id": "nmdc:848fc10ed4365047cb139a4b40303808", + "file_size_bytes": 1168015909 + }, + { + "name": "Gp0127639_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127639", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_report.tsv", + "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", + "id": "nmdc:94e422e0bae86c608fba1c3815e08e92", + "file_size_bytes": 616202 + }, + { + "name": "Gp0127639_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127639", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/ReadbasedAnalysis/nmdc_mga09wpw60_kraken2_krona.html", + "md5_checksum": "c6eb85143a2489921c53f8184d536129", + "id": "nmdc:c6eb85143a2489921c53f8184d536129", + "file_size_bytes": 3863456 + }, + { + "name": "Gp0127639_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127639", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_contigs.fna", + "md5_checksum": "2b73310c6eef1ece5bb01f235b22fdbd", + "id": "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "file_size_bytes": 120497476 + }, + { + "name": "Gp0127639_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127639", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_scaffolds.fna", + "md5_checksum": "8f14c016997dd96f70f547df930717be", + "id": "nmdc:8f14c016997dd96f70f547df930717be", + "file_size_bytes": 119857107 + }, + { + "name": "Gp0127639_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_covstats.txt", + "md5_checksum": "5966e5e32744a14549b19b4c92a606a5", + "id": "nmdc:5966e5e32744a14549b19b4c92a606a5", + "file_size_bytes": 16872665 + }, + { + "name": "Gp0127639_Assembled AGP file", + "description": "Assembled AGP file for Gp0127639", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_assembly.agp", + "md5_checksum": "1fcd489b3ae86a76bf297cc19b50392d", + "id": "nmdc:1fcd489b3ae86a76bf297cc19b50392d", + "file_size_bytes": 15768901 + }, + { + "name": "Gp0127639_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127639", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/assembly/nmdc_mga09wpw60_pairedMapped_sorted.bam", + "md5_checksum": "5b90d13539ce840980db101fa7c1df96", + "id": "nmdc:5b90d13539ce840980db101fa7c1df96", + "file_size_bytes": 1779135536 + }, + { + "name": "Gp0127639_Protein FAA", + "description": "Protein FAA for Gp0127639", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_proteins.faa", + "md5_checksum": "6c09d55cfb8872b30eb1832394f80beb", + "id": "nmdc:6c09d55cfb8872b30eb1832394f80beb", + "file_size_bytes": 67573912 + }, + { + "name": "Gp0127639_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127639", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_structural_annotation.gff", + "md5_checksum": "2e3cc72d21590667259f6356882ce63b", + "id": "nmdc:2e3cc72d21590667259f6356882ce63b", + "file_size_bytes": 2526 + }, + { + "name": "Gp0127639_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127639", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_functional_annotation.gff", + "md5_checksum": "2dee5eaa50c8eeb6e3bc8471501d9964", + "id": "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", + "file_size_bytes": 75196016 + }, + { + "name": "Gp0127639_KO TSV file", + "description": "KO TSV file for Gp0127639", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko.tsv", + "md5_checksum": "7ec4cfdd88352d703a2bb64b99bd56c5", + "id": "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", + "file_size_bytes": 8707597 + }, + { + "name": "Gp0127639_EC TSV file", + "description": "EC TSV file for Gp0127639", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ec.tsv", + "md5_checksum": "16bedd944e5e836924c28b006026c348", + "id": "nmdc:16bedd944e5e836924c28b006026c348", + "file_size_bytes": 5769544 + }, + { + "name": "Gp0127639_COG GFF file", + "description": "COG GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cog.gff", + "md5_checksum": "8764070f565c50998968e0739420f5cc", + "id": "nmdc:8764070f565c50998968e0739420f5cc", + "file_size_bytes": 45648468 + }, + { + "name": "Gp0127639_PFAM GFF file", + "description": "PFAM GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_pfam.gff", + "md5_checksum": "9e6accc90d61ea572819dcdb591e41a7", + "id": "nmdc:9e6accc90d61ea572819dcdb591e41a7", + "file_size_bytes": 34995151 + }, + { + "name": "Gp0127639_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_tigrfam.gff", + "md5_checksum": "32b9518ee41cadb157f3c0f9ec91476c", + "id": "nmdc:32b9518ee41cadb157f3c0f9ec91476c", + "file_size_bytes": 4060116 + }, + { + "name": "Gp0127639_SMART GFF file", + "description": "SMART GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_smart.gff", + "md5_checksum": "432d591bd525ae429e837431d44954f7", + "id": "nmdc:432d591bd525ae429e837431d44954f7", + "file_size_bytes": 10056742 + }, + { + "name": "Gp0127639_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_supfam.gff", + "md5_checksum": "3120d5d5d27d142f898f70a8cc1b076e", + "id": "nmdc:3120d5d5d27d142f898f70a8cc1b076e", + "file_size_bytes": 56435804 + }, + { + "name": "Gp0127639_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_cath_funfam.gff", + "md5_checksum": "d37ff61fdae942030a1b07e855cf1abd", + "id": "nmdc:d37ff61fdae942030a1b07e855cf1abd", + "file_size_bytes": 43456195 + }, + { + "name": "Gp0127639_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/annotation/nmdc_mga09wpw60_ko_ec.gff", + "md5_checksum": "56995366ba4186639a8ff4fd4defbd5e", + "id": "nmdc:56995366ba4186639a8ff4fd4defbd5e", + "file_size_bytes": 27657123 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127639_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.tooShort.fa", + "md5_checksum": "820dbad1b0ddd3c728e77aceee09ea28", + "id": "nmdc:820dbad1b0ddd3c728e77aceee09ea28", + "file_size_bytes": 90173016 + }, + { + "name": "Gp0127639_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127639", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_bins.unbinned.fa", + "md5_checksum": "24fbfc69ded61dffff95ba2f8475239c", + "id": "nmdc:24fbfc69ded61dffff95ba2f8475239c", + "file_size_bytes": 27021291 + }, + { + "name": "Gp0127639_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127639", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_checkm_qa.out", + "md5_checksum": "1837710887027f94b0f25208edb35cbe", + "id": "nmdc:1837710887027f94b0f25208edb35cbe", + "file_size_bytes": 1570 + }, + { + "name": "Gp0127639_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127639", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_hqmq_bin.zip", + "md5_checksum": "7072cfd6665082a95b2c09a4bc88760c", + "id": "nmdc:7072cfd6665082a95b2c09a4bc88760c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127639_metabat2 bins", + "description": "metabat2 bins for Gp0127639", + "url": "https://data.microbiomedata.org/data/nmdc:mga09wpw60/MAGs/nmdc_mga09wpw60_metabat_bin.zip", + "md5_checksum": "b0db190d9d1093ef87a5efb8a600e9ef", + "id": "nmdc:b0db190d9d1093ef87a5efb8a600e9ef", + "file_size_bytes": 1000457 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8b" + }, + "description": "Assembled scaffold fasta for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly_scaffolds.fna", + "file_size_bytes": 119007591, + "type": "nmdc:DataObject", + "id": "nmdc:3200c62a99e8ddd0fd6403d6dfe5fc5d", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127639", + "url": 
"https://data.microbiomedata.org/data/1781_100341/assembly/mapping_stats.txt", + "file_size_bytes": 16022429, + "type": "nmdc:DataObject", + "id": "nmdc:b2bff56e405eaffed2b0a3d7d6000b37", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e8f" + }, + "description": "Assembled contigs fasta for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly_contigs.fna", + "file_size_bytes": 119647240, + "type": "nmdc:DataObject", + "id": "nmdc:1d1610f39b4543fe7a0ecde2b1d8d710", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e90" + }, + "description": "Assembled AGP file for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/assembly.agp", + "file_size_bytes": 14066973, + "type": "nmdc:DataObject", + "id": "nmdc:71da65a514fef7d1e2b3cf2a8dbcba74", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e91" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127639", + "url": "https://data.microbiomedata.org/data/1781_100341/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1755614129, + "type": "nmdc:DataObject", + "id": "nmdc:164f413fa91ee1433e3f441649315c61", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b74" + }, + "id": "nmdc:1bf82e8b1c00260947b645449b0bedcb", + "name": "1781_100341.krona.html", + "description": "Gold:Gp0127639 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100341/ReadbasedAnalysis/centrifuge/1781_100341.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b7a" + }, + "id": "nmdc:813f4c0b656c2812a7db73fc0df92f23", + "name": 
"1781_100341.json", + "description": "Gold:Gp0127639 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100341/ReadbasedAnalysis/1781_100341.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16608" + }, + "id": "nmdc:bd6adf1661bff8cdbad6416f39136291", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127639", + "file_size_bytes": 1092, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660a" + }, + "id": "nmdc:03f2b08b396c5b273845ff6fb1a7a2d2", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127639", + "file_size_bytes": 87721724, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1660b" + }, + "id": "nmdc:a57183db98a4cd0611a8587010c37d52", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127639", + "file_size_bytes": 29133879, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16612" + }, + "id": "nmdc:168e30f70b1513401a606fa75bdabf50", + "name": "gold:Gp0127639.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127639", + "file_size_bytes": 235784, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16627" + }, + "id": "nmdc:390577e22b09a5c74de14b9e3a9a6b19", + "name": "gold:Gp0127639.bins.1.fa", + "description": "metabat2 binned contig file for 
gold:Gp0127639", + "file_size_bytes": 472014, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662c" + }, + "id": "nmdc:2cc7b6f61240516e9bf15f84809635c8", + "name": "gold:Gp0127639.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127639", + "file_size_bytes": 282214, + "url": "https://data.microbiomedata.org/data/1781_100341/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0d" + }, + "description": "EC TSV File for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_ec.tsv", + "md5_checksum": "7e710e983d3a5ffbddc618c5e252e06b", + "file_size_bytes": 3385, + "id": "nmdc:7e710e983d3a5ffbddc618c5e252e06b", + "name": "gold:Gp0127639_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d0e" + }, + "description": "KO TSV File for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_ko.tsv", + "md5_checksum": "ccbc768cb20e4c1b25d7627b611eb8dc", + "file_size_bytes": 3385, + "id": "nmdc:ccbc768cb20e4c1b25d7627b611eb8dc", + "name": "gold:Gp0127639_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d10" + }, + "description": "Protein FAA for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_proteins.faa", + "md5_checksum": "fccc8283a46f12babeed0b2c7cc4eebd", + "file_size_bytes": 3385, + "id": "nmdc:fccc8283a46f12babeed0b2c7cc4eebd", + "name": "gold:Gp0127639_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + 
"$oid": "649b00401ae706d7b5b16d12" + }, + "description": "Functional annotation GFF file for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_functional_annotation.gff", + "md5_checksum": "ee416a49155f7c07bcb776962708fb04", + "file_size_bytes": 3385, + "id": "nmdc:ee416a49155f7c07bcb776962708fb04", + "name": "gold:Gp0127639_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d13" + }, + "description": "Structural annotation GFF file for gold:Gp0127639", + "url": "https://data.microbiomedata.org/1781_100341/img_annotation/Ga0482231_structural_annotation.gff", + "md5_checksum": "d0452fefd4ad4f4cd10c974294bf9058", + "file_size_bytes": 3385, + "id": "nmdc:d0452fefd4ad4f4cd10c974294bf9058", + "name": "gold:Gp0127639_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3471b" + }, + "has_input": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "nmdc:5b90d13539ce840980db101fa7c1df96", + "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964" + ], + "too_short_contig_num": 194918, + "part_of": [ + "nmdc:mga09wpw60" + ], + "binned_contig_num": 732, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:820dbad1b0ddd3c728e77aceee09ea28", + "nmdc:24fbfc69ded61dffff95ba2f8475239c", + "nmdc:1837710887027f94b0f25208edb35cbe", + "nmdc:7072cfd6665082a95b2c09a4bc88760c", + "nmdc:b0db190d9d1093ef87a5efb8a600e9ef" + ], + "was_informed_by": "gold:Gp0127639", + "input_contig_num": 212559, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga09wpw60", + "mags_list": [ + { + "number_of_contig": 85, + "completeness": 18.1, + "bin_name": "bins.1", + 
"gene_count": 437, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 59, + "completeness": 15.92, + "bin_name": "bins.2", + "gene_count": 343, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.84, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 258, + "completeness": 21.26, + "bin_name": "bins.3", + "gene_count": 1440, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + }, + { + "number_of_contig": 101, + "completeness": 29.13, + "bin_name": "bins.4", + "gene_count": 560, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 116, + "completeness": 1.53, + "bin_name": "bins.5", + "gene_count": 763, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 113, + "completeness": 9.72, + "bin_name": "bins.6", + "gene_count": 531, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + 
"gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 16909, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b7" + }, + "has_input": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6c09d55cfb8872b30eb1832394f80beb", + "nmdc:2e3cc72d21590667259f6356882ce63b", + "nmdc:2dee5eaa50c8eeb6e3bc8471501d9964", + "nmdc:7ec4cfdd88352d703a2bb64b99bd56c5", + "nmdc:16bedd944e5e836924c28b006026c348", + "nmdc:8764070f565c50998968e0739420f5cc", + "nmdc:9e6accc90d61ea572819dcdb591e41a7", + "nmdc:32b9518ee41cadb157f3c0f9ec91476c", + "nmdc:432d591bd525ae429e837431d44954f7", + "nmdc:3120d5d5d27d142f898f70a8cc1b076e", + "nmdc:d37ff61fdae942030a1b07e855cf1abd", + "nmdc:56995366ba4186639a8ff4fd4defbd5e" + ], + "was_informed_by": "gold:Gp0127639", + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga09wpw60", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa3" + }, + "has_input": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1" + ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "ctg_logsum": 317684, + "scaf_logsum": 318786, + "gap_pct": 0.0017, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2b73310c6eef1ece5bb01f235b22fdbd", + "nmdc:8f14c016997dd96f70f547df930717be", + "nmdc:5966e5e32744a14549b19b4c92a606a5", + 
"nmdc:1fcd489b3ae86a76bf297cc19b50392d", + "nmdc:5b90d13539ce840980db101fa7c1df96" + ], + "asm_score": 3.397, + "was_informed_by": "gold:Gp0127639", + "ctg_powsum": 34356, + "scaf_max": 19860, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "scaf_powsum": 34485, + "execution_resource": "NERSC-Cori", + "contigs": 212560, + "name": "Assembly Activity for nmdc:mga09wpw60", + "ctg_max": 19860, + "gc_std": 0.09375, + "contig_bp": 112053293, + "gc_avg": 0.63186, + "started_at_time": "2021-10-11T02:27:08Z", + "scaf_bp": 112055193, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 212379, + "ended_at_time": "2021-10-11T03:27:12+00:00", + "ctg_l50": 538, + "ctg_l90": 298, + "ctg_n50": 55584, + "ctg_n90": 173977, + "scaf_l50": 539, + "scaf_l90": 298, + "scaf_n50": 55395, + "scaf_n90": 173826 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b58" + }, + "id": "nmdc:omprc-11-vnnn4722", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-tzp60785" + ], + "has_output": [ + "jgi:574fde667ded5e3df1ee1407" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127639" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c871" + }, + "has_input": [ + "nmdc:ae9087ed8e1ead2407bca45a47725633" 
+ ], + "part_of": [ + "nmdc:mga09wpw60" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1", + "nmdc:b68178eebde030fad0850797adbb2624" + ], + "was_informed_by": "gold:Gp0127639", + "input_read_count": 23535784, + "output_read_bases": 2989527376, + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3553903384, + "name": "Read QC Activity for nmdc:mga09wpw60", + "output_read_count": 20011156, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf46" + }, + "has_input": [ + "nmdc:833077b40372c6daa20beaed04ed0ae1" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:514172bb91ef3b125ae2d001b47bff0b", + "nmdc:82f072d1931154fbc722531d3d0dc41c", + "nmdc:62a817ebcbfaf2c8feb1abedc35a736f", + "nmdc:81281fef2c0778516a84b3a672cc0230", + "nmdc:86ae054ba9def1126579c8f76db8a07a", + "nmdc:9db20a88fa3d02eb00f64d1671ef8521", + "nmdc:848fc10ed4365047cb139a4b40303808", + "nmdc:94e422e0bae86c608fba1c3815e08e92", + "nmdc:c6eb85143a2489921c53f8184d536129" + ], + "was_informed_by": "gold:Gp0127639", + "id": "nmdc:b8fea91a695311b43660f2e7044ad15c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09wpw60", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:27:12+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2500707412, + "type": "nmdc:DataObject", + "id": "jgi:574fe0a67ded5e3df1ee148d", + "name": "10533.3.165334.CGCTTAA-GTTAAGC.fastq.gz" + }, + { + "name": "Gp0127642_Filtered Reads", + "description": "Filtered Reads for Gp0127642", + 
"data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filtered.fastq.gz", + "md5_checksum": "603166d1e0da357d356a2029215d76ea", + "id": "nmdc:603166d1e0da357d356a2029215d76ea", + "file_size_bytes": 2304174057 + }, + { + "name": "Gp0127642_Filtered Stats", + "description": "Filtered Stats for Gp0127642", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/qa/nmdc_mga0cvxk30_filterStats.txt", + "md5_checksum": "639d9630c859c9b2f6f7a2eff1e1a863", + "id": "nmdc:639d9630c859c9b2f6f7a2eff1e1a863", + "file_size_bytes": 284 + }, + { + "name": "Gp0127642_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report.tsv", + "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", + "id": "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", + "file_size_bytes": 5303 + }, + { + "name": "Gp0127642_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_report_full.tsv", + "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", + "id": "nmdc:0a079e34648ce23b0837dff31e2be5df", + "file_size_bytes": 948120 + }, + { + "name": "Gp0127642_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127642", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_gottcha2_krona.html", + "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", + "id": "nmdc:f19bf1723f0f0e9f2158b137d2618b08", + "file_size_bytes": 241990 + }, + { + "name": "Gp0127642_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127642", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_classification.tsv", + "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", + "id": "nmdc:81fc62d01a53a7ab5037829a158f0b64", + "file_size_bytes": 2023464022 + }, + { + "name": "Gp0127642_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127642", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_report.tsv", + "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", + "id": "nmdc:05cc05eefdcb0d7bac19031619244a4b", + "file_size_bytes": 257700 + }, + { + "name": "Gp0127642_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127642", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_centrifuge_krona.html", + "md5_checksum": "bb92f0d18280f32aacf482a43a841372", + "id": "nmdc:bb92f0d18280f32aacf482a43a841372", + "file_size_bytes": 2339227 + }, + { + "name": "Gp0127642_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127642", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_classification.tsv", + "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", + "id": "nmdc:2fddd33160498548fa73e95dfc304d1a", + "file_size_bytes": 1630988221 + }, + { + "name": "Gp0127642_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127642", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_report.tsv", + "md5_checksum": "272e3daee292c6e284026ee95b72d290", + "id": "nmdc:272e3daee292c6e284026ee95b72d290", + "file_size_bytes": 659136 + }, + { + "name": "Gp0127642_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127642", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/ReadbasedAnalysis/nmdc_mga0cvxk30_kraken2_krona.html", + "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", + "id": "nmdc:bca8c2988929e7c176ec7b6609445db2", + "file_size_bytes": 4013188 + }, + { + "name": "Gp0127642_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127642", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_contigs.fna", + "md5_checksum": "9c2c077dd8f43350b83c1c1ba853bbbc", + "id": "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "file_size_bytes": 44374790 + }, + { + "name": "Gp0127642_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127642", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_scaffolds.fna", + "md5_checksum": "9a3dfedede65ba1253a84264492e909c", + "id": "nmdc:9a3dfedede65ba1253a84264492e909c", + "file_size_bytes": 44064962 + }, + { + "name": "Gp0127642_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_covstats.txt", + "md5_checksum": "0772cb4473177c4e0046c7fd9cb65b27", + "id": "nmdc:0772cb4473177c4e0046c7fd9cb65b27", + "file_size_bytes": 8090415 + }, + { + "name": "Gp0127642_Assembled AGP file", + "description": "Assembled AGP file for Gp0127642", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_assembly.agp", + "md5_checksum": "7d0ccfaeac8981d1300b8c17abed052b", + "id": "nmdc:7d0ccfaeac8981d1300b8c17abed052b", + "file_size_bytes": 7524067 + }, + { + "name": "Gp0127642_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0127642", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/assembly/nmdc_mga0cvxk30_pairedMapped_sorted.bam", + "md5_checksum": "a5b5801b13f062bc09a1405d0a01e6ac", + "id": "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", + "file_size_bytes": 2461892983 + }, + { + "name": "Gp0127642_Protein FAA", + "description": "Protein FAA for Gp0127642", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_proteins.faa", + "md5_checksum": "e6270776fe3cb9f4e8e2958f9d8d6151", + "id": "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", + "file_size_bytes": 26699570 + }, + { + "name": "Gp0127642_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127642", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_structural_annotation.gff", + "md5_checksum": "f442172aba544a550f1e294bc615fd1d", + "id": "nmdc:f442172aba544a550f1e294bc615fd1d", + "file_size_bytes": 2505 + }, + { + "name": "Gp0127642_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127642", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_functional_annotation.gff", + "md5_checksum": "c0f7ac45facbbb7b74bb7ce11af11910", + "id": "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", + "file_size_bytes": 32011364 + }, + { + "name": "Gp0127642_KO TSV file", + "description": "KO TSV file for Gp0127642", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko.tsv", + "md5_checksum": "63db41425c31ceda578a9e2a801dcb98", + "id": "nmdc:63db41425c31ceda578a9e2a801dcb98", + "file_size_bytes": 3660508 + }, + { + "name": "Gp0127642_EC TSV file", + 
"description": "EC TSV file for Gp0127642", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ec.tsv", + "md5_checksum": "1cf9336281454b1747a86f9877f47ce8", + "id": "nmdc:1cf9336281454b1747a86f9877f47ce8", + "file_size_bytes": 2451794 + }, + { + "name": "Gp0127642_COG GFF file", + "description": "COG GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cog.gff", + "md5_checksum": "1cb7ab56a921ed80d21dad5b2d41c139", + "id": "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", + "file_size_bytes": 18356139 + }, + { + "name": "Gp0127642_PFAM GFF file", + "description": "PFAM GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_pfam.gff", + "md5_checksum": "157326e95b92fa83ab5755c22acf5837", + "id": "nmdc:157326e95b92fa83ab5755c22acf5837", + "file_size_bytes": 13044512 + }, + { + "name": "Gp0127642_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_tigrfam.gff", + "md5_checksum": "f001a06864e30347885e5a76ae89ae92", + "id": "nmdc:f001a06864e30347885e5a76ae89ae92", + "file_size_bytes": 1280537 + }, + { + "name": "Gp0127642_SMART GFF file", + "description": "SMART GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_smart.gff", + "md5_checksum": "aa1e3207b62ca31a87da28ad4c3e6e92", + "id": "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", + "file_size_bytes": 4029242 + }, + { + "name": "Gp0127642_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_supfam.gff", + "md5_checksum": "5119eebdfebd43b4af243a61cc8e45eb", + "id": "nmdc:5119eebdfebd43b4af243a61cc8e45eb", + "file_size_bytes": 23011352 + }, + { + 
"name": "Gp0127642_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_cath_funfam.gff", + "md5_checksum": "4e6178de376e5e228c8b5c17ce3d0621", + "id": "nmdc:4e6178de376e5e228c8b5c17ce3d0621", + "file_size_bytes": 17039992 + }, + { + "name": "Gp0127642_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127642", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/annotation/nmdc_mga0cvxk30_ko_ec.gff", + "md5_checksum": "d89f026da3dfb4ee7d4884a47ce5739d", + "id": "nmdc:d89f026da3dfb4ee7d4884a47ce5739d", + "file_size_bytes": 11677748 + }, + { + "name": "Gp0127642_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127642", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_checkm_qa.out", + "md5_checksum": "ac59797a394f8e4aa971e5c1d016e23e", + "id": "nmdc:ac59797a394f8e4aa971e5c1d016e23e", + "file_size_bytes": 765 + }, + { + "name": "Gp0127642_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127642", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0cvxk30/MAGs/nmdc_mga0cvxk30_hqmq_bin.zip", + "md5_checksum": "46858bd4b45bdaa4e4344820f3c54b3b", + "id": "nmdc:46858bd4b45bdaa4e4344820f3c54b3b", + "file_size_bytes": 472684 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9b" + }, + "description": "Assembled scaffold fasta for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly_scaffolds.fna", + "file_size_bytes": 43652238, + "type": "nmdc:DataObject", + "id": "nmdc:6eca0963e47257569a60827999eeaaa8", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9c" + }, + "description": 
"Metagenome Contig Coverage Stats for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/mapping_stats.txt", + "file_size_bytes": 7677591, + "type": "nmdc:DataObject", + "id": "nmdc:8a499e5986fac773f987576c5c2ec223", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9d" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2429450297, + "type": "nmdc:DataObject", + "id": "nmdc:4938ea35089362aa1ee2e129706e1e8a", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e9f" + }, + "description": "Assembled contigs fasta for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly_contigs.fna", + "file_size_bytes": 43961966, + "type": "nmdc:DataObject", + "id": "nmdc:1e4e73c9d1faa4585cb3a266b5a6cd39", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea6" + }, + "description": "Assembled AGP file for gold:Gp0127642", + "url": "https://data.microbiomedata.org/data/1781_100344/assembly/assembly.agp", + "file_size_bytes": 6698219, + "type": "nmdc:DataObject", + "id": "nmdc:ff66a5de4da06400243924f54998c37d", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b8e" + }, + "id": "nmdc:57c57663cd0c81252303be99f87ec09e", + "name": "1781_100344.krona.html", + "description": "Gold:Gp0127642 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100344/ReadbasedAnalysis/centrifuge/1781_100344.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15b92" + }, + 
"id": "nmdc:07632d9f02d85eee5b556a94acf251ef", + "name": "1781_100344.json", + "description": "Gold:Gp0127642 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100344/ReadbasedAnalysis/1781_100344.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16613" + }, + "id": "nmdc:231cfca4487ba7ec3ab476022e003ac7", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127642", + "file_size_bytes": 3446845, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16615" + }, + "id": "nmdc:bceccf441b752fc5608db53515a9552e", + "name": "gold:Gp0127642.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127642", + "file_size_bytes": 276719, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16616" + }, + "id": "nmdc:c26ef51c7ce6c79f8dad28e39f4238d7", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127642", + "file_size_bytes": 918, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16618" + }, + "id": "nmdc:914e14cda452df07bf33be9bda12738c", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127642", + "file_size_bytes": 38677251, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16619" + }, + "id": "nmdc:0d6107d2c1b4c0e3423f54ae1895aad9", + "name": "gold:Gp0127642.bins.2.fa", + 
"description": "metabat2 binned contig file for gold:Gp0127642", + "file_size_bytes": 743464, + "url": "https://data.microbiomedata.org/data/1781_100344/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d29" + }, + "description": "Functional annotation GFF file for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_functional_annotation.gff", + "md5_checksum": "657b2348517d3e169df0914f5d8a2d21", + "file_size_bytes": 3385, + "id": "nmdc:657b2348517d3e169df0914f5d8a2d21", + "name": "gold:Gp0127642_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2c" + }, + "description": "Protein FAA for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_proteins.faa", + "md5_checksum": "263acdd17bdb9ed72102610070da3d65", + "file_size_bytes": 3385, + "id": "nmdc:263acdd17bdb9ed72102610070da3d65", + "name": "gold:Gp0127642_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2e" + }, + "description": "EC TSV File for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_ec.tsv", + "md5_checksum": "4f8de602126deeb9ef60cf5f739d601a", + "file_size_bytes": 3385, + "id": "nmdc:4f8de602126deeb9ef60cf5f739d601a", + "name": "gold:Gp0127642_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d30" + }, + "description": "KO TSV File for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_ko.tsv", + "md5_checksum": "65319d4c3ffdbf5dcdb2e2837aea8cf4", + "file_size_bytes": 3385, + "id": 
"nmdc:65319d4c3ffdbf5dcdb2e2837aea8cf4", + "name": "gold:Gp0127642_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d32" + }, + "description": "Structural annotation GFF file for gold:Gp0127642", + "url": "https://data.microbiomedata.org/1781_100344/img_annotation/Ga0482228_structural_annotation.gff", + "md5_checksum": "9e55f66e86f57487e23029b90a84c4a4", + "file_size_bytes": 3385, + "id": "nmdc:9e55f66e86f57487e23029b90a84c4a4", + "name": "gold:Gp0127642_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3471c" + }, + "has_input": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "nmdc:a5b5801b13f062bc09a1405d0a01e6ac", + "nmdc:c0f7ac45facbbb7b74bb7ce11af11910" + ], + "too_short_contig_num": 101249, + "part_of": [ + "nmdc:mga0cvxk30" + ], + "binned_contig_num": 213, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac59797a394f8e4aa971e5c1d016e23e", + "nmdc:46858bd4b45bdaa4e4344820f3c54b3b" + ], + "was_informed_by": "gold:Gp0127642", + "input_contig_num": 103206, + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0cvxk30", + "mags_list": [ + { + "number_of_contig": 213, + "completeness": 71.17, + "bin_name": "bins.1", + "gene_count": 1914, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 0, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.97, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 31 + } + ], + "unbinned_contig_num": 1744, + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:MAGsAnalysisActivity", + 
"ended_at_time": "2021-12-02T20:50:24+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ba" + }, + "has_input": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e6270776fe3cb9f4e8e2958f9d8d6151", + "nmdc:f442172aba544a550f1e294bc615fd1d", + "nmdc:c0f7ac45facbbb7b74bb7ce11af11910", + "nmdc:63db41425c31ceda578a9e2a801dcb98", + "nmdc:1cf9336281454b1747a86f9877f47ce8", + "nmdc:1cb7ab56a921ed80d21dad5b2d41c139", + "nmdc:157326e95b92fa83ab5755c22acf5837", + "nmdc:f001a06864e30347885e5a76ae89ae92", + "nmdc:aa1e3207b62ca31a87da28ad4c3e6e92", + "nmdc:5119eebdfebd43b4af243a61cc8e45eb", + "nmdc:4e6178de376e5e228c8b5c17ce3d0621", + "nmdc:d89f026da3dfb4ee7d4884a47ce5739d" + ], + "was_informed_by": "gold:Gp0127642", + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0cvxk30", + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T20:50:24+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa7" + }, + "has_input": [ + "nmdc:603166d1e0da357d356a2029215d76ea" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "ctg_logsum": 50653, + "scaf_logsum": 50816, + "gap_pct": 0.00106, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9c2c077dd8f43350b83c1c1ba853bbbc", + "nmdc:9a3dfedede65ba1253a84264492e909c", + "nmdc:0772cb4473177c4e0046c7fd9cb65b27", + "nmdc:7d0ccfaeac8981d1300b8c17abed052b", + "nmdc:a5b5801b13f062bc09a1405d0a01e6ac" + ], + "asm_score": 7.947, + "was_informed_by": "gold:Gp0127642", + "ctg_powsum": 5974.26, + "scaf_max": 27286, + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "scaf_powsum": 5993.216, + "execution_resource": "NERSC-Cori", + "contigs": 
103206, + "name": "Assembly Activity for nmdc:mga0cvxk30", + "ctg_max": 27286, + "gc_std": 0.1028, + "gc_avg": 0.60377, + "contig_bp": 40567169, + "started_at_time": "2021-12-01T21:30:33Z", + "scaf_bp": 40567599, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 103181, + "ended_at_time": "2021-12-02T20:50:24+00:00", + "ctg_l50": 348, + "ctg_l90": 283, + "ctg_n50": 35487, + "ctg_n90": 88775, + "scaf_l50": 348, + "scaf_l90": 283, + "scaf_n50": 35472, + "scaf_n90": 88751 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b59" + }, + "id": "nmdc:omprc-11-p21wp875", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-qpve9v25" + ], + "has_output": [ + "jgi:574fe0a67ded5e3df1ee148d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127642" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c874" + }, + "has_input": [ + "nmdc:ac3a54ab71fd4e15763cd3e01c7a91bf" + ], + "part_of": [ + "nmdc:mga0cvxk30" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:603166d1e0da357d356a2029215d76ea", + "nmdc:639d9630c859c9b2f6f7a2eff1e1a863" + ], + "was_informed_by": "gold:Gp0127642", + "input_read_count": 28024960, + "output_read_bases": 
4095196321, + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4231768960, + "name": "Read QC Activity for nmdc:mga0cvxk30", + "output_read_count": 27378404, + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T20:50:24+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf3b" + }, + "has_input": [ + "nmdc:603166d1e0da357d356a2029215d76ea" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7f6a9435c3a9aaca7ce9efe9d16e41", + "nmdc:0a079e34648ce23b0837dff31e2be5df", + "nmdc:f19bf1723f0f0e9f2158b137d2618b08", + "nmdc:81fc62d01a53a7ab5037829a158f0b64", + "nmdc:05cc05eefdcb0d7bac19031619244a4b", + "nmdc:bb92f0d18280f32aacf482a43a841372", + "nmdc:2fddd33160498548fa73e95dfc304d1a", + "nmdc:272e3daee292c6e284026ee95b72d290", + "nmdc:bca8c2988929e7c176ec7b6609445db2" + ], + "was_informed_by": "gold:Gp0127642", + "id": "nmdc:2de70a234a7c626b9f512b8aa3b73717", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0cvxk30", + "started_at_time": "2021-12-01T21:30:33Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T20:50:24+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2463257736, + "type": "nmdc:DataObject", + "id": "jgi:574fde867ded5e3df1ee1420", + "name": "10533.2.165322.TTCGTAC-GGTACGA.fastq.gz" + }, + { + "name": "Gp0127646_Filtered Reads", + "description": "Filtered Reads for Gp0127646", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filtered.fastq.gz", + "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", + "id": "nmdc:208a3777ef0b99408f0d5832dee576e0", + "file_size_bytes": 2209739723 + }, + { + "name": "Gp0127646_Filtered Stats", + 
"description": "Filtered Stats for Gp0127646", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/qa/nmdc_mga0dm4q17_filterStats.txt", + "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", + "id": "nmdc:8533a56006bdc1841b6fc16e99b6a84a", + "file_size_bytes": 291 + }, + { + "name": "Gp0127646_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report.tsv", + "md5_checksum": "3e0598df41941463bac0fdec5df29f55", + "id": "nmdc:3e0598df41941463bac0fdec5df29f55", + "file_size_bytes": 4650 + }, + { + "name": "Gp0127646_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_report_full.tsv", + "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", + "id": "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", + "file_size_bytes": 877659 + }, + { + "name": "Gp0127646_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127646", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_gottcha2_krona.html", + "md5_checksum": "bc8e157195d042d7207d67b4982fea96", + "id": "nmdc:bc8e157195d042d7207d67b4982fea96", + "file_size_bytes": 236676 + }, + { + "name": "Gp0127646_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127646", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_classification.tsv", + "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", + "id": "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", + "file_size_bytes": 1901493736 + }, + { + "name": "Gp0127646_Centrifuge TSV report", + "description": 
"Centrifuge TSV report for Gp0127646", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_report.tsv", + "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", + "id": "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", + "file_size_bytes": 256274 + }, + { + "name": "Gp0127646_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127646", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_centrifuge_krona.html", + "md5_checksum": "cd10cca62774e66f60d60380ee18132e", + "id": "nmdc:cd10cca62774e66f60d60380ee18132e", + "file_size_bytes": 2333722 + }, + { + "name": "Gp0127646_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127646", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_classification.tsv", + "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", + "id": "nmdc:b13ee2ee52d15c3669aecd2e913f2658", + "file_size_bytes": 1534616616 + }, + { + "name": "Gp0127646_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127646", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_report.tsv", + "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", + "id": "nmdc:09a2d722810b3d90207bc4cfa626133b", + "file_size_bytes": 663507 + }, + { + "name": "Gp0127646_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127646", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/ReadbasedAnalysis/nmdc_mga0dm4q17_kraken2_krona.html", + "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", + "id": 
"nmdc:c3a8d9f48266a43ad74fc581132e2bba", + "file_size_bytes": 4031909 + }, + { + "name": "Gp0127646_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127646", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_contigs.fna", + "md5_checksum": "c2301a45b987661e5e6f32eaf6928003", + "id": "nmdc:c2301a45b987661e5e6f32eaf6928003", + "file_size_bytes": 33070670 + }, + { + "name": "Gp0127646_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127646", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_scaffolds.fna", + "md5_checksum": "6233a266773aa722d6a3c2556b0c1cb5", + "id": "nmdc:6233a266773aa722d6a3c2556b0c1cb5", + "file_size_bytes": 32825592 + }, + { + "name": "Gp0127646_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_covstats.txt", + "md5_checksum": "c5460716df8c1d47e081837c8cc5d281", + "id": "nmdc:c5460716df8c1d47e081837c8cc5d281", + "file_size_bytes": 6393678 + }, + { + "name": "Gp0127646_Assembled AGP file", + "description": "Assembled AGP file for Gp0127646", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_assembly.agp", + "md5_checksum": "9437132a95f356e7cc6513f862f38f81", + "id": "nmdc:9437132a95f356e7cc6513f862f38f81", + "file_size_bytes": 5942403 + }, + { + "name": "Gp0127646_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127646", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/assembly/nmdc_mga0dm4q17_pairedMapped_sorted.bam", + "md5_checksum": "0d0ee85be3a079b0eba5bb872c842f7d", + "id": "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", + 
"file_size_bytes": 2346665933 + }, + { + "name": "Gp0127646_Protein FAA", + "description": "Protein FAA for Gp0127646", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_proteins.faa", + "md5_checksum": "b907352a805a209c5b7e10f6ce9e3ceb", + "id": "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", + "file_size_bytes": 18886480 + }, + { + "name": "Gp0127646_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127646", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_structural_annotation.gff", + "md5_checksum": "769c049c4b3301900de0c62666e8c297", + "id": "nmdc:769c049c4b3301900de0c62666e8c297", + "file_size_bytes": 2883 + }, + { + "name": "Gp0127646_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127646", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_functional_annotation.gff", + "md5_checksum": "3dcb9f83f3921fc7f3e7a2050584cc77", + "id": "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", + "file_size_bytes": 23048582 + }, + { + "name": "Gp0127646_KO TSV file", + "description": "KO TSV file for Gp0127646", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko.tsv", + "md5_checksum": "1b81cc955690e81f18c2bc1533e7ee89", + "id": "nmdc:1b81cc955690e81f18c2bc1533e7ee89", + "file_size_bytes": 2643070 + }, + { + "name": "Gp0127646_EC TSV file", + "description": "EC TSV file for Gp0127646", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ec.tsv", + "md5_checksum": "dd94ee1dbd107bf14e8be72b8f546290", + "id": "nmdc:dd94ee1dbd107bf14e8be72b8f546290", + 
"file_size_bytes": 1742846 + }, + { + "name": "Gp0127646_COG GFF file", + "description": "COG GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cog.gff", + "md5_checksum": "e271f0ef1c44b514304c35a7913751e3", + "id": "nmdc:e271f0ef1c44b514304c35a7913751e3", + "file_size_bytes": 12090733 + }, + { + "name": "Gp0127646_PFAM GFF file", + "description": "PFAM GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_pfam.gff", + "md5_checksum": "b3d3f1ef308b7555cbea077cc00dbc95", + "id": "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", + "file_size_bytes": 8631888 + }, + { + "name": "Gp0127646_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_tigrfam.gff", + "md5_checksum": "d18d6a67ad7e17514b0c4b502ea69ac0", + "id": "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", + "file_size_bytes": 840759 + }, + { + "name": "Gp0127646_SMART GFF file", + "description": "SMART GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_smart.gff", + "md5_checksum": "62920faf364dea6a1d028878d49a2989", + "id": "nmdc:62920faf364dea6a1d028878d49a2989", + "file_size_bytes": 2684392 + }, + { + "name": "Gp0127646_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_supfam.gff", + "md5_checksum": "757bd3295026410cb03690d4dae95935", + "id": "nmdc:757bd3295026410cb03690d4dae95935", + "file_size_bytes": 15569120 + }, + { + "name": "Gp0127646_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_cath_funfam.gff", + "md5_checksum": "19eef79eefc81cbe6d7d4586d8be5d23", + "id": 
"nmdc:19eef79eefc81cbe6d7d4586d8be5d23", + "file_size_bytes": 11479737 + }, + { + "name": "Gp0127646_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/annotation/nmdc_mga0dm4q17_ko_ec.gff", + "md5_checksum": "fc8598d9d6926e6ac8bb9c488016734a", + "id": "nmdc:fc8598d9d6926e6ac8bb9c488016734a", + "file_size_bytes": 8425263 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127646_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.tooShort.fa", + "md5_checksum": "de605dd3ecac26d6a35740c09448b171", + "id": "nmdc:de605dd3ecac26d6a35740c09448b171", + "file_size_bytes": 31210054 + }, + { + "name": "Gp0127646_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_bins.unbinned.fa", + "md5_checksum": "9392ab9668a1c347f010004c2f0cc8db", + "id": "nmdc:9392ab9668a1c347f010004c2f0cc8db", + "file_size_bytes": 1595698 + }, + { + "name": "Gp0127647_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127647", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", + "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", + "id": 
"nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "file_size_bytes": 775 + }, + { + "name": "Gp0127646_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127646", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_hqmq_bin.zip", + "md5_checksum": "d75d0006d0009e7e14f2ad8044a3cbfb", + "id": "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", + "file_size_bytes": 182 + }, + { + "name": "Gp0127646_metabat2 bins", + "description": "metabat2 bins for Gp0127646", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dm4q17/MAGs/nmdc_mga0dm4q17_metabat_bin.zip", + "md5_checksum": "17e9a7763327f2b5d3f841079c2f68d8", + "id": "nmdc:17e9a7763327f2b5d3f841079c2f68d8", + "file_size_bytes": 82006 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ea9" + }, + "description": "Assembled contigs fasta for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly_contigs.fna", + "file_size_bytes": 32744062, + "type": "nmdc:DataObject", + "id": "nmdc:cfb56be5f505927c085fb3105561b578", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eab" + }, + "description": "Assembled scaffold fasta for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly_scaffolds.fna", + "file_size_bytes": 32499084, + "type": "nmdc:DataObject", + "id": "nmdc:fda96a730e2bfe0ced5e4ff057aae5d3", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eac" + }, + "description": "Assembled AGP file for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/assembly.agp", + "file_size_bytes": 5288971, + "type": "nmdc:DataObject", + "id": "nmdc:5cd6af80a19658f0fa7b8229f6ba8242", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": 
{ + "$oid": "649b003d1ae706d7b5b14ead" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2314898025, + "type": "nmdc:DataObject", + "id": "nmdc:3c03b1fab29a1825d07195ca4992fb31", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb9" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127646", + "url": "https://data.microbiomedata.org/data/1781_100348/assembly/mapping_stats.txt", + "file_size_bytes": 6067070, + "type": "nmdc:DataObject", + "id": "nmdc:e17989e7cc9952a4f9d8321328229316", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bb5" + }, + "id": "nmdc:bfb473bea17c38bdb8fc5e394c1021b7", + "name": "1781_100348.json", + "description": "Gold:Gp0127646 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100348/ReadbasedAnalysis/1781_100348.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bb6" + }, + "id": "nmdc:26bb18215b48754fcd58dbb38e8d01e1", + "name": "1781_100348.krona.html", + "description": "Gold:Gp0127646 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100348/ReadbasedAnalysis/centrifuge/1781_100348.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662b" + }, + "id": "nmdc:10a6a1f1b3bf7cc4960ad08e0914edc7", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127646", + "file_size_bytes": 30170371, + "url": "https://data.microbiomedata.org/data/1781_100348/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16631" + }, + "id": "nmdc:dc8ebe9a5e1a8e38d88a63afbc535046", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127646", + "file_size_bytes": 1849966, + "url": "https://data.microbiomedata.org/data/1781_100348/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d27" + }, + "description": "KO TSV File for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_ko.tsv", + "md5_checksum": "5cd2f970cbb8eb5d8e52ac7a08bfb9a3", + "file_size_bytes": 3385, + "id": "nmdc:5cd2f970cbb8eb5d8e52ac7a08bfb9a3", + "name": "gold:Gp0127646_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d28" + }, + "description": "EC TSV File for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_ec.tsv", + "md5_checksum": "9d87100ad8b6278b4a442c4686d7aef7", + "file_size_bytes": 3385, + "id": "nmdc:9d87100ad8b6278b4a442c4686d7aef7", + "name": "gold:Gp0127646_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2b" + }, + "description": "Functional annotation GFF file for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_functional_annotation.gff", + "md5_checksum": "c0858f9a847f241ed28f454adb580bf4", + "file_size_bytes": 3385, + "id": "nmdc:c0858f9a847f241ed28f454adb580bf4", + "name": "gold:Gp0127646_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2f" + }, + "description": "Protein FAA for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_proteins.faa", + "md5_checksum": "646648c11733f7ab7ea23008729360ce", 
+ "file_size_bytes": 3385, + "id": "nmdc:646648c11733f7ab7ea23008729360ce", + "name": "gold:Gp0127646_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d35" + }, + "description": "Structural annotation GFF file for gold:Gp0127646", + "url": "https://data.microbiomedata.org/1781_100348/img_annotation/Ga0482224_structural_annotation.gff", + "md5_checksum": "94574634e1ccfe241af033259e27df1a", + "file_size_bytes": 3385, + "id": "nmdc:94574634e1ccfe241af033259e27df1a", + "name": "gold:Gp0127646_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34715" + }, + "has_input": [ + "nmdc:c2301a45b987661e5e6f32eaf6928003", + "nmdc:0d0ee85be3a079b0eba5bb872c842f7d", + "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77" + ], + "too_short_contig_num": 80674, + "part_of": [ + "nmdc:mga0dm4q17" + ], + "binned_contig_num": 20, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:de605dd3ecac26d6a35740c09448b171", + "nmdc:9392ab9668a1c347f010004c2f0cc8db", + "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "nmdc:d75d0006d0009e7e14f2ad8044a3cbfb", + "nmdc:17e9a7763327f2b5d3f841079c2f68d8" + ], + "was_informed_by": "gold:Gp0127646", + "input_contig_num": 81652, + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0dm4q17", + "mags_list": [ + { + "number_of_contig": 20, + "completeness": 1.36, + "bin_name": "bins.1", + "gene_count": 275, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 10 + } 
+ ], + "unbinned_contig_num": 958, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b2" + }, + "has_input": [ + "nmdc:c2301a45b987661e5e6f32eaf6928003" + ], + "part_of": [ + "nmdc:mga0dm4q17" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b907352a805a209c5b7e10f6ce9e3ceb", + "nmdc:769c049c4b3301900de0c62666e8c297", + "nmdc:3dcb9f83f3921fc7f3e7a2050584cc77", + "nmdc:1b81cc955690e81f18c2bc1533e7ee89", + "nmdc:dd94ee1dbd107bf14e8be72b8f546290", + "nmdc:e271f0ef1c44b514304c35a7913751e3", + "nmdc:b3d3f1ef308b7555cbea077cc00dbc95", + "nmdc:d18d6a67ad7e17514b0c4b502ea69ac0", + "nmdc:62920faf364dea6a1d028878d49a2989", + "nmdc:757bd3295026410cb03690d4dae95935", + "nmdc:19eef79eefc81cbe6d7d4586d8be5d23", + "nmdc:fc8598d9d6926e6ac8bb9c488016734a" + ], + "was_informed_by": "gold:Gp0127646", + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0dm4q17", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f9d" + }, + "has_input": [ + "nmdc:208a3777ef0b99408f0d5832dee576e0" + ], + "part_of": [ + "nmdc:mga0dm4q17" + ], + "ctg_logsum": 20856, + "scaf_logsum": 20954, + "gap_pct": 0.00116, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c2301a45b987661e5e6f32eaf6928003", + "nmdc:6233a266773aa722d6a3c2556b0c1cb5", + "nmdc:c5460716df8c1d47e081837c8cc5d281", + "nmdc:9437132a95f356e7cc6513f862f38f81", + "nmdc:0d0ee85be3a079b0eba5bb872c842f7d" + ], + "asm_score": 17.863, + "was_informed_by": "gold:Gp0127646", + "ctg_powsum": 2534.931, + "scaf_max": 88400, + 
"id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "scaf_powsum": 2545.156, + "execution_resource": "NERSC-Cori", + "contigs": 81653, + "name": "Assembly Activity for nmdc:mga0dm4q17", + "ctg_max": 88400, + "gc_std": 0.13273, + "contig_bp": 30097213, + "gc_avg": 0.55961, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 30097563, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 81627, + "ended_at_time": "2021-10-11T04:05:12+00:00", + "ctg_l50": 332, + "ctg_l90": 282, + "ctg_n50": 30532, + "ctg_n90": 71638, + "scaf_l50": 332, + "scaf_l90": 282, + "scaf_n50": 30518, + "scaf_n90": 71614, + "scaf_l_gt50k": 150260, + "scaf_n_gt50k": 2, + "scaf_pct_gt50k": 0.49924305 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5a" + }, + "id": "nmdc:omprc-11-vs67yj43", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-0n5nks24" + ], + "has_output": [ + "jgi:574fde867ded5e3df1ee1420" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127646" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86f" + }, + "has_input": [ + "nmdc:94b1d19ad74cfb1be53ebb45dcf5f70c" + ], + "part_of": [ + "nmdc:mga0dm4q17" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + 
"has_output": [ + "nmdc:208a3777ef0b99408f0d5832dee576e0", + "nmdc:8533a56006bdc1841b6fc16e99b6a84a" + ], + "was_informed_by": "gold:Gp0127646", + "input_read_count": 27835800, + "output_read_bases": 3867340900, + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4203205800, + "name": "Read QC Activity for nmdc:mga0dm4q17", + "output_read_count": 25862834, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf33" + }, + "has_input": [ + "nmdc:208a3777ef0b99408f0d5832dee576e0" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3e0598df41941463bac0fdec5df29f55", + "nmdc:1a625b148d8f6d9fe9aeab6cfb67df6c", + "nmdc:bc8e157195d042d7207d67b4982fea96", + "nmdc:a8fc683bb9b3aba316cb605c5fb591ec", + "nmdc:b5fe0189dbf00662d78cc55b8b0cc803", + "nmdc:cd10cca62774e66f60d60380ee18132e", + "nmdc:b13ee2ee52d15c3669aecd2e913f2658", + "nmdc:09a2d722810b3d90207bc4cfa626133b", + "nmdc:c3a8d9f48266a43ad74fc581132e2bba" + ], + "was_informed_by": "gold:Gp0127646", + "id": "nmdc:1a0a2a1b5db9455e9dbbacf102059180", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0dm4q17", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:05:12+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2446032142, + "type": "nmdc:DataObject", + "id": "jgi:574fe0ac7ded5e3df1ee1491", + "name": "10533.3.165334.GTAACGA-GTCGTTA.fastq.gz" + }, + { + "name": "Gp0127648_Filtered Reads", + "description": "Filtered Reads for Gp0127648", + "data_object_type": "Filtered Sequencing Reads", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filtered.fastq.gz", + "md5_checksum": "fcc3a92dd2b6ab6045f4be27da6f2cdd", + "id": "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", + "file_size_bytes": 2191252492 + }, + { + "name": "Gp0127648_Filtered Stats", + "description": "Filtered Stats for Gp0127648", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/qa/nmdc_mga0andh11_filterStats.txt", + "md5_checksum": "2208c88cac6b941799d4492dbf5f0887", + "id": "nmdc:2208c88cac6b941799d4492dbf5f0887", + "file_size_bytes": 289 + }, + { + "name": "Gp0127648_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report.tsv", + "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", + "id": "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "file_size_bytes": 3323 + }, + { + "name": "Gp0127648_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_report_full.tsv", + "md5_checksum": "1357df297d8d8a872b335e0c3222d102", + "id": "nmdc:1357df297d8d8a872b335e0c3222d102", + "file_size_bytes": 782039 + }, + { + "name": "Gp0127648_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127648", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_gottcha2_krona.html", + "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", + "id": "nmdc:5b510e336e60b6120b43e9b6420a074e", + "file_size_bytes": 236971 + }, + { + "name": "Gp0127648_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127648", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_classification.tsv", + "md5_checksum": "33bf814280051c220e0c4a06f7935728", + "id": "nmdc:33bf814280051c220e0c4a06f7935728", + "file_size_bytes": 1945479328 + }, + { + "name": "Gp0127648_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127648", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_report.tsv", + "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", + "id": "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "file_size_bytes": 255338 + }, + { + "name": "Gp0127648_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127648", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_centrifuge_krona.html", + "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", + "id": "nmdc:0efb0ad19234056d7e2e3726dead3622", + "file_size_bytes": 2333371 + }, + { + "name": "Gp0127648_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127648", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_classification.tsv", + "md5_checksum": "222bac312efdd6c86d2475ad224b7907", + "id": "nmdc:222bac312efdd6c86d2475ad224b7907", + "file_size_bytes": 1562011343 + }, + { + "name": "Gp0127648_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127648", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_report.tsv", + "md5_checksum": "baaca868b1fed932b463e489708dd741", + "id": "nmdc:baaca868b1fed932b463e489708dd741", + "file_size_bytes": 647859 + }, + { + "name": "Gp0127648_Kraken2 
Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127648", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/ReadbasedAnalysis/nmdc_mga0andh11_kraken2_krona.html", + "md5_checksum": "b549d169e5b0693152555373a6d8ee75", + "id": "nmdc:b549d169e5b0693152555373a6d8ee75", + "file_size_bytes": 3952548 + }, + { + "name": "Gp0127648_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127648", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_contigs.fna", + "md5_checksum": "ca10f7bae0565946414188c9121ee338", + "id": "nmdc:ca10f7bae0565946414188c9121ee338", + "file_size_bytes": 67439267 + }, + { + "name": "Gp0127648_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127648", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_scaffolds.fna", + "md5_checksum": "cf23062373806986b70244b1fabbd17b", + "id": "nmdc:cf23062373806986b70244b1fabbd17b", + "file_size_bytes": 66996134 + }, + { + "name": "Gp0127648_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_covstats.txt", + "md5_checksum": "99b2c3c91b299b9426cca9dfb10b0cea", + "id": "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", + "file_size_bytes": 11610674 + }, + { + "name": "Gp0127648_Assembled AGP file", + "description": "Assembled AGP file for Gp0127648", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_assembly.agp", + "md5_checksum": "303d7282e6f91afaa9564c65107d4086", + "id": "nmdc:303d7282e6f91afaa9564c65107d4086", + "file_size_bytes": 10842402 + }, + { + "name": "Gp0127648_Metagenome Alignment BAM file", + "description": "Metagenome 
Alignment BAM file for Gp0127648", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/assembly/nmdc_mga0andh11_pairedMapped_sorted.bam", + "md5_checksum": "4a6ffadb01b62dd73278429808c1a39a", + "id": "nmdc:4a6ffadb01b62dd73278429808c1a39a", + "file_size_bytes": 2362185094 + }, + { + "name": "Gp0127648_Protein FAA", + "description": "Protein FAA for Gp0127648", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_proteins.faa", + "md5_checksum": "c4a719f3a899f7aa760f627f7b1ae6e7", + "id": "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", + "file_size_bytes": 40118426 + }, + { + "name": "Gp0127648_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127648", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_structural_annotation.gff", + "md5_checksum": "80ab4116b1cdfbc3e4c4d06e5990d735", + "id": "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", + "file_size_bytes": 2507 + }, + { + "name": "Gp0127648_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127648", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_functional_annotation.gff", + "md5_checksum": "600011ab7e39465d3f9f28d5d93a4248", + "id": "nmdc:600011ab7e39465d3f9f28d5d93a4248", + "file_size_bytes": 47178055 + }, + { + "name": "Gp0127648_KO TSV file", + "description": "KO TSV file for Gp0127648", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko.tsv", + "md5_checksum": "0c8d98b369900cd19da39235e3eae6db", + "id": "nmdc:0c8d98b369900cd19da39235e3eae6db", + "file_size_bytes": 5498487 + }, + { + "name": "Gp0127648_EC TSV file", + 
"description": "EC TSV file for Gp0127648", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ec.tsv", + "md5_checksum": "16c37f8c4f74e7e81b7900536da55e39", + "id": "nmdc:16c37f8c4f74e7e81b7900536da55e39", + "file_size_bytes": 3650457 + }, + { + "name": "Gp0127648_COG GFF file", + "description": "COG GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cog.gff", + "md5_checksum": "a7fc228cd8d224bbf2843ba6a6648480", + "id": "nmdc:a7fc228cd8d224bbf2843ba6a6648480", + "file_size_bytes": 27226505 + }, + { + "name": "Gp0127648_PFAM GFF file", + "description": "PFAM GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_pfam.gff", + "md5_checksum": "a57d9d86c20cfd13ddc56027110485ba", + "id": "nmdc:a57d9d86c20cfd13ddc56027110485ba", + "file_size_bytes": 19896169 + }, + { + "name": "Gp0127648_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_tigrfam.gff", + "md5_checksum": "6a4be27e2e7454941b73aa843471f25d", + "id": "nmdc:6a4be27e2e7454941b73aa843471f25d", + "file_size_bytes": 2105656 + }, + { + "name": "Gp0127648_SMART GFF file", + "description": "SMART GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_smart.gff", + "md5_checksum": "be3d2a77be3ccd810d679f03204f8bac", + "id": "nmdc:be3d2a77be3ccd810d679f03204f8bac", + "file_size_bytes": 6062323 + }, + { + "name": "Gp0127648_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_supfam.gff", + "md5_checksum": "eb5ac02ce17f687c5ccf5a64548c559e", + "id": "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", + "file_size_bytes": 33896425 + }, + { + 
"name": "Gp0127648_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_cath_funfam.gff", + "md5_checksum": "81ff9f257ffe63ca5d04db9e767620b1", + "id": "nmdc:81ff9f257ffe63ca5d04db9e767620b1", + "file_size_bytes": 25515156 + }, + { + "name": "Gp0127648_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/annotation/nmdc_mga0andh11_ko_ec.gff", + "md5_checksum": "8768f37ff001a86a25ae34c7deee9d9a", + "id": "nmdc:8768f37ff001a86a25ae34c7deee9d9a", + "file_size_bytes": 17491444 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127648_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.tooShort.fa", + "md5_checksum": "8b67e5038c55083e2aa8e19c5d05fef8", + "id": "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", + "file_size_bytes": 58962192 + }, + { + "name": "Gp0127648_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_bins.unbinned.fa", + "md5_checksum": "fc8454a790709b36d7ca96cd99359d26", + "id": "nmdc:fc8454a790709b36d7ca96cd99359d26", + "file_size_bytes": 6656731 + }, + { + "name": "Gp0127648_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin 
checkm quality assessment result for Gp0127648", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_checkm_qa.out", + "md5_checksum": "942bd7c28c52e6301bf97dab0ea2852a", + "id": "nmdc:942bd7c28c52e6301bf97dab0ea2852a", + "file_size_bytes": 930 + }, + { + "name": "Gp0127648_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127648", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_hqmq_bin.zip", + "md5_checksum": "82ebf9065be9715e1230a50bf7a02197", + "id": "nmdc:82ebf9065be9715e1230a50bf7a02197", + "file_size_bytes": 466157 + }, + { + "name": "Gp0127648_metabat2 bins", + "description": "metabat2 bins for Gp0127648", + "url": "https://data.microbiomedata.org/data/nmdc:mga0andh11/MAGs/nmdc_mga0andh11_metabat_bin.zip", + "md5_checksum": "897536007e7e3525457df5d3baddd593", + "id": "nmdc:897536007e7e3525457df5d3baddd593", + "file_size_bytes": 90255 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb3" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/mapping_stats.txt", + "file_size_bytes": 11021314, + "type": "nmdc:DataObject", + "id": "nmdc:c12e8ed1aade1238318952b0c311cff1", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb5" + }, + "description": "Assembled contigs fasta for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly_contigs.fna", + "file_size_bytes": 66849907, + "type": "nmdc:DataObject", + "id": "nmdc:621134d8dd8a6b117924f92ffed69ba7", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb6" + }, + "description": "Assembled scaffold fasta for gold:Gp0127648", + 
"url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly_scaffolds.fna", + "file_size_bytes": 66407046, + "type": "nmdc:DataObject", + "id": "nmdc:ede66d623f73ce756b3495f83205e6d9", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb7" + }, + "description": "Assembled AGP file for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/assembly.agp", + "file_size_bytes": 9663138, + "type": "nmdc:DataObject", + "id": "nmdc:d48d39804b7f30d62b244ec9556fd8b1", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb8" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127648", + "url": "https://data.microbiomedata.org/data/1781_100350/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2330726850, + "type": "nmdc:DataObject", + "id": "nmdc:11791ba21a4e2cea1d5e889dce811985", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bc0" + }, + "id": "nmdc:af639412ee5bca47c42088a8c81df692", + "name": "1781_100350.krona.html", + "description": "Gold:Gp0127648 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100350/ReadbasedAnalysis/centrifuge/1781_100350.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bc6" + }, + "id": "nmdc:e0e59ec0d07f88b6bc73664a179627e6", + "name": "1781_100350.json", + "description": "Gold:Gp0127648 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100350/ReadbasedAnalysis/1781_100350.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662d" + }, + "id": "nmdc:ef8ef2e558c88161ea6b3c05434b56ac", + "name": "bins.unbinned.fa", + "description": 
"unbinned fasta file from metabat2 for gold:Gp0127648", + "file_size_bytes": 7023427, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16630" + }, + "id": "nmdc:74b41590eebfcdef0e90406daaa4c95d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127648", + "file_size_bytes": 57206330, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16633" + }, + "id": "nmdc:a783b176b63e949529058e9db9ede106", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127648", + "file_size_bytes": 760, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16634" + }, + "id": "nmdc:9fbf80a1258e24efe59ba434424a14e6", + "name": "gold:Gp0127648.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127648", + "file_size_bytes": 1405337, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16636" + }, + "id": "nmdc:30f30b256de77b4a0b0194330f337dbe", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0127648", + "file_size_bytes": 1004, + "url": "https://data.microbiomedata.org/data/1781_100350/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d39" + }, + "description": "Structural annotation GFF file for gold:Gp0127648", + "url": 
"https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_structural_annotation.gff", + "md5_checksum": "863f93ecf208a6e19f17d460d8e1a963", + "file_size_bytes": 3385, + "id": "nmdc:863f93ecf208a6e19f17d460d8e1a963", + "name": "gold:Gp0127648_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3b" + }, + "description": "KO TSV File for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_ko.tsv", + "md5_checksum": "1287c2532770a0f0d6792192c7400c0c", + "file_size_bytes": 3385, + "id": "nmdc:1287c2532770a0f0d6792192c7400c0c", + "name": "gold:Gp0127648_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3c" + }, + "description": "EC TSV File for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_ec.tsv", + "md5_checksum": "06042b9d083bd6b9879bc5486c0b38ba", + "file_size_bytes": 3385, + "id": "nmdc:06042b9d083bd6b9879bc5486c0b38ba", + "name": "gold:Gp0127648_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3d" + }, + "description": "Protein FAA for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_proteins.faa", + "md5_checksum": "d27fabc532b52dec4afa4673f920633a", + "file_size_bytes": 3385, + "id": "nmdc:d27fabc532b52dec4afa4673f920633a", + "name": "gold:Gp0127648_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d43" + }, + "description": "Functional annotation GFF file for gold:Gp0127648", + "url": "https://data.microbiomedata.org/1781_100350/img_annotation/Ga0482222_functional_annotation.gff", + "md5_checksum": 
"c11e44f28b422233e151d324d2accb43", + "file_size_bytes": 3385, + "id": "nmdc:c11e44f28b422233e151d324d2accb43", + "name": "gold:Gp0127648_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34718" + }, + "has_input": [ + "nmdc:ca10f7bae0565946414188c9121ee338", + "nmdc:4a6ffadb01b62dd73278429808c1a39a", + "nmdc:600011ab7e39465d3f9f28d5d93a4248" + ], + "too_short_contig_num": 142847, + "part_of": [ + "nmdc:mga0andh11" + ], + "binned_contig_num": 329, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8b67e5038c55083e2aa8e19c5d05fef8", + "nmdc:fc8454a790709b36d7ca96cd99359d26", + "nmdc:942bd7c28c52e6301bf97dab0ea2852a", + "nmdc:82ebf9065be9715e1230a50bf7a02197", + "nmdc:897536007e7e3525457df5d3baddd593" + ], + "was_informed_by": "gold:Gp0127648", + "input_contig_num": 147340, + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0andh11", + "mags_list": [ + { + "number_of_contig": 255, + "completeness": 70.91, + "bin_name": "bins.1", + "gene_count": 1857, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nitrososphaerales", + "num_16s": 1, + "gtdbtk_family": "Nitrososphaeraceae", + "gtdbtk_domain": "Archaea", + "contamination": 1.94, + "gtdbtk_class": "Nitrososphaeria", + "gtdbtk_phylum": "Crenarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 28 + }, + { + "number_of_contig": 74, + "completeness": 19.91, + "bin_name": "bins.2", + "gene_count": 380, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + 
], + "unbinned_contig_num": 4164, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:13:04+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b8" + }, + "has_input": [ + "nmdc:ca10f7bae0565946414188c9121ee338" + ], + "part_of": [ + "nmdc:mga0andh11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c4a719f3a899f7aa760f627f7b1ae6e7", + "nmdc:80ab4116b1cdfbc3e4c4d06e5990d735", + "nmdc:600011ab7e39465d3f9f28d5d93a4248", + "nmdc:0c8d98b369900cd19da39235e3eae6db", + "nmdc:16c37f8c4f74e7e81b7900536da55e39", + "nmdc:a7fc228cd8d224bbf2843ba6a6648480", + "nmdc:a57d9d86c20cfd13ddc56027110485ba", + "nmdc:6a4be27e2e7454941b73aa843471f25d", + "nmdc:be3d2a77be3ccd810d679f03204f8bac", + "nmdc:eb5ac02ce17f687c5ccf5a64548c559e", + "nmdc:81ff9f257ffe63ca5d04db9e767620b1", + "nmdc:8768f37ff001a86a25ae34c7deee9d9a" + ], + "was_informed_by": "gold:Gp0127648", + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0andh11", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:13:04+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa5" + }, + "has_input": [ + "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" + ], + "part_of": [ + "nmdc:mga0andh11" + ], + "ctg_logsum": 91193, + "scaf_logsum": 91521, + "gap_pct": 0.0011, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ca10f7bae0565946414188c9121ee338", + "nmdc:cf23062373806986b70244b1fabbd17b", + "nmdc:99b2c3c91b299b9426cca9dfb10b0cea", + "nmdc:303d7282e6f91afaa9564c65107d4086", + "nmdc:4a6ffadb01b62dd73278429808c1a39a" + ], + "asm_score": 4.996, + "was_informed_by": "gold:Gp0127648", + "ctg_powsum": 10170, + "scaf_max": 23974, + "id": 
"nmdc:cb02cfd1451743d94381a247cf0a9d65", + "scaf_powsum": 10208, + "execution_resource": "NERSC-Cori", + "contigs": 147340, + "name": "Assembly Activity for nmdc:mga0andh11", + "ctg_max": 23974, + "gc_std": 0.0855, + "contig_bp": 61886959, + "gc_avg": 0.61759, + "started_at_time": "2021-10-11T02:23:29Z", + "scaf_bp": 61887639, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 147272, + "ended_at_time": "2021-10-11T04:13:04+00:00", + "ctg_l50": 381, + "ctg_l90": 285, + "ctg_n50": 47493, + "ctg_n90": 126039, + "scaf_l50": 381, + "scaf_l90": 285, + "scaf_n50": 47464, + "scaf_n90": 125972 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5b" + }, + "id": "nmdc:omprc-11-nhf5m035", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-b7nrtg75" + ], + "has_output": [ + "jgi:574fe0ac7ded5e3df1ee1491" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127648" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c870" + }, + "has_input": [ + "nmdc:22bf7ba401619da2a191e7b30544a8ac" + ], + "part_of": [ + "nmdc:mga0andh11" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd", + "nmdc:2208c88cac6b941799d4492dbf5f0887" + ], + "was_informed_by": "gold:Gp0127648", + "input_read_count": 28064750, + "output_read_bases": 3953713958, + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4237777250, + "name": "Read QC Activity for nmdc:mga0andh11", + "output_read_count": 26438892, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:13:04+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf39" + }, + "has_input": [ + "nmdc:fcc3a92dd2b6ab6045f4be27da6f2cdd" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5e64b9ccf92f0c974c51bd8393dea50c", + "nmdc:1357df297d8d8a872b335e0c3222d102", + "nmdc:5b510e336e60b6120b43e9b6420a074e", + "nmdc:33bf814280051c220e0c4a06f7935728", + "nmdc:e77a1d052b0d2a99e0a1df3b3c038f7c", + "nmdc:0efb0ad19234056d7e2e3726dead3622", + "nmdc:222bac312efdd6c86d2475ad224b7907", + "nmdc:baaca868b1fed932b463e489708dd741", + "nmdc:b549d169e5b0693152555373a6d8ee75" + ], + "was_informed_by": "gold:Gp0127648", + "id": "nmdc:cb02cfd1451743d94381a247cf0a9d65", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0andh11", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:13:04+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2236205196, + "type": "nmdc:DataObject", + "id": "jgi:574fde8a7ded5e3df1ee1422", + "name": "10533.2.165322.ACGGTCT-AAGACCG.fastq.gz" + }, + { + "name": "Gp0127647_Filtered Reads", + "description": "Filtered Reads for Gp0127647", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filtered.fastq.gz", + 
"md5_checksum": "c082eff434fe4863c0e29c79b759d100", + "id": "nmdc:c082eff434fe4863c0e29c79b759d100", + "file_size_bytes": 2052448806 + }, + { + "name": "Gp0127647_Filtered Stats", + "description": "Filtered Stats for Gp0127647", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/qa/nmdc_mga0g0e588_filterStats.txt", + "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", + "id": "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b", + "file_size_bytes": 282 + }, + { + "name": "Gp0127647_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report.tsv", + "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", + "id": "nmdc:7e1438bf8076daf46f3d782d8f9656b4", + "file_size_bytes": 4666 + }, + { + "name": "Gp0127647_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_report_full.tsv", + "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", + "id": "nmdc:cfd63309cd38a293615ddce5e8ea6402", + "file_size_bytes": 786018 + }, + { + "name": "Gp0127647_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127647", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_gottcha2_krona.html", + "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", + "id": "nmdc:7e353b7bfb1586773fa00b515dffe6ec", + "file_size_bytes": 237895 + }, + { + "name": "Gp0127647_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127647", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_classification.tsv", + "md5_checksum": 
"6667be33e7867ca2aabfa5d663e2970a", + "id": "nmdc:6667be33e7867ca2aabfa5d663e2970a", + "file_size_bytes": 1767305277 + }, + { + "name": "Gp0127647_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127647", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_report.tsv", + "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", + "id": "nmdc:7ee0b0b21444ee06752e6b9c32f476af", + "file_size_bytes": 254858 + }, + { + "name": "Gp0127647_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127647", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_centrifuge_krona.html", + "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", + "id": "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", + "file_size_bytes": 2332396 + }, + { + "name": "Gp0127647_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127647", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_classification.tsv", + "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", + "id": "nmdc:45617f93e5f072fbad25a0308ead6c3d", + "file_size_bytes": 1419938277 + }, + { + "name": "Gp0127647_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127647", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_report.tsv", + "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", + "id": "nmdc:460e7594fcd06678df1b9c5e5075cb4d", + "file_size_bytes": 661837 + }, + { + "name": "Gp0127647_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127647", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/ReadbasedAnalysis/nmdc_mga0g0e588_kraken2_krona.html", + "md5_checksum": "ab80fc324c9206a41a66d64227a97179", + "id": "nmdc:ab80fc324c9206a41a66d64227a97179", + "file_size_bytes": 4028822 + }, + { + "name": "Gp0127647_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127647", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_contigs.fna", + "md5_checksum": "05952c056a6db782ba77c6369206838a", + "id": "nmdc:05952c056a6db782ba77c6369206838a", + "file_size_bytes": 41696500 + }, + { + "name": "Gp0127647_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127647", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_scaffolds.fna", + "md5_checksum": "6fa8f2d4236fda4f628436ed85094e3b", + "id": "nmdc:6fa8f2d4236fda4f628436ed85094e3b", + "file_size_bytes": 41403892 + }, + { + "name": "Gp0127647_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_covstats.txt", + "md5_checksum": "82be5b6248eb4b0bfef1c9afa5c5c0bc", + "id": "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", + "file_size_bytes": 7629542 + }, + { + "name": "Gp0127647_Assembled AGP file", + "description": "Assembled AGP file for Gp0127647", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_assembly.agp", + "md5_checksum": "fee22437c76dc343846f41e1be538b9d", + "id": "nmdc:fee22437c76dc343846f41e1be538b9d", + "file_size_bytes": 7091204 + }, + { + "name": "Gp0127647_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127647", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0g0e588/assembly/nmdc_mga0g0e588_pairedMapped_sorted.bam", + "md5_checksum": "7fc9fd7844b6ce48869a0ad5216da4dc", + "id": "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", + "file_size_bytes": 2190560397 + }, + { + "name": "Gp0127647_Protein FAA", + "description": "Protein FAA for Gp0127647", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_proteins.faa", + "md5_checksum": "b95b8538748c921fac6c93ba55d43e2c", + "id": "nmdc:b95b8538748c921fac6c93ba55d43e2c", + "file_size_bytes": 23580407 + }, + { + "name": "Gp0127647_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127647", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_structural_annotation.gff", + "md5_checksum": "9c63632766a4946bc76829a7dafe49c0", + "id": "nmdc:9c63632766a4946bc76829a7dafe49c0", + "file_size_bytes": 2925 + }, + { + "name": "Gp0127647_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127647", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_functional_annotation.gff", + "md5_checksum": "0c5e791c8170181aa3e43d710e7c55eb", + "id": "nmdc:0c5e791c8170181aa3e43d710e7c55eb", + "file_size_bytes": 28355659 + }, + { + "name": "Gp0127647_KO TSV file", + "description": "KO TSV file for Gp0127647", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko.tsv", + "md5_checksum": "358cb8682dd2d5c1b7a691e9f7734acc", + "id": "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", + "file_size_bytes": 3251676 + }, + { + "name": "Gp0127647_EC TSV file", + "description": "EC TSV file for Gp0127647", + "data_object_type": "Annotation Enzyme Commission", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ec.tsv", + "md5_checksum": "d770a8c872a3a359bf3482e564c56988", + "id": "nmdc:d770a8c872a3a359bf3482e564c56988", + "file_size_bytes": 2134531 + }, + { + "name": "Gp0127647_COG GFF file", + "description": "COG GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cog.gff", + "md5_checksum": "cdecaf6cff3fc2d559cc3313599b137b", + "id": "nmdc:cdecaf6cff3fc2d559cc3313599b137b", + "file_size_bytes": 15119260 + }, + { + "name": "Gp0127647_PFAM GFF file", + "description": "PFAM GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_pfam.gff", + "md5_checksum": "7dedc14d5645ae32f913d8f823ba5aa3", + "id": "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", + "file_size_bytes": 11013734 + }, + { + "name": "Gp0127647_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_tigrfam.gff", + "md5_checksum": "809e6d246bd10968d4da074db08216d9", + "id": "nmdc:809e6d246bd10968d4da074db08216d9", + "file_size_bytes": 1131416 + }, + { + "name": "Gp0127647_SMART GFF file", + "description": "SMART GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_smart.gff", + "md5_checksum": "546d11411d30ab337a215d0094fc36b6", + "id": "nmdc:546d11411d30ab337a215d0094fc36b6", + "file_size_bytes": 3424877 + }, + { + "name": "Gp0127647_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_supfam.gff", + "md5_checksum": "6eb654de91a99eb4e01e1bf9513a6208", + "id": "nmdc:6eb654de91a99eb4e01e1bf9513a6208", + "file_size_bytes": 19463761 + }, + { + "name": "Gp0127647_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127647", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_cath_funfam.gff", + "md5_checksum": "a8ae7ed318e7c170aeed508f331ce5b2", + "id": "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", + "file_size_bytes": 14536820 + }, + { + "name": "Gp0127647_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/annotation/nmdc_mga0g0e588_ko_ec.gff", + "md5_checksum": "455f95c7c15739b2fddc6f62b03253ed", + "id": "nmdc:455f95c7c15739b2fddc6f62b03253ed", + "file_size_bytes": 10367039 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127647_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.tooShort.fa", + "md5_checksum": "8ec4227eca7ea06fed4e866c4de4a5c9", + "id": "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", + "file_size_bytes": 38197270 + }, + { + "name": "Gp0127647_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_bins.unbinned.fa", + "md5_checksum": "40c0cbc75e2b698572b8b94d91fdc236", + "id": "nmdc:40c0cbc75e2b698572b8b94d91fdc236", + "file_size_bytes": 3202231 + }, + { + "name": "Gp0127647_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127647", + "data_object_type": "CheckM Statistics", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_checkm_qa.out", + "md5_checksum": "e8bdcd7b113a14b29a3026b73cd18c20", + "id": "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "file_size_bytes": 775 + }, + { + "name": "Gp0127647_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127647", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_hqmq_bin.zip", + "md5_checksum": "03b448db547a556e988a0d4948dab424", + "id": "nmdc:03b448db547a556e988a0d4948dab424", + "file_size_bytes": 182 + }, + { + "name": "Gp0127647_metabat2 bins", + "description": "metabat2 bins for Gp0127647", + "url": "https://data.microbiomedata.org/data/nmdc:mga0g0e588/MAGs/nmdc_mga0g0e588_metabat_bin.zip", + "md5_checksum": "6e92868d1912cb8f5b32fbf507721d16", + "id": "nmdc:6e92868d1912cb8f5b32fbf507721d16", + "file_size_bytes": 91931 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eae" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/mapping_stats.txt", + "file_size_bytes": 7240138, + "type": "nmdc:DataObject", + "id": "nmdc:c61b0651682d71b4a62ee2e51223af99", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eaf" + }, + "description": "Assembled contigs fasta for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly_contigs.fna", + "file_size_bytes": 41307096, + "type": "nmdc:DataObject", + "id": "nmdc:9aefb925f949c698cd2a0d71d1d2d7cc", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb1" + }, + "description": "Assembled scaffold fasta for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly_scaffolds.fna", + 
"file_size_bytes": 41014628, + "type": "nmdc:DataObject", + "id": "nmdc:ba8fe365f6e8a08812efe185c3454385", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb2" + }, + "description": "Assembled AGP file for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/assembly.agp", + "file_size_bytes": 6312116, + "type": "nmdc:DataObject", + "id": "nmdc:32c394cf3ff8c87b4d60ff769265b544", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb4" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127647", + "url": "https://data.microbiomedata.org/data/1781_100349/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2160760884, + "type": "nmdc:DataObject", + "id": "nmdc:f68a2e204a75c536142b2fd9dfd9ee8b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bab" + }, + "id": "nmdc:34a7f42ed597813c10ad6d3935563bf7", + "name": "1781_100349.krona.html", + "description": "Gold:Gp0127647 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100349/ReadbasedAnalysis/centrifuge/1781_100349.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bb1" + }, + "id": "nmdc:8ec86e6598c064e0091960a9921de1d6", + "name": "1781_100349.json", + "description": "Gold:Gp0127647 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100349/ReadbasedAnalysis/1781_100349.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1662e" + }, + "id": "nmdc:a406d9ea4e02f98d5f48ba53b992dfa2", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127647", + "file_size_bytes": 3475613, + 
"url": "https://data.microbiomedata.org/data/1781_100349/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16632" + }, + "id": "nmdc:a0f5a2359ba3651c0315060a9827e39d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127647", + "file_size_bytes": 36966483, + "url": "https://data.microbiomedata.org/data/1781_100349/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d34" + }, + "description": "Functional annotation GFF file for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_functional_annotation.gff", + "md5_checksum": "af2496c3ae96ff31e6bdaae75b507ea7", + "file_size_bytes": 3385, + "id": "nmdc:af2496c3ae96ff31e6bdaae75b507ea7", + "name": "gold:Gp0127647_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d36" + }, + "description": "Protein FAA for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_proteins.faa", + "md5_checksum": "ec6d01297279eee2d4c03ecfda9309c9", + "file_size_bytes": 3385, + "id": "nmdc:ec6d01297279eee2d4c03ecfda9309c9", + "name": "gold:Gp0127647_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d37" + }, + "description": "EC TSV File for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_ec.tsv", + "md5_checksum": "18d40bd5ff2707ba9a4512363d05537d", + "file_size_bytes": 3385, + "id": "nmdc:18d40bd5ff2707ba9a4512363d05537d", + "name": "gold:Gp0127647_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d38" + }, + "description": "KO 
TSV File for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_ko.tsv", + "md5_checksum": "d855bc2d72a6ba238acfe746299cf26a", + "file_size_bytes": 3385, + "id": "nmdc:d855bc2d72a6ba238acfe746299cf26a", + "name": "gold:Gp0127647_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3a" + }, + "description": "Structural annotation GFF file for gold:Gp0127647", + "url": "https://data.microbiomedata.org/1781_100349/img_annotation/Ga0482223_structural_annotation.gff", + "md5_checksum": "a57c9b7f192351676e897b8187cf6641", + "file_size_bytes": 3385, + "id": "nmdc:a57c9b7f192351676e897b8187cf6641", + "name": "gold:Gp0127647_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34716" + }, + "has_input": [ + "nmdc:05952c056a6db782ba77c6369206838a", + "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc", + "nmdc:0c5e791c8170181aa3e43d710e7c55eb" + ], + "too_short_contig_num": 95291, + "part_of": [ + "nmdc:mga0g0e588" + ], + "binned_contig_num": 20, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8ec4227eca7ea06fed4e866c4de4a5c9", + "nmdc:40c0cbc75e2b698572b8b94d91fdc236", + "nmdc:e8bdcd7b113a14b29a3026b73cd18c20", + "nmdc:03b448db547a556e988a0d4948dab424", + "nmdc:6e92868d1912cb8f5b32fbf507721d16" + ], + "was_informed_by": "gold:Gp0127647", + "input_contig_num": 97351, + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0g0e588", + "mags_list": [ + { + "number_of_contig": 20, + "completeness": 1.36, + "bin_name": "bins.1", + "gene_count": 310, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": 
"", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 14 + } + ], + "unbinned_contig_num": 2040, + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b5" + }, + "has_input": [ + "nmdc:05952c056a6db782ba77c6369206838a" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b95b8538748c921fac6c93ba55d43e2c", + "nmdc:9c63632766a4946bc76829a7dafe49c0", + "nmdc:0c5e791c8170181aa3e43d710e7c55eb", + "nmdc:358cb8682dd2d5c1b7a691e9f7734acc", + "nmdc:d770a8c872a3a359bf3482e564c56988", + "nmdc:cdecaf6cff3fc2d559cc3313599b137b", + "nmdc:7dedc14d5645ae32f913d8f823ba5aa3", + "nmdc:809e6d246bd10968d4da074db08216d9", + "nmdc:546d11411d30ab337a215d0094fc36b6", + "nmdc:6eb654de91a99eb4e01e1bf9513a6208", + "nmdc:a8ae7ed318e7c170aeed508f331ce5b2", + "nmdc:455f95c7c15739b2fddc6f62b03253ed" + ], + "was_informed_by": "gold:Gp0127647", + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0g0e588", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa0" + }, + "has_input": [ + "nmdc:c082eff434fe4863c0e29c79b759d100" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "ctg_logsum": 37666, + "scaf_logsum": 37899, + "gap_pct": 0.00092, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05952c056a6db782ba77c6369206838a", + "nmdc:6fa8f2d4236fda4f628436ed85094e3b", + "nmdc:82be5b6248eb4b0bfef1c9afa5c5c0bc", + 
"nmdc:fee22437c76dc343846f41e1be538b9d", + "nmdc:7fc9fd7844b6ce48869a0ad5216da4dc" + ], + "asm_score": 14.664, + "was_informed_by": "gold:Gp0127647", + "ctg_powsum": 4336.355, + "scaf_max": 96788, + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "scaf_powsum": 4362.772, + "execution_resource": "NERSC-Cori", + "contigs": 97351, + "name": "Assembly Activity for nmdc:mga0g0e588", + "ctg_max": 96788, + "gc_std": 0.13435, + "contig_bp": 38110297, + "gc_avg": 0.5552, + "started_at_time": "2021-10-11T02:24:27Z", + "scaf_bp": 38110647, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 97316, + "ended_at_time": "2021-10-11T03:38:33+00:00", + "ctg_l50": 353, + "ctg_l90": 283, + "ctg_n50": 34144, + "ctg_n90": 85387, + "scaf_l50": 353, + "scaf_l90": 283, + "scaf_n50": 34125, + "scaf_n90": 85353, + "scaf_l_gt50k": 153917, + "scaf_n_gt50k": 2, + "scaf_pct_gt50k": 0.40386876 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5c" + }, + "id": "nmdc:omprc-11-w3v30q48", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-q44pjf87" + ], + "has_output": [ + "jgi:574fde8a7ded5e3df1ee1422" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127647" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + 
"_id": { + "$oid": "649b009d6bdd4fd20273c86e" + }, + "has_input": [ + "nmdc:34b881e1c01cbdc1f8dc1b1fc07e46a7" + ], + "part_of": [ + "nmdc:mga0g0e588" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c082eff434fe4863c0e29c79b759d100", + "nmdc:7f204d0d1d45e77b39d9c9b2362c6b0b" + ], + "was_informed_by": "gold:Gp0127647", + "input_read_count": 24906858, + "output_read_bases": 3608754154, + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3760935558, + "name": "Read QC Activity for nmdc:mga0g0e588", + "output_read_count": 24128544, + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf32" + }, + "has_input": [ + "nmdc:c082eff434fe4863c0e29c79b759d100" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e1438bf8076daf46f3d782d8f9656b4", + "nmdc:cfd63309cd38a293615ddce5e8ea6402", + "nmdc:7e353b7bfb1586773fa00b515dffe6ec", + "nmdc:6667be33e7867ca2aabfa5d663e2970a", + "nmdc:7ee0b0b21444ee06752e6b9c32f476af", + "nmdc:d3b27bed597f07ad4bb4a500ad2fb928", + "nmdc:45617f93e5f072fbad25a0308ead6c3d", + "nmdc:460e7594fcd06678df1b9c5e5075cb4d", + "nmdc:ab80fc324c9206a41a66d64227a97179" + ], + "was_informed_by": "gold:Gp0127647", + "id": "nmdc:36fa8157feb62e7d176b99031e5e41a9", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0g0e588", + "started_at_time": "2021-10-11T02:24:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:38:33+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2092289780, + "type": "nmdc:DataObject", + "id": "jgi:574fde6e7ded5e3df1ee140d", + "name": "10533.1.165310.CCTCAGT-AACTGAG.fastq.gz" + 
}, + { + "name": "Gp0127645_Filtered Reads", + "description": "Filtered Reads for Gp0127645", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filtered.fastq.gz", + "md5_checksum": "034df323b47f010f27e7c032d445a891", + "id": "nmdc:034df323b47f010f27e7c032d445a891", + "file_size_bytes": 1909192845 + }, + { + "name": "Gp0127645_Filtered Stats", + "description": "Filtered Stats for Gp0127645", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/qa/nmdc_mga0jbfx89_filterStats.txt", + "md5_checksum": "ca137bf5e2df6541425f22b5d1fec492", + "id": "nmdc:ca137bf5e2df6541425f22b5d1fec492", + "file_size_bytes": 283 + }, + { + "name": "Gp0127645_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report.tsv", + "md5_checksum": "694374188ba4372344536fa26a2282b8", + "id": "nmdc:694374188ba4372344536fa26a2282b8", + "file_size_bytes": 3780 + }, + { + "name": "Gp0127645_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_report_full.tsv", + "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", + "id": "nmdc:e11dfa7178e8c426c7c930b57aa40377", + "file_size_bytes": 822292 + }, + { + "name": "Gp0127645_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127645", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_gottcha2_krona.html", + "md5_checksum": "46e203465faf61780fad8f626e9ab623", + "id": "nmdc:46e203465faf61780fad8f626e9ab623", + "file_size_bytes": 236496 + }, + { + "name": "Gp0127645_Centrifuge classification TSV report", + "description": "Centrifuge classification 
TSV report for Gp0127645", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_classification.tsv", + "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", + "id": "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", + "file_size_bytes": 1699052782 + }, + { + "name": "Gp0127645_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127645", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_report.tsv", + "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", + "id": "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", + "file_size_bytes": 256209 + }, + { + "name": "Gp0127645_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127645", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_centrifuge_krona.html", + "md5_checksum": "4299b438a815becc8beed40fcb803e9f", + "id": "nmdc:4299b438a815becc8beed40fcb803e9f", + "file_size_bytes": 2336400 + }, + { + "name": "Gp0127645_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127645", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_classification.tsv", + "md5_checksum": "4ae4dbd13c7338df5c00555bc6755947", + "id": "nmdc:4ae4dbd13c7338df5c00555bc6755947", + "file_size_bytes": 1359323947 + }, + { + "name": "Gp0127645_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127645", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_report.tsv", + "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", + "id": 
"nmdc:2be07eb38d408077a55ecb48e123f7f8", + "file_size_bytes": 651624 + }, + { + "name": "Gp0127645_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127645", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/ReadbasedAnalysis/nmdc_mga0jbfx89_kraken2_krona.html", + "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", + "id": "nmdc:f318581f0df6e04b7ae2384f9237da06", + "file_size_bytes": 3973557 + }, + { + "name": "Gp0127645_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127645", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_contigs.fna", + "md5_checksum": "3685fdcfffdf34d2802c692dc0515e33", + "id": "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "file_size_bytes": 49479236 + }, + { + "name": "Gp0127645_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127645", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_scaffolds.fna", + "md5_checksum": "7891adab80c63d98169e3cb7b4331f1e", + "id": "nmdc:7891adab80c63d98169e3cb7b4331f1e", + "file_size_bytes": 49157929 + }, + { + "name": "Gp0127645_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_covstats.txt", + "md5_checksum": "d883460ae5f8cbabc3d437e745935040", + "id": "nmdc:d883460ae5f8cbabc3d437e745935040", + "file_size_bytes": 8394481 + }, + { + "name": "Gp0127645_Assembled AGP file", + "description": "Assembled AGP file for Gp0127645", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_assembly.agp", + "md5_checksum": "f36166196caa529e09f3b93e17db3acc", + "id": "nmdc:f36166196caa529e09f3b93e17db3acc", + 
"file_size_bytes": 7804199 + }, + { + "name": "Gp0127645_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127645", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/assembly/nmdc_mga0jbfx89_pairedMapped_sorted.bam", + "md5_checksum": "08a13111a5314ec4c8dbaa59790dc2f1", + "id": "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", + "file_size_bytes": 2047004915 + }, + { + "name": "Gp0127645_Protein FAA", + "description": "Protein FAA for Gp0127645", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_proteins.faa", + "md5_checksum": "b14fecfaa99eaad42128e409aa7ae3ec", + "id": "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", + "file_size_bytes": 29015561 + }, + { + "name": "Gp0127645_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127645", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_structural_annotation.gff", + "md5_checksum": "851584f7bcec80cddec4b113fe6cfcea", + "id": "nmdc:851584f7bcec80cddec4b113fe6cfcea", + "file_size_bytes": 2506 + }, + { + "name": "Gp0127645_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127645", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_functional_annotation.gff", + "md5_checksum": "d0280881c70c54946d9b5170e62b904b", + "id": "nmdc:d0280881c70c54946d9b5170e62b904b", + "file_size_bytes": 34124039 + }, + { + "name": "Gp0127645_KO TSV file", + "description": "KO TSV file for Gp0127645", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko.tsv", + "md5_checksum": "7c1894478af7b8205bb4760acb93c353", + "id": 
"nmdc:7c1894478af7b8205bb4760acb93c353", + "file_size_bytes": 3942110 + }, + { + "name": "Gp0127645_EC TSV file", + "description": "EC TSV file for Gp0127645", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ec.tsv", + "md5_checksum": "ac413560dfdbcea1f0697391b593c552", + "id": "nmdc:ac413560dfdbcea1f0697391b593c552", + "file_size_bytes": 2691460 + }, + { + "name": "Gp0127645_COG GFF file", + "description": "COG GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cog.gff", + "md5_checksum": "80f846ff418e4758f4c6b9a96ba2b8ca", + "id": "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", + "file_size_bytes": 19597211 + }, + { + "name": "Gp0127645_PFAM GFF file", + "description": "PFAM GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_pfam.gff", + "md5_checksum": "bbfcd35137b7cb018945a531704805eb", + "id": "nmdc:bbfcd35137b7cb018945a531704805eb", + "file_size_bytes": 14110039 + }, + { + "name": "Gp0127645_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_tigrfam.gff", + "md5_checksum": "c1c10952c472a97fb7de8bc7dbce564b", + "id": "nmdc:c1c10952c472a97fb7de8bc7dbce564b", + "file_size_bytes": 1502814 + }, + { + "name": "Gp0127645_SMART GFF file", + "description": "SMART GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_smart.gff", + "md5_checksum": "b86dba5a29f4ca25cec7c0590e0b4771", + "id": "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", + "file_size_bytes": 4354176 + }, + { + "name": "Gp0127645_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_supfam.gff", + "md5_checksum": 
"a701026580285ca67816cb9a2f272ca6", + "id": "nmdc:a701026580285ca67816cb9a2f272ca6", + "file_size_bytes": 24911282 + }, + { + "name": "Gp0127645_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_cath_funfam.gff", + "md5_checksum": "5ce71fa6aebdb4fb9f843e89ab53ca9b", + "id": "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", + "file_size_bytes": 18832113 + }, + { + "name": "Gp0127645_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/annotation/nmdc_mga0jbfx89_ko_ec.gff", + "md5_checksum": "47c0e39e60bd4d688a29ede2af2cee35", + "id": "nmdc:47c0e39e60bd4d688a29ede2af2cee35", + "file_size_bytes": 12581509 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127645_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.tooShort.fa", + "md5_checksum": "47d1233f5afdd7b00790ac2ca8be778a", + "id": "nmdc:47d1233f5afdd7b00790ac2ca8be778a", + "file_size_bytes": 43078346 + }, + { + "name": "Gp0127645_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_bins.unbinned.fa", + "md5_checksum": "637bc2394dcb4869149370683ccc9e61", + "id": "nmdc:637bc2394dcb4869149370683ccc9e61", + 
"file_size_bytes": 6153132 + }, + { + "name": "Gp0127645_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127645", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_checkm_qa.out", + "md5_checksum": "a8e49a136701e388199a72f02bb6d288", + "id": "nmdc:a8e49a136701e388199a72f02bb6d288", + "file_size_bytes": 765 + }, + { + "name": "Gp0127645_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127645", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_hqmq_bin.zip", + "md5_checksum": "b0d2597d04809508e9dd0bcb48c7edad", + "id": "nmdc:b0d2597d04809508e9dd0bcb48c7edad", + "file_size_bytes": 182 + }, + { + "name": "Gp0127645_metabat2 bins", + "description": "metabat2 bins for Gp0127645", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jbfx89/MAGs/nmdc_mga0jbfx89_metabat_bin.zip", + "md5_checksum": "106983a66b58a2d07f0592d9379ad635", + "id": "nmdc:106983a66b58a2d07f0592d9379ad635", + "file_size_bytes": 76018 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eb0" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/mapping_stats.txt", + "file_size_bytes": 7967021, + "type": "nmdc:DataObject", + "id": "nmdc:b9abce64459572cfb1b7ab2bed3c24f5", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebc" + }, + "description": "Assembled contigs fasta for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/assembly_contigs.fna", + "file_size_bytes": 49051776, + "type": "nmdc:DataObject", + "id": "nmdc:eb1d97165017b3e14d15f6407a181be3", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" 
+ }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec0" + }, + "description": "Assembled scaffold fasta for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/assembly_scaffolds.fna", + "file_size_bytes": 48730645, + "type": "nmdc:DataObject", + "id": "nmdc:def65e725117abf461c8c182f7f56a72", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec3" + }, + "description": "Assembled AGP file for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/assembly.agp", + "file_size_bytes": 6948927, + "type": "nmdc:DataObject", + "id": "nmdc:b71d0b119b5c306cf7e692196f77ca98", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec4" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127645", + "url": "https://data.microbiomedata.org/data/1781_100347/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2016145304, + "type": "nmdc:DataObject", + "id": "nmdc:f628c83e48578369510c07a7f81fdb56", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ba5" + }, + "id": "nmdc:0a84281526e0db5a01a8cc737d2febd8", + "name": "1781_100347.krona.html", + "description": "Gold:Gp0127645 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100347/ReadbasedAnalysis/centrifuge/1781_100347.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bb7" + }, + "id": "nmdc:f2e4cd496ba10ca8ae09e148c1a62e05", + "name": "1781_100347.json", + "description": "Gold:Gp0127645 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100347/ReadbasedAnalysis/1781_100347.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16624" + }, + "id": "nmdc:5bd5972264fb1269fa8516647b3cffd9", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127645", + "file_size_bytes": 41844692, + "url": "https://data.microbiomedata.org/data/1781_100347/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16625" + }, + "id": "nmdc:0e1d05cdd010f61435994457a58076cc", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127645", + "file_size_bytes": 6356502, + "url": "https://data.microbiomedata.org/data/1781_100347/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d23" + }, + "description": "EC TSV File for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_ec.tsv", + "md5_checksum": "17524561a0e1f2c9d9ffdebc3b2df6a8", + "file_size_bytes": 3385, + "id": "nmdc:17524561a0e1f2c9d9ffdebc3b2df6a8", + "name": "gold:Gp0127645_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d24" + }, + "description": "KO TSV File for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_ko.tsv", + "md5_checksum": "e2b3ea50301aa3efaea18732ddba04f4", + "file_size_bytes": 3385, + "id": "nmdc:e2b3ea50301aa3efaea18732ddba04f4", + "name": "gold:Gp0127645_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d25" + }, + "description": "Functional annotation GFF file for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_functional_annotation.gff", + "md5_checksum": "c3a8cfa76e5da83b2b24bc6a52f71952", + "file_size_bytes": 3385, + "id": "nmdc:c3a8cfa76e5da83b2b24bc6a52f71952", + "name": 
"gold:Gp0127645_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d26" + }, + "description": "Protein FAA for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_proteins.faa", + "md5_checksum": "2ab0820d09b9c331ec56d7d3e20552e6", + "file_size_bytes": 3385, + "id": "nmdc:2ab0820d09b9c331ec56d7d3e20552e6", + "name": "gold:Gp0127645_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d2a" + }, + "description": "Structural annotation GFF file for gold:Gp0127645", + "url": "https://data.microbiomedata.org/1781_100347/img_annotation/Ga0482225_structural_annotation.gff", + "md5_checksum": "06280b3737fbf704d850ac68da190166", + "file_size_bytes": 3385, + "id": "nmdc:06280b3737fbf704d850ac68da190166", + "name": "gold:Gp0127645_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34717" + }, + "has_input": [ + "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "nmdc:08a13111a5314ec4c8dbaa59790dc2f1", + "nmdc:d0280881c70c54946d9b5170e62b904b" + ], + "too_short_contig_num": 102729, + "part_of": [ + "nmdc:mga0jbfx89" + ], + "binned_contig_num": 61, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:47d1233f5afdd7b00790ac2ca8be778a", + "nmdc:637bc2394dcb4869149370683ccc9e61", + "nmdc:a8e49a136701e388199a72f02bb6d288", + "nmdc:b0d2597d04809508e9dd0bcb48c7edad", + "nmdc:106983a66b58a2d07f0592d9379ad635" + ], + "was_informed_by": "gold:Gp0127645", + "input_contig_num": 106865, + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0jbfx89", 
+ "mags_list": [ + { + "number_of_contig": 61, + "completeness": 18.77, + "bin_name": "bins.1", + "gene_count": 307, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + } + ], + "unbinned_contig_num": 4075, + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9b6" + }, + "has_input": [ + "nmdc:3685fdcfffdf34d2802c692dc0515e33" + ], + "part_of": [ + "nmdc:mga0jbfx89" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b14fecfaa99eaad42128e409aa7ae3ec", + "nmdc:851584f7bcec80cddec4b113fe6cfcea", + "nmdc:d0280881c70c54946d9b5170e62b904b", + "nmdc:7c1894478af7b8205bb4760acb93c353", + "nmdc:ac413560dfdbcea1f0697391b593c552", + "nmdc:80f846ff418e4758f4c6b9a96ba2b8ca", + "nmdc:bbfcd35137b7cb018945a531704805eb", + "nmdc:c1c10952c472a97fb7de8bc7dbce564b", + "nmdc:b86dba5a29f4ca25cec7c0590e0b4771", + "nmdc:a701026580285ca67816cb9a2f272ca6", + "nmdc:5ce71fa6aebdb4fb9f843e89ab53ca9b", + "nmdc:47c0e39e60bd4d688a29ede2af2cee35" + ], + "was_informed_by": "gold:Gp0127645", + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0jbfx89", + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fa4" + }, + "has_input": [ + "nmdc:034df323b47f010f27e7c032d445a891" + ], + "part_of": [ + "nmdc:mga0jbfx89" + ], + "ctg_logsum": 65663, + "scaf_logsum": 65979, + "gap_pct": 0.00097, + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3685fdcfffdf34d2802c692dc0515e33", + "nmdc:7891adab80c63d98169e3cb7b4331f1e", + "nmdc:d883460ae5f8cbabc3d437e745935040", + "nmdc:f36166196caa529e09f3b93e17db3acc", + "nmdc:08a13111a5314ec4c8dbaa59790dc2f1" + ], + "asm_score": 2.823, + "was_informed_by": "gold:Gp0127645", + "ctg_powsum": 6960.932, + "scaf_max": 6924, + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "scaf_powsum": 6995.401, + "execution_resource": "NERSC-Cori", + "contigs": 106865, + "name": "Assembly Activity for nmdc:mga0jbfx89", + "ctg_max": 6924, + "gc_std": 0.12472, + "contig_bp": 45473855, + "gc_avg": 0.58373, + "started_at_time": "2021-10-11T02:24:42Z", + "scaf_bp": 45474295, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 106821, + "ended_at_time": "2021-10-11T04:07:11+00:00", + "ctg_l50": 395, + "ctg_l90": 284, + "ctg_n50": 33845, + "ctg_n90": 92046, + "scaf_l50": 395, + "scaf_l90": 284, + "scaf_n50": 33825, + "scaf_n90": 92004 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5d" + }, + "id": "nmdc:omprc-11-vykcbs96", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-ffqcqd73" + ], + "has_output": [ + "jgi:574fde6e7ded5e3df1ee140d" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + 
"gold_sequencing_project_identifiers": [ + "gold:Gp0127645" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c86d" + }, + "has_input": [ + "nmdc:5e7fc22a1527c7ff74e245bbb352fa91" + ], + "part_of": [ + "nmdc:mga0jbfx89" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:034df323b47f010f27e7c032d445a891", + "nmdc:ca137bf5e2df6541425f22b5d1fec492" + ], + "was_informed_by": "gold:Gp0127645", + "input_read_count": 24139032, + "output_read_bases": 3475317024, + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3644993832, + "name": "Read QC Activity for nmdc:mga0jbfx89", + "output_read_count": 23262948, + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf43" + }, + "has_input": [ + "nmdc:034df323b47f010f27e7c032d445a891" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:694374188ba4372344536fa26a2282b8", + "nmdc:e11dfa7178e8c426c7c930b57aa40377", + "nmdc:46e203465faf61780fad8f626e9ab623", + "nmdc:7a6b2ded3f49663d9916eaea3e129dc7", + "nmdc:6f8be89c7aab1c3f392b4f80c7ddf6a5", + "nmdc:4299b438a815becc8beed40fcb803e9f", + "nmdc:4ae4dbd13c7338df5c00555bc6755947", + "nmdc:2be07eb38d408077a55ecb48e123f7f8", + "nmdc:f318581f0df6e04b7ae2384f9237da06" + ], + "was_informed_by": "gold:Gp0127645", + "id": "nmdc:f18f00c9c36689307a6b4188b7b18e32", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0jbfx89", + "started_at_time": "2021-10-11T02:24:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:07:11+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2196954131, + 
"type": "nmdc:DataObject", + "id": "jgi:574fde8c7ded5e3df1ee1424", + "name": "10533.2.165322.GAACGCT-AAGCGTT.fastq.gz" + }, + { + "name": "Gp0127649_Filtered Reads", + "description": "Filtered Reads for Gp0127649", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filtered.fastq.gz", + "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "id": "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "file_size_bytes": 1967546513 + }, + { + "name": "Gp0127649_Filtered Stats", + "description": "Filtered Stats for Gp0127649", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/qa/nmdc_mga0j4fe07_filterStats.txt", + "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", + "id": "nmdc:25a7ff469ffae5906d6ade4d74cab88f", + "file_size_bytes": 283 + }, + { + "name": "Gp0127649_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report.tsv", + "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", + "id": "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "file_size_bytes": 2079 + }, + { + "name": "Gp0127649_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_report_full.tsv", + "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", + "id": "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "file_size_bytes": 642861 + }, + { + "name": "Gp0127649_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127649", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_gottcha2_krona.html", + "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", + "id": "nmdc:8c1683fa4041bd10711aa3beb4735811", + "file_size_bytes": 
230792 + }, + { + "name": "Gp0127649_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127649", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_classification.tsv", + "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", + "id": "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "file_size_bytes": 1743695420 + }, + { + "name": "Gp0127649_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127649", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_report.tsv", + "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", + "id": "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "file_size_bytes": 254036 + }, + { + "name": "Gp0127649_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127649", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_centrifuge_krona.html", + "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", + "id": "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "file_size_bytes": 2332943 + }, + { + "name": "Gp0127649_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127649", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_classification.tsv", + "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", + "id": "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "file_size_bytes": 1387669799 + }, + { + "name": "Gp0127649_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127649", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_report.tsv", + "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", + "id": "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "file_size_bytes": 637131 + }, + { + "name": "Gp0127649_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127649", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/ReadbasedAnalysis/nmdc_mga0j4fe07_kraken2_krona.html", + "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", + "id": "nmdc:0d07551972f3230ec2ef4a0e04929b97", + "file_size_bytes": 3976407 + }, + { + "name": "Gp0127649_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127649", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_contigs.fna", + "md5_checksum": "5ada15f24d3de4a96521532a4ced6018", + "id": "nmdc:5ada15f24d3de4a96521532a4ced6018", + "file_size_bytes": 94852732 + }, + { + "name": "Gp0127649_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127649", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_scaffolds.fna", + "md5_checksum": "fc32ae27239661670605b59c395dd770", + "id": "nmdc:fc32ae27239661670605b59c395dd770", + "file_size_bytes": 94280572 + }, + { + "name": "Gp0127649_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_covstats.txt", + "md5_checksum": "d6e996af3275c4cdd3e51376517e2b6b", + "id": "nmdc:d6e996af3275c4cdd3e51376517e2b6b", + "file_size_bytes": 15029734 + }, + { + "name": "Gp0127649_Assembled AGP file", + "description": "Assembled AGP file for Gp0127649", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_assembly.agp", + "md5_checksum": "f52600933fc5a09f7cead5c065d6b100", + "id": "nmdc:f52600933fc5a09f7cead5c065d6b100", + "file_size_bytes": 14057243 + }, + { + "name": "Gp0127649_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127649", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/assembly/nmdc_mga0j4fe07_pairedMapped_sorted.bam", + "md5_checksum": "5d9826a5f5164cfe20bfc1343144c96f", + "id": "nmdc:5d9826a5f5164cfe20bfc1343144c96f", + "file_size_bytes": 2147322298 + }, + { + "name": "Gp0127649_Protein FAA", + "description": "Protein FAA for Gp0127649", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_proteins.faa", + "md5_checksum": "4e5d87bb4bb3198f5b9955622a781376", + "id": "nmdc:4e5d87bb4bb3198f5b9955622a781376", + "file_size_bytes": 55301156 + }, + { + "name": "Gp0127649_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127649", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_structural_annotation.gff", + "md5_checksum": "40f79a8b021a3de27c464087fad9f092", + "id": "nmdc:40f79a8b021a3de27c464087fad9f092", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127649_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127649", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_functional_annotation.gff", + "md5_checksum": "aba74592cf7aa507179e9544c008a0ec", + "id": "nmdc:aba74592cf7aa507179e9544c008a0ec", + "file_size_bytes": 63464973 + }, + { + "name": "Gp0127649_KO TSV file", + "description": "KO TSV file for Gp0127649", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko.tsv", + "md5_checksum": "29500fc3a86f2767cc3752ba02fa0a05", + "id": "nmdc:29500fc3a86f2767cc3752ba02fa0a05", + "file_size_bytes": 7317450 + }, + { + "name": "Gp0127649_EC TSV file", + "description": "EC TSV file for Gp0127649", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ec.tsv", + "md5_checksum": "ba8fedc9b57d401ad0cc2b329038c5a9", + "id": "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", + "file_size_bytes": 4888576 + }, + { + "name": "Gp0127649_COG GFF file", + "description": "COG GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cog.gff", + "md5_checksum": "66bd5f2b62818742c6df5c39d1952a99", + "id": "nmdc:66bd5f2b62818742c6df5c39d1952a99", + "file_size_bytes": 36960882 + }, + { + "name": "Gp0127649_PFAM GFF file", + "description": "PFAM GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_pfam.gff", + "md5_checksum": "e60c77fb34f71861ceacf988074949af", + "id": "nmdc:e60c77fb34f71861ceacf988074949af", + "file_size_bytes": 27535342 + }, + { + "name": "Gp0127649_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_tigrfam.gff", + "md5_checksum": "3738ab59fb56002a9f38d95b101957bd", + "id": "nmdc:3738ab59fb56002a9f38d95b101957bd", + "file_size_bytes": 2999247 + }, + { + "name": "Gp0127649_SMART GFF file", + "description": "SMART GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_smart.gff", + "md5_checksum": "2f34c5db7846cbf37add471c0dbca951", + "id": "nmdc:2f34c5db7846cbf37add471c0dbca951", + "file_size_bytes": 8199823 + }, + { + "name": "Gp0127649_SuperFam GFF 
file", + "description": "SuperFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_supfam.gff", + "md5_checksum": "fa7f659afca037861ae65e08092f2d83", + "id": "nmdc:fa7f659afca037861ae65e08092f2d83", + "file_size_bytes": 46114509 + }, + { + "name": "Gp0127649_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_cath_funfam.gff", + "md5_checksum": "9ee627031c0b425974fa1aa4d695d4ae", + "id": "nmdc:9ee627031c0b425974fa1aa4d695d4ae", + "file_size_bytes": 34807554 + }, + { + "name": "Gp0127649_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/annotation/nmdc_mga0j4fe07_ko_ec.gff", + "md5_checksum": "2fc423fd55e34d3400c9a6924df67633", + "id": "nmdc:2fc423fd55e34d3400c9a6924df67633", + "file_size_bytes": 23276630 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127649_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.tooShort.fa", + "md5_checksum": "f84d25fee16a4dece54f5580d893ecaa", + "id": "nmdc:f84d25fee16a4dece54f5580d893ecaa", + "file_size_bytes": 79592416 + }, + { + "name": "Gp0127649_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127649", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_bins.unbinned.fa", + "md5_checksum": "ed61fb0056b08bc82f4545c49b744c2a", + "id": "nmdc:ed61fb0056b08bc82f4545c49b744c2a", + "file_size_bytes": 14383032 + }, + { + "name": "Gp0127649_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127649", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_checkm_qa.out", + "md5_checksum": "8d4bce832a16affbcc3efeb8364e8eaa", + "id": "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", + "file_size_bytes": 942 + }, + { + "name": "Gp0127649_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127649", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_hqmq_bin.zip", + "md5_checksum": "40273505b8b3dddd3ee5cb5c83871067", + "id": "nmdc:40273505b8b3dddd3ee5cb5c83871067", + "file_size_bytes": 182 + }, + { + "name": "Gp0127649_metabat2 bins", + "description": "metabat2 bins for Gp0127649", + "url": "https://data.microbiomedata.org/data/nmdc:mga0j4fe07/MAGs/nmdc_mga0j4fe07_metabat_bin.zip", + "md5_checksum": "b767f2b59d0fd9e650914e140cacf104", + "id": "nmdc:b767f2b59d0fd9e650914e140cacf104", + "file_size_bytes": 269239 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eba" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127649", + "url": "https://data.microbiomedata.org/data/1781_100351/assembly/mapping_stats.txt", + "file_size_bytes": 14269698, + "type": "nmdc:DataObject", + "id": "nmdc:02f1408424cf3420cad010fe4f672f7a", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebb" + }, + "description": "Assembled contigs fasta for gold:Gp0127649", + "url": 
"https://data.microbiomedata.org/data/1781_100351/assembly/assembly_contigs.fna", + "file_size_bytes": 94092696, + "type": "nmdc:DataObject", + "id": "nmdc:6300cd8140abe6322e4a9c1921584476", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ebd" + }, + "description": "Assembled scaffold fasta for gold:Gp0127649", + "url": "https://data.microbiomedata.org/data/1781_100351/assembly/assembly_scaffolds.fna", + "file_size_bytes": 93521052, + "type": "nmdc:DataObject", + "id": "nmdc:0dc5339ec9b3ea1dad516beff981255e", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec7" + }, + "description": "Assembled AGP file for gold:Gp0127649", + "url": "https://data.microbiomedata.org/data/1781_100351/assembly/assembly.agp", + "file_size_bytes": 12536139, + "type": "nmdc:DataObject", + "id": "nmdc:8477d852446a073d2d2de6b146b2921b", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec8" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127649", + "url": "https://data.microbiomedata.org/data/1781_100351/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2118848396, + "type": "nmdc:DataObject", + "id": "nmdc:b486014481aba75b91177c9cc3a9cf7b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bc7" + }, + "id": "nmdc:082242b653e5a0803121d043375f93a3", + "name": "1781_100351.krona.html", + "description": "Gold:Gp0127649 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100351/ReadbasedAnalysis/centrifuge/1781_100351.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bd1" + }, + "id": "nmdc:b7078aa8cc8165e23978ece3312e9192", + 
"name": "1781_100351.json", + "description": "Gold:Gp0127649 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100351/ReadbasedAnalysis/1781_100351.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16635" + }, + "id": "nmdc:a0108431c6c1fcfc7cec6830ef96dcb9", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127649", + "file_size_bytes": 775, + "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16639" + }, + "id": "nmdc:0c1c4f73f64b5651a20d421225d67f24", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127649", + "file_size_bytes": 77337130, + "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663a" + }, + "id": "nmdc:d11e901143547313fd0037177b5555ae", + "name": "gold:Gp0127649.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127649", + "file_size_bytes": 265082, + "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663b" + }, + "id": "nmdc:680731abdb97ba4977d33afbbc0b0c61", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127649", + "file_size_bytes": 14891738, + "url": "https://data.microbiomedata.org/data/1781_100351/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3e" + }, + "description": "KO TSV File for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_ko.tsv", 
+ "md5_checksum": "35c0fd91c2225f595df469b61ba9578b", + "file_size_bytes": 3385, + "id": "nmdc:35c0fd91c2225f595df469b61ba9578b", + "name": "gold:Gp0127649_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d3f" + }, + "description": "Structural annotation GFF file for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_structural_annotation.gff", + "md5_checksum": "cff0a71781a84c7096ee79b39c3336f8", + "file_size_bytes": 3385, + "id": "nmdc:cff0a71781a84c7096ee79b39c3336f8", + "name": "gold:Gp0127649_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d40" + }, + "description": "EC TSV File for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_ec.tsv", + "md5_checksum": "a14b836f963c0f6b02a70f0fc8cd40c0", + "file_size_bytes": 3385, + "id": "nmdc:a14b836f963c0f6b02a70f0fc8cd40c0", + "name": "gold:Gp0127649_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d41" + }, + "description": "Functional annotation GFF file for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_functional_annotation.gff", + "md5_checksum": "a022fd9c3254ad5dc6ae5be40cd35c0b", + "file_size_bytes": 3385, + "id": "nmdc:a022fd9c3254ad5dc6ae5be40cd35c0b", + "name": "gold:Gp0127649_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d42" + }, + "description": "Protein FAA for gold:Gp0127649", + "url": "https://data.microbiomedata.org/1781_100351/img_annotation/Ga0482221_proteins.faa", + "md5_checksum": "56c3ac34fb2f1c2ba7bcd9bd56be731a", + "file_size_bytes": 
3385, + "id": "nmdc:56c3ac34fb2f1c2ba7bcd9bd56be731a", + "name": "gold:Gp0127649_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34729" + }, + "has_input": [ + "nmdc:5ada15f24d3de4a96521532a4ced6018", + "nmdc:5d9826a5f5164cfe20bfc1343144c96f", + "nmdc:aba74592cf7aa507179e9544c008a0ec" + ], + "too_short_contig_num": 180499, + "part_of": [ + "nmdc:mga0j4fe07" + ], + "binned_contig_num": 211, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:f84d25fee16a4dece54f5580d893ecaa", + "nmdc:ed61fb0056b08bc82f4545c49b744c2a", + "nmdc:8d4bce832a16affbcc3efeb8364e8eaa", + "nmdc:40273505b8b3dddd3ee5cb5c83871067", + "nmdc:b767f2b59d0fd9e650914e140cacf104" + ], + "was_informed_by": "gold:Gp0127649", + "input_contig_num": 190009, + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0j4fe07", + "mags_list": [ + { + "number_of_contig": 64, + "completeness": 16.46, + "bin_name": "bins.1", + "gene_count": 305, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.47, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 147, + "completeness": 19.16, + "bin_name": "bins.2", + "gene_count": 744, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + } + ], + "unbinned_contig_num": 9299, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": 
"2021-10-11T03:38:32+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9ca" + }, + "has_input": [ + "nmdc:5ada15f24d3de4a96521532a4ced6018" + ], + "part_of": [ + "nmdc:mga0j4fe07" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:4e5d87bb4bb3198f5b9955622a781376", + "nmdc:40f79a8b021a3de27c464087fad9f092", + "nmdc:aba74592cf7aa507179e9544c008a0ec", + "nmdc:29500fc3a86f2767cc3752ba02fa0a05", + "nmdc:ba8fedc9b57d401ad0cc2b329038c5a9", + "nmdc:66bd5f2b62818742c6df5c39d1952a99", + "nmdc:e60c77fb34f71861ceacf988074949af", + "nmdc:3738ab59fb56002a9f38d95b101957bd", + "nmdc:2f34c5db7846cbf37add471c0dbca951", + "nmdc:fa7f659afca037861ae65e08092f2d83", + "nmdc:9ee627031c0b425974fa1aa4d695d4ae", + "nmdc:2fc423fd55e34d3400c9a6924df67633" + ], + "was_informed_by": "gold:Gp0127649", + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0j4fe07", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:38:32+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb8" + }, + "has_input": [ + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" + ], + "part_of": [ + "nmdc:mga0j4fe07" + ], + "ctg_logsum": 157844, + "scaf_logsum": 158661, + "gap_pct": 0.00147, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5ada15f24d3de4a96521532a4ced6018", + "nmdc:fc32ae27239661670605b59c395dd770", + "nmdc:d6e996af3275c4cdd3e51376517e2b6b", + "nmdc:f52600933fc5a09f7cead5c065d6b100", + "nmdc:5d9826a5f5164cfe20bfc1343144c96f" + ], + "asm_score": 3.279, + "was_informed_by": "gold:Gp0127649", + "ctg_powsum": 16877, + "scaf_max": 28201, + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "scaf_powsum": 16967, + "execution_resource": "NERSC-Cori", + "contigs": 190009, + "name": 
"Assembly Activity for nmdc:mga0j4fe07", + "ctg_max": 28201, + "gc_std": 0.09385, + "contig_bp": 87528185, + "gc_avg": 0.62766, + "started_at_time": "2021-10-11T02:23:29Z", + "scaf_bp": 87529475, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 189880, + "ended_at_time": "2021-10-11T03:38:32+00:00", + "ctg_l50": 440, + "ctg_l90": 289, + "ctg_n50": 57445, + "ctg_n90": 160942, + "scaf_l50": 440, + "scaf_l90": 289, + "scaf_n50": 57416, + "scaf_n90": 160823 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5e" + }, + "id": "nmdc:omprc-11-dw7shd52", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-7fedgs13" + ], + "has_output": [ + "jgi:574fde8c7ded5e3df1ee1424" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127649" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c885" + }, + "has_input": [ + "nmdc:5895de3040f750a5ce1b5238158fd51c" + ], + "part_of": [ + "nmdc:mga0j4fe07" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "nmdc:25a7ff469ffae5906d6ade4d74cab88f" + ], + "was_informed_by": "gold:Gp0127649", + "input_read_count": 24889788, + 
"output_read_bases": 3558782964, + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3758357988, + "name": "Read QC Activity for nmdc:mga0j4fe07", + "output_read_count": 23803802, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:38:32+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf48" + }, + "has_input": [ + "nmdc:ed0ea2f2ef6b667c5f8e60cd7d197cf5" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c30cb5928ad608e7c8fe1ce77d81933a", + "nmdc:4aa159b1ee973c6e3e309ef60d351018", + "nmdc:8c1683fa4041bd10711aa3beb4735811", + "nmdc:b8be7144441cbd6fbe4a8193f9e055ab", + "nmdc:d4f57641e41f0249f3fde7b973289cf5", + "nmdc:4e9ec619c5611cb0166ea127496fadeb", + "nmdc:ed2b2495ca211e17298ca2e212fe3811", + "nmdc:05d35fc4e391296ff0e716c3fcbbee89", + "nmdc:0d07551972f3230ec2ef4a0e04929b97" + ], + "was_informed_by": "gold:Gp0127649", + "id": "nmdc:74b40db9b9eef8519a02f49fe6034be5", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0j4fe07", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:38:32+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2318220660, + "type": "nmdc:DataObject", + "id": "jgi:574fe0af7ded5e3df1ee1493", + "name": "10533.3.165334.CAATCGA-GTCGATT.fastq.gz" + }, + { + "name": "Gp0127652_Filtered Reads", + "description": "Filtered Reads for Gp0127652", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filtered.fastq.gz", + "md5_checksum": "60f03b815160b29125c2bd0776a330bf", + "id": "nmdc:60f03b815160b29125c2bd0776a330bf", + "file_size_bytes": 2019434951 + }, + { + "name": 
"Gp0127652_Filtered Stats", + "description": "Filtered Stats for Gp0127652", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/qa/nmdc_mga0mfxf90_filterStats.txt", + "md5_checksum": "c40fa552711f6b19130b2a559f2d4cdc", + "id": "nmdc:c40fa552711f6b19130b2a559f2d4cdc", + "file_size_bytes": 290 + }, + { + "name": "Gp0127652_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report.tsv", + "md5_checksum": "70f29a321c925cfc0e2003515f708400", + "id": "nmdc:70f29a321c925cfc0e2003515f708400", + "file_size_bytes": 1524 + }, + { + "name": "Gp0127652_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_report_full.tsv", + "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", + "id": "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "file_size_bytes": 670250 + }, + { + "name": "Gp0127652_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127652", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_gottcha2_krona.html", + "md5_checksum": "9cd3b2939adabd809741ae6a84260266", + "id": "nmdc:9cd3b2939adabd809741ae6a84260266", + "file_size_bytes": 229949 + }, + { + "name": "Gp0127652_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127652", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_classification.tsv", + "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", + "id": "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "file_size_bytes": 1814515284 + }, + { + "name": "Gp0127652_Centrifuge TSV 
report", + "description": "Centrifuge TSV report for Gp0127652", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_report.tsv", + "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", + "id": "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "file_size_bytes": 253730 + }, + { + "name": "Gp0127652_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127652", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_centrifuge_krona.html", + "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", + "id": "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "file_size_bytes": 2330558 + }, + { + "name": "Gp0127652_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127652", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_classification.tsv", + "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", + "id": "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "file_size_bytes": 1445957300 + }, + { + "name": "Gp0127652_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127652", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_report.tsv", + "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", + "id": "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "file_size_bytes": 639677 + }, + { + "name": "Gp0127652_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127652", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/ReadbasedAnalysis/nmdc_mga0mfxf90_kraken2_krona.html", + "md5_checksum": "1df4b479c887b43319d89cc80dc35239", + "id": 
"nmdc:1df4b479c887b43319d89cc80dc35239", + "file_size_bytes": 3991377 + }, + { + "name": "Gp0127652_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127652", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_contigs.fna", + "md5_checksum": "a550eb6e614b375c1089ab816163ea63", + "id": "nmdc:a550eb6e614b375c1089ab816163ea63", + "file_size_bytes": 117075841 + }, + { + "name": "Gp0127652_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127652", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_scaffolds.fna", + "md5_checksum": "9f194d271c352af3f68f2afeb1dbd499", + "id": "nmdc:9f194d271c352af3f68f2afeb1dbd499", + "file_size_bytes": 116423675 + }, + { + "name": "Gp0127652_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_covstats.txt", + "md5_checksum": "b0a79069110825cfe5525a8fc4f02cb6", + "id": "nmdc:b0a79069110825cfe5525a8fc4f02cb6", + "file_size_bytes": 17141637 + }, + { + "name": "Gp0127652_Assembled AGP file", + "description": "Assembled AGP file for Gp0127652", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_assembly.agp", + "md5_checksum": "f54e8bda482b1cb8bc8e121ee5f39e07", + "id": "nmdc:f54e8bda482b1cb8bc8e121ee5f39e07", + "file_size_bytes": 16044279 + }, + { + "name": "Gp0127652_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127652", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/assembly/nmdc_mga0mfxf90_pairedMapped_sorted.bam", + "md5_checksum": "c8c5056ee57126695073137d0c1d3d04", + "id": "nmdc:c8c5056ee57126695073137d0c1d3d04", + 
"file_size_bytes": 2224050507 + }, + { + "name": "Gp0127652_Protein FAA", + "description": "Protein FAA for Gp0127652", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_proteins.faa", + "md5_checksum": "096c54bce5ec1cc5d41ac64553e42cb3", + "id": "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", + "file_size_bytes": 66555768 + }, + { + "name": "Gp0127652_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127652", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_structural_annotation.gff", + "md5_checksum": "ac8cd253a39e6e5fe0a0930f3bf6888a", + "id": "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", + "file_size_bytes": 2521 + }, + { + "name": "Gp0127652_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127652", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_functional_annotation.gff", + "md5_checksum": "863dc502676573c59ce69b1ff786042a", + "id": "nmdc:863dc502676573c59ce69b1ff786042a", + "file_size_bytes": 74520486 + }, + { + "name": "Gp0127652_KO TSV file", + "description": "KO TSV file for Gp0127652", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko.tsv", + "md5_checksum": "28ed2a9e345d0e542127fd1dc2173ae7", + "id": "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", + "file_size_bytes": 8379185 + }, + { + "name": "Gp0127652_EC TSV file", + "description": "EC TSV file for Gp0127652", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ec.tsv", + "md5_checksum": "a826d96e791f69ff7759d57f44a8a510", + "id": "nmdc:a826d96e791f69ff7759d57f44a8a510", + 
"file_size_bytes": 5555311 + }, + { + "name": "Gp0127652_COG GFF file", + "description": "COG GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cog.gff", + "md5_checksum": "58e310990be01a574eef05b3f5dd1495", + "id": "nmdc:58e310990be01a574eef05b3f5dd1495", + "file_size_bytes": 43385646 + }, + { + "name": "Gp0127652_PFAM GFF file", + "description": "PFAM GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_pfam.gff", + "md5_checksum": "28ce5c4c605a1c4538ce63987252c0ad", + "id": "nmdc:28ce5c4c605a1c4538ce63987252c0ad", + "file_size_bytes": 33061709 + }, + { + "name": "Gp0127652_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_tigrfam.gff", + "md5_checksum": "6de9ddf0b07c9bcf1409aceb7ee2f941", + "id": "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", + "file_size_bytes": 3665042 + }, + { + "name": "Gp0127652_SMART GFF file", + "description": "SMART GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_smart.gff", + "md5_checksum": "6342c9c98e297d2e39a2144c7ca0191b", + "id": "nmdc:6342c9c98e297d2e39a2144c7ca0191b", + "file_size_bytes": 9667737 + }, + { + "name": "Gp0127652_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_supfam.gff", + "md5_checksum": "d20aa781d3ad6b0face7cc9c412bc3f7", + "id": "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", + "file_size_bytes": 54593577 + }, + { + "name": "Gp0127652_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_cath_funfam.gff", + "md5_checksum": "db2e4b8f6cc1e8dc934e14b93589805a", + "id": 
"nmdc:db2e4b8f6cc1e8dc934e14b93589805a", + "file_size_bytes": 41409254 + }, + { + "name": "Gp0127652_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/annotation/nmdc_mga0mfxf90_ko_ec.gff", + "md5_checksum": "f51f9d679d1b045f4ebc61dab7fc2f08", + "id": "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08", + "file_size_bytes": 26617726 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127652_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.tooShort.fa", + "md5_checksum": "4371932b5834f2deadb2fbfc42b056f7", + "id": "nmdc:4371932b5834f2deadb2fbfc42b056f7", + "file_size_bytes": 89154072 + }, + { + "name": "Gp0127652_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_bins.unbinned.fa", + "md5_checksum": "5a8d8441e6e472837809ee31d517d32a", + "id": "nmdc:5a8d8441e6e472837809ee31d517d32a", + "file_size_bytes": 24514353 + }, + { + "name": "Gp0127652_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127652", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_checkm_qa.out", + "md5_checksum": "16016a7b2388048eec469f73395bc478", + "id": 
"nmdc:16016a7b2388048eec469f73395bc478", + "file_size_bytes": 1320 + }, + { + "name": "Gp0127652_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127652", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_hqmq_bin.zip", + "md5_checksum": "1e604f9f29f74c6169c4d27f839bb7b0", + "id": "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", + "file_size_bytes": 182 + }, + { + "name": "Gp0127652_metabat2 bins", + "description": "metabat2 bins for Gp0127652", + "url": "https://data.microbiomedata.org/data/nmdc:mga0mfxf90/MAGs/nmdc_mga0mfxf90_metabat_bin.zip", + "md5_checksum": "21467369d04671628ae67afbaf1d2076", + "id": "nmdc:21467369d04671628ae67afbaf1d2076", + "file_size_bytes": 1013750 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ece" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/mapping_stats.txt", + "file_size_bytes": 16276629, + "type": "nmdc:DataObject", + "id": "nmdc:23dcbb19af7db7cda8f06a1b375f12bb", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ecf" + }, + "description": "Assembled scaffold fasta for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly_scaffolds.fna", + "file_size_bytes": 115559491, + "type": "nmdc:DataObject", + "id": "nmdc:880b4e3e1b337def43f9dc694227eb50", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed0" + }, + "description": "Assembled contigs fasta for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly_contigs.fna", + "file_size_bytes": 116210833, + "type": "nmdc:DataObject", + "id": "nmdc:e8bc7228a422a7c1a2641276ee3f6e37", + "name": "assembly_contigs.fna", + 
"data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed1" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2194603382, + "type": "nmdc:DataObject", + "id": "nmdc:8f74962a51f82e4cebc78b6ac49dee49", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eda" + }, + "description": "Assembled AGP file for gold:Gp0127652", + "url": "https://data.microbiomedata.org/data/1781_100354/assembly/assembly.agp", + "file_size_bytes": 14312615, + "type": "nmdc:DataObject", + "id": "nmdc:6a251e6317c4450686a6215b61cd85d1", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bde" + }, + "id": "nmdc:a29b48c9962bc2acbf5d7e1b5a8e3a41", + "name": "1781_100354.krona.html", + "description": "Gold:Gp0127652 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100354/ReadbasedAnalysis/centrifuge/1781_100354.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15be2" + }, + "id": "nmdc:966945bb7952a4629efc713c78ef927f", + "name": "1781_100354.json", + "description": "Gold:Gp0127652 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100354/ReadbasedAnalysis/1781_100354.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16637" + }, + "id": "nmdc:0327492db7a99ab0fb672213e49e2f84", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127652", + "file_size_bytes": 86634430, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + 
"_id": { + "$oid": "649b003f1ae706d7b5b1663c" + }, + "id": "nmdc:376fce40c578be064e55103093f99f66", + "name": "gold:Gp0127652.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127652", + "file_size_bytes": 254409, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663d" + }, + "id": "nmdc:a28fa4b0897b1eae6d10053c47d07319", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127652", + "file_size_bytes": 1148, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663e" + }, + "id": "nmdc:b7511c0e296be199db386a3ae4181e45", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127652", + "file_size_bytes": 26455665, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1663f" + }, + "id": "nmdc:1946e92c1dfeddcf766605d2f6227934", + "name": "gold:Gp0127652.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127652", + "file_size_bytes": 340274, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16640" + }, + "id": "nmdc:794cbd38c4fe3d18faf5ceb5f543de61", + "name": "gold:Gp0127652.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127652", + "file_size_bytes": 691252, + "url": "https://data.microbiomedata.org/data/1781_100354/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + 
"$oid": "649b00401ae706d7b5b16d47" + }, + "description": "EC TSV File for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_ec.tsv", + "md5_checksum": "06ceb99673dcb924ca223539267a962a", + "file_size_bytes": 3385, + "id": "nmdc:06ceb99673dcb924ca223539267a962a", + "name": "gold:Gp0127652_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d49" + }, + "description": "KO TSV File for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_ko.tsv", + "md5_checksum": "4d16f813aefc09c7720770f065964c49", + "file_size_bytes": 3385, + "id": "nmdc:4d16f813aefc09c7720770f065964c49", + "name": "gold:Gp0127652_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4a" + }, + "description": "Structural annotation GFF file for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_structural_annotation.gff", + "md5_checksum": "6b39045cb99ca6220e27c4fa960f4dd1", + "file_size_bytes": 3385, + "id": "nmdc:6b39045cb99ca6220e27c4fa960f4dd1", + "name": "gold:Gp0127652_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4b" + }, + "description": "Functional annotation GFF file for gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_functional_annotation.gff", + "md5_checksum": "80c28fa3efc78e6d23d0abcf1161c983", + "file_size_bytes": 3385, + "id": "nmdc:80c28fa3efc78e6d23d0abcf1161c983", + "name": "gold:Gp0127652_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4e" + }, + "description": "Protein FAA for 
gold:Gp0127652", + "url": "https://data.microbiomedata.org/1781_100354/img_annotation/Ga0482219_proteins.faa", + "md5_checksum": "48bb698de57cd77bf1ddda9004e89c01", + "file_size_bytes": 3385, + "id": "nmdc:48bb698de57cd77bf1ddda9004e89c01", + "name": "gold:Gp0127652_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34727" + }, + "has_input": [ + "nmdc:a550eb6e614b375c1089ab816163ea63", + "nmdc:c8c5056ee57126695073137d0c1d3d04", + "nmdc:863dc502676573c59ce69b1ff786042a" + ], + "too_short_contig_num": 200309, + "part_of": [ + "nmdc:mga0mfxf90" + ], + "binned_contig_num": 835, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:4371932b5834f2deadb2fbfc42b056f7", + "nmdc:5a8d8441e6e472837809ee31d517d32a", + "nmdc:16016a7b2388048eec469f73395bc478", + "nmdc:1e604f9f29f74c6169c4d27f839bb7b0", + "nmdc:21467369d04671628ae67afbaf1d2076" + ], + "was_informed_by": "gold:Gp0127652", + "input_contig_num": 216252, + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0mfxf90", + "mags_list": [ + { + "number_of_contig": 233, + "completeness": 12.16, + "bin_name": "bins.1", + "gene_count": 1133, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 349, + "completeness": 45.68, + "bin_name": "bins.2", + "gene_count": 1809, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, 
+ "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 106, + "completeness": 17.54, + "bin_name": "bins.3", + "gene_count": 552, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 147, + "completeness": 14.66, + "bin_name": "bins.4", + "gene_count": 668, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 15108, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:21+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c3" + }, + "has_input": [ + "nmdc:a550eb6e614b375c1089ab816163ea63" + ], + "part_of": [ + "nmdc:mga0mfxf90" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:096c54bce5ec1cc5d41ac64553e42cb3", + "nmdc:ac8cd253a39e6e5fe0a0930f3bf6888a", + "nmdc:863dc502676573c59ce69b1ff786042a", + "nmdc:28ed2a9e345d0e542127fd1dc2173ae7", + "nmdc:a826d96e791f69ff7759d57f44a8a510", + "nmdc:58e310990be01a574eef05b3f5dd1495", + "nmdc:28ce5c4c605a1c4538ce63987252c0ad", + "nmdc:6de9ddf0b07c9bcf1409aceb7ee2f941", + "nmdc:6342c9c98e297d2e39a2144c7ca0191b", + "nmdc:d20aa781d3ad6b0face7cc9c412bc3f7", + "nmdc:db2e4b8f6cc1e8dc934e14b93589805a", + "nmdc:f51f9d679d1b045f4ebc61dab7fc2f08" + ], + "was_informed_by": "gold:Gp0127652", + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0mfxf90", + "started_at_time": 
"2021-10-11T02:27:08Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:45:21+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb1" + }, + "has_input": [ + "nmdc:60f03b815160b29125c2bd0776a330bf" + ], + "part_of": [ + "nmdc:mga0mfxf90" + ], + "ctg_logsum": 293195, + "scaf_logsum": 294510, + "gap_pct": 0.0019, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a550eb6e614b375c1089ab816163ea63", + "nmdc:9f194d271c352af3f68f2afeb1dbd499", + "nmdc:b0a79069110825cfe5525a8fc4f02cb6", + "nmdc:f54e8bda482b1cb8bc8e121ee5f39e07", + "nmdc:c8c5056ee57126695073137d0c1d3d04" + ], + "asm_score": 3.266, + "was_informed_by": "gold:Gp0127652", + "ctg_powsum": 31744, + "scaf_max": 16883, + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "scaf_powsum": 31903, + "execution_resource": "NERSC-Cori", + "contigs": 216252, + "name": "Assembly Activity for nmdc:mga0mfxf90", + "ctg_max": 16883, + "gc_std": 0.09516, + "contig_bp": 108575090, + "gc_avg": 0.63494, + "started_at_time": "2021-10-11T02:27:08Z", + "scaf_bp": 108577150, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 216046, + "ended_at_time": "2021-10-11T04:45:21+00:00", + "ctg_l50": 493, + "ctg_l90": 290, + "ctg_n50": 57034, + "ctg_n90": 179762, + "scaf_l50": 493, + "scaf_l90": 290, + "scaf_n50": 56962, + "scaf_n90": 179563 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b5f" + }, + "id": "nmdc:omprc-11-j43hz774", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-xngecc18" + ], + "has_output": [ + "jgi:574fe0af7ded5e3df1ee1493" + ], + "part_of": [ + 
"nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127652" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87e" + }, + "has_input": [ + "nmdc:b0548475f69b48e2d150cb90ae27f2c6" + ], + "part_of": [ + "nmdc:mga0mfxf90" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:60f03b815160b29125c2bd0776a330bf", + "nmdc:c40fa552711f6b19130b2a559f2d4cdc" + ], + "was_informed_by": "gold:Gp0127652", + "input_read_count": 26604768, + "output_read_bases": 3697162034, + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4017319968, + "name": "Read QC Activity for nmdc:mga0mfxf90", + "output_read_count": 24717950, + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:45:21+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf40" + }, + "has_input": [ + "nmdc:60f03b815160b29125c2bd0776a330bf" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:70f29a321c925cfc0e2003515f708400", + "nmdc:93d5419c0b31e0696ab8ffef477945fb", + "nmdc:9cd3b2939adabd809741ae6a84260266", + "nmdc:acea91fced8993a40cf1eb9cda29c4cd", + "nmdc:b623a0d3bdff34fb97530c74bb558aeb", + "nmdc:e461b2e81a22514fcd691caeaa7b0ca1", + "nmdc:38b7c63d0157f8bf4316f4295f0e6e28", + "nmdc:be0c2bc71cefcb0f0a23d270d047f30c", + "nmdc:1df4b479c887b43319d89cc80dc35239" + ], + 
"was_informed_by": "gold:Gp0127652", + "id": "nmdc:c86126b11f214f19721c56fadf91d87c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0mfxf90", + "started_at_time": "2021-10-11T02:27:08Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:45:21+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2711112988, + "type": "nmdc:DataObject", + "id": "jgi:574fe0b17ded5e3df1ee1494", + "name": "10533.3.165334.TGACTGA-GTCAGTC.fastq.gz" + }, + { + "name": "Gp0127654_Filtered Reads", + "description": "Filtered Reads for Gp0127654", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filtered.fastq.gz", + "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", + "id": "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", + "file_size_bytes": 2479437709 + }, + { + "name": "Gp0127654_Filtered Stats", + "description": "Filtered Stats for Gp0127654", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/qa/nmdc_mga0h0s362_filterStats.txt", + "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", + "id": "nmdc:9c600ec3be94d876f00d22808f3e8a59", + "file_size_bytes": 284 + }, + { + "name": "Gp0127654_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report.tsv", + "md5_checksum": "130ee7559789726a2cadccd3126dacad", + "id": "nmdc:130ee7559789726a2cadccd3126dacad", + "file_size_bytes": 3508 + }, + { + "name": "Gp0127654_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_report_full.tsv", + "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", + "id": "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", 
+ "file_size_bytes": 798264 + }, + { + "name": "Gp0127654_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127654", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_gottcha2_krona.html", + "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", + "id": "nmdc:7ccb4ee5a0728322154b29a79d13c842", + "file_size_bytes": 234834 + }, + { + "name": "Gp0127654_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127654", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_classification.tsv", + "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", + "id": "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "file_size_bytes": 2231971137 + }, + { + "name": "Gp0127654_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127654", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_report.tsv", + "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", + "id": "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "file_size_bytes": 257151 + }, + { + "name": "Gp0127654_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127654", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_centrifuge_krona.html", + "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", + "id": "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "file_size_bytes": 2341088 + }, + { + "name": "Gp0127654_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127654", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_classification.tsv", + "md5_checksum": "dfc90170aa038c2425702be223cb2f23", + "id": "nmdc:dfc90170aa038c2425702be223cb2f23", + "file_size_bytes": 1782429285 + }, + { + "name": "Gp0127654_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127654", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_report.tsv", + "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", + "id": "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "file_size_bytes": 661482 + }, + { + "name": "Gp0127654_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127654", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/ReadbasedAnalysis/nmdc_mga0h0s362_kraken2_krona.html", + "md5_checksum": "1c8339d96884c4a408de7804e00490d1", + "id": "nmdc:1c8339d96884c4a408de7804e00490d1", + "file_size_bytes": 4020719 + }, + { + "name": "Gp0127654_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127654", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_contigs.fna", + "md5_checksum": "909ae2a351ab1b99dfa877969ba33fc0", + "id": "nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "file_size_bytes": 93264957 + }, + { + "name": "Gp0127654_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127654", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_scaffolds.fna", + "md5_checksum": "1bd3a82d1ced0a3a4e4b207ecdeedc50", + "id": "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", + "file_size_bytes": 92670816 + }, + { + "name": "Gp0127654_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127654", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_covstats.txt", + "md5_checksum": "e2281ea2c0342c7243ac6a3179948547", + "id": "nmdc:e2281ea2c0342c7243ac6a3179948547", + "file_size_bytes": 15633835 + }, + { + "name": "Gp0127654_Assembled AGP file", + "description": "Assembled AGP file for Gp0127654", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_assembly.agp", + "md5_checksum": "ad045e491d27a8a2a4bb13c62ed74fd8", + "id": "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", + "file_size_bytes": 14624353 + }, + { + "name": "Gp0127654_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127654", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/assembly/nmdc_mga0h0s362_pairedMapped_sorted.bam", + "md5_checksum": "d8e09db1617046117fbb15631cf4977f", + "id": "nmdc:d8e09db1617046117fbb15631cf4977f", + "file_size_bytes": 2687176632 + }, + { + "name": "Gp0127654_Protein FAA", + "description": "Protein FAA for Gp0127654", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_proteins.faa", + "md5_checksum": "7e7c871dbe9ed0b2692444b77d0afe8d", + "id": "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", + "file_size_bytes": 55142968 + }, + { + "name": "Gp0127654_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127654", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_structural_annotation.gff", + "md5_checksum": "7b466cbbadfde9b125f2a31e48d8c60d", + "id": "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", + "file_size_bytes": 2518 + }, + { + "name": "Gp0127654_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127654", + "data_object_type": 
"Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_functional_annotation.gff", + "md5_checksum": "6a03c0a78fa59ac0a55777a9ea73e5d0", + "id": "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", + "file_size_bytes": 64337475 + }, + { + "name": "Gp0127654_KO TSV file", + "description": "KO TSV file for Gp0127654", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko.tsv", + "md5_checksum": "2275c42fa5206d646c7b477b184b9519", + "id": "nmdc:2275c42fa5206d646c7b477b184b9519", + "file_size_bytes": 7628926 + }, + { + "name": "Gp0127654_EC TSV file", + "description": "EC TSV file for Gp0127654", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ec.tsv", + "md5_checksum": "9c7fc55c2cbc986d520695dfb69b3e26", + "id": "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", + "file_size_bytes": 5084393 + }, + { + "name": "Gp0127654_COG GFF file", + "description": "COG GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cog.gff", + "md5_checksum": "fabdc762526357e8a6f288a07f947f06", + "id": "nmdc:fabdc762526357e8a6f288a07f947f06", + "file_size_bytes": 37680499 + }, + { + "name": "Gp0127654_PFAM GFF file", + "description": "PFAM GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_pfam.gff", + "md5_checksum": "1e8dcb98dfc7598e3965af187c296f12", + "id": "nmdc:1e8dcb98dfc7598e3965af187c296f12", + "file_size_bytes": 27765282 + }, + { + "name": "Gp0127654_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_tigrfam.gff", + "md5_checksum": "86f1a8ccf1532e11fc09d94dc39af57c", + "id": "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", + 
"file_size_bytes": 2970208 + }, + { + "name": "Gp0127654_SMART GFF file", + "description": "SMART GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_smart.gff", + "md5_checksum": "8add80a0fe95822917e4e7eaf275ed4f", + "id": "nmdc:8add80a0fe95822917e4e7eaf275ed4f", + "file_size_bytes": 8172309 + }, + { + "name": "Gp0127654_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_supfam.gff", + "md5_checksum": "6268ff527b56548792e7dca811500436", + "id": "nmdc:6268ff527b56548792e7dca811500436", + "file_size_bytes": 46611499 + }, + { + "name": "Gp0127654_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_cath_funfam.gff", + "md5_checksum": "ff7ac6fb709d1f0f7b476c9a5b29524e", + "id": "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", + "file_size_bytes": 35108681 + }, + { + "name": "Gp0127654_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/annotation/nmdc_mga0h0s362_ko_ec.gff", + "md5_checksum": "6c50fdd87bdba9116c1ff81e21b8a95c", + "id": "nmdc:6c50fdd87bdba9116c1ff81e21b8a95c", + "file_size_bytes": 24261565 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127654_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127654", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.tooShort.fa", + "md5_checksum": "920bcae91eae59ed8b9b19bcb7392ac5", + "id": "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", + "file_size_bytes": 80638518 + }, + { + "name": "Gp0127654_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_bins.unbinned.fa", + "md5_checksum": "d13bc24bdf72e7ba00d60f0e2e0805e8", + "id": "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", + "file_size_bytes": 12400628 + }, + { + "name": "Gp0127654_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127654", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_checkm_qa.out", + "md5_checksum": "3fd777151ef41b39b272cb42c1d5e8ba", + "id": "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", + "file_size_bytes": 785 + }, + { + "name": "Gp0127654_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127654", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_hqmq_bin.zip", + "md5_checksum": "470edf3d79702d3b806b545db595ca02", + "id": "nmdc:470edf3d79702d3b806b545db595ca02", + "file_size_bytes": 182 + }, + { + "name": "Gp0127654_metabat2 bins", + "description": "metabat2 bins for Gp0127654", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h0s362/MAGs/nmdc_mga0h0s362_metabat_bin.zip", + "md5_checksum": "8fc6f1a0269aa5179d72c52cf1a9726e", + "id": "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e", + "file_size_bytes": 69938 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed7" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/mapping_stats.txt", + 
"file_size_bytes": 14843159, + "type": "nmdc:DataObject", + "id": "nmdc:414faae2752dc595ae4f2ddab4438ec7", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed8" + }, + "description": "Assembled contigs fasta for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly_contigs.fna", + "file_size_bytes": 92474281, + "type": "nmdc:DataObject", + "id": "nmdc:cd12b50afea3097034758d6883864dd5", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14edc" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2651150044, + "type": "nmdc:DataObject", + "id": "nmdc:fdde2ac466c983fc1154c7968631df20", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ede" + }, + "description": "Assembled scaffold fasta for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly_scaffolds.fna", + "file_size_bytes": 91880416, + "type": "nmdc:DataObject", + "id": "nmdc:6e1f393ec856d3445d9a4ac23ff1b249", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee1" + }, + "description": "Assembled AGP file for gold:Gp0127654", + "url": "https://data.microbiomedata.org/data/1781_100356/assembly/assembly.agp", + "file_size_bytes": 13042449, + "type": "nmdc:DataObject", + "id": "nmdc:c2e8b30ea935a2ca7bece5b913116f65", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bee" + }, + "id": "nmdc:336911e31f6622b74af1c92d2ed5f4b6", + "name": "1781_100356.krona.html", + "description": "Gold:Gp0127654 KRONA plot HTML file", + "url": 
"https://data.microbiomedata.org/1781_100356/ReadbasedAnalysis/centrifuge/1781_100356.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bef" + }, + "id": "nmdc:cdd1b6d43bd7a8963fa3c5bab4296498", + "name": "1781_100356.json", + "description": "Gold:Gp0127654 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100356/ReadbasedAnalysis/1781_100356.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16645" + }, + "id": "nmdc:e22ff97901fed9397f221fbd8048f87d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127654", + "file_size_bytes": 78267851, + "url": "https://data.microbiomedata.org/data/1781_100356/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16648" + }, + "id": "nmdc:9f21a2cf85bdf5ec51f41a6e331819cc", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127654", + "file_size_bytes": 12538639, + "url": "https://data.microbiomedata.org/data/1781_100356/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d55" + }, + "description": "Functional annotation GFF file for gold:Gp0127654", + "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_functional_annotation.gff", + "md5_checksum": "b28a675c6560b34691a960f7e873841d", + "file_size_bytes": 3385, + "id": "nmdc:b28a675c6560b34691a960f7e873841d", + "name": "gold:Gp0127654_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5c" + }, + "description": "KO TSV File for gold:Gp0127654", + "url": 
"https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_ko.tsv", + "md5_checksum": "3b7734343770dce929591ee83d96acb6", + "file_size_bytes": 3385, + "id": "nmdc:3b7734343770dce929591ee83d96acb6", + "name": "gold:Gp0127654_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5e" + }, + "description": "Protein FAA for gold:Gp0127654", + "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_proteins.faa", + "md5_checksum": "deda4116aac7e262c0edf3358bb8e384", + "file_size_bytes": 3385, + "id": "nmdc:deda4116aac7e262c0edf3358bb8e384", + "name": "gold:Gp0127654_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d63" + }, + "description": "EC TSV File for gold:Gp0127654", + "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_ec.tsv", + "md5_checksum": "b785c7809fa99d5beca859eded4a9b0f", + "file_size_bytes": 3385, + "id": "nmdc:b785c7809fa99d5beca859eded4a9b0f", + "name": "gold:Gp0127654_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d67" + }, + "description": "Structural annotation GFF file for gold:Gp0127654", + "url": "https://data.microbiomedata.org/1781_100356/img_annotation/Ga0482217_structural_annotation.gff", + "md5_checksum": "a9cf54b925e1c5b8c3e0299730f5a464", + "file_size_bytes": 3385, + "id": "nmdc:a9cf54b925e1c5b8c3e0299730f5a464", + "name": "gold:Gp0127654_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34728" + }, + "has_input": [ + "nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "nmdc:d8e09db1617046117fbb15631cf4977f", + "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0" + ], + 
"too_short_contig_num": 189586, + "part_of": [ + "nmdc:mga0h0s362" + ], + "binned_contig_num": 56, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:920bcae91eae59ed8b9b19bcb7392ac5", + "nmdc:d13bc24bdf72e7ba00d60f0e2e0805e8", + "nmdc:3fd777151ef41b39b272cb42c1d5e8ba", + "nmdc:470edf3d79702d3b806b545db595ca02", + "nmdc:8fc6f1a0269aa5179d72c52cf1a9726e" + ], + "was_informed_by": "gold:Gp0127654", + "input_contig_num": 197669, + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0h0s362", + "mags_list": [ + { + "number_of_contig": 56, + "completeness": 18.09, + "bin_name": "bins.1", + "gene_count": 272, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 8027, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c7" + }, + "has_input": [ + "nmdc:909ae2a351ab1b99dfa877969ba33fc0" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7e7c871dbe9ed0b2692444b77d0afe8d", + "nmdc:7b466cbbadfde9b125f2a31e48d8c60d", + "nmdc:6a03c0a78fa59ac0a55777a9ea73e5d0", + "nmdc:2275c42fa5206d646c7b477b184b9519", + "nmdc:9c7fc55c2cbc986d520695dfb69b3e26", + "nmdc:fabdc762526357e8a6f288a07f947f06", + "nmdc:1e8dcb98dfc7598e3965af187c296f12", + "nmdc:86f1a8ccf1532e11fc09d94dc39af57c", + "nmdc:8add80a0fe95822917e4e7eaf275ed4f", + "nmdc:6268ff527b56548792e7dca811500436", + "nmdc:ff7ac6fb709d1f0f7b476c9a5b29524e", + 
"nmdc:6c50fdd87bdba9116c1ff81e21b8a95c" + ], + "was_informed_by": "gold:Gp0127654", + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0h0s362", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb4" + }, + "has_input": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "ctg_logsum": 130142, + "scaf_logsum": 130537, + "gap_pct": 0.0008, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:909ae2a351ab1b99dfa877969ba33fc0", + "nmdc:1bd3a82d1ced0a3a4e4b207ecdeedc50", + "nmdc:e2281ea2c0342c7243ac6a3179948547", + "nmdc:ad045e491d27a8a2a4bb13c62ed74fd8", + "nmdc:d8e09db1617046117fbb15631cf4977f" + ], + "asm_score": 4.409, + "was_informed_by": "gold:Gp0127654", + "ctg_powsum": 13918, + "scaf_max": 69027, + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "scaf_powsum": 13961, + "execution_resource": "NERSC-Cori", + "contigs": 197669, + "name": "Assembly Activity for nmdc:mga0h0s362", + "ctg_max": 69027, + "gc_std": 0.09749, + "contig_bp": 85731750, + "gc_avg": 0.62891, + "started_at_time": "2021-10-11T02:23:29Z", + "scaf_bp": 85732440, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 197600, + "ended_at_time": "2021-10-11T03:58:56+00:00", + "ctg_l50": 404, + "ctg_l90": 286, + "ctg_n50": 62467, + "ctg_n90": 168661, + "scaf_l50": 404, + "scaf_l90": 286, + "scaf_n50": 62435, + "scaf_n90": 168596, + "scaf_l_gt50k": 69027, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.080514446 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b60" + }, + "id": "nmdc:omprc-11-kgxpef29", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", + 
"description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-tpk9x619" + ], + "has_output": [ + "jgi:574fe0b17ded5e3df1ee1494" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127654" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87f" + }, + "has_input": [ + "nmdc:c87a7a87a5218698fbdd8ad39085b892" + ], + "part_of": [ + "nmdc:mga0h0s362" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6", + "nmdc:9c600ec3be94d876f00d22808f3e8a59" + ], + "was_informed_by": "gold:Gp0127654", + "input_read_count": 30951192, + "output_read_bases": 4526478748, + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4673629992, + "name": "Read QC Activity for nmdc:mga0h0s362", + "output_read_count": 30289044, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf45" + }, + "has_input": [ + "nmdc:c4f29a07f3ce03ee2a2d11c90e8b43d6" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:130ee7559789726a2cadccd3126dacad", + "nmdc:c955eae73afbfe1ad4c4eb2eac51f3f3", + 
"nmdc:7ccb4ee5a0728322154b29a79d13c842", + "nmdc:8b88e19f3d4f22c8bb71f66e7aec6dba", + "nmdc:35a0d72edac6c5e7f9c8ddf86c5534e0", + "nmdc:f808a89810cdb2a911a5b5388b70ce94", + "nmdc:dfc90170aa038c2425702be223cb2f23", + "nmdc:84255d3bab9ea79151db5ad7bcbc677c", + "nmdc:1c8339d96884c4a408de7804e00490d1" + ], + "was_informed_by": "gold:Gp0127654", + "id": "nmdc:168441535388b19bbdee0928b42e5b20", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h0s362", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2411560282, + "type": "nmdc:DataObject", + "id": "jgi:574fe0b47ded5e3df1ee1496", + "name": "10533.3.165334.ACGATGA-GTCATCG.fastq.gz" + }, + { + "name": "Gp0127656_Filtered Reads", + "description": "Filtered Reads for Gp0127656", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filtered.fastq.gz", + "md5_checksum": "cec95659bb04ae095f51821ddaa9fa59", + "id": "nmdc:cec95659bb04ae095f51821ddaa9fa59", + "file_size_bytes": 2195848744 + }, + { + "name": "Gp0127656_Filtered Stats", + "description": "Filtered Stats for Gp0127656", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/qa/nmdc_mga00hh562_filterStats.txt", + "md5_checksum": "7b4f365bbe942a523890abf13d1b6436", + "id": "nmdc:7b4f365bbe942a523890abf13d1b6436", + "file_size_bytes": 284 + }, + { + "name": "Gp0127656_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report.tsv", + "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", + "id": "nmdc:ccbe419157d8286626330fd0eb0dd0e0", + "file_size_bytes": 2418 + }, + { + "name": "Gp0127656_Gottcha2 
full TSV report", + "description": "Gottcha2 full TSV report for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_report_full.tsv", + "md5_checksum": "92ab65cdaca3367552e03d895123e04f", + "id": "nmdc:92ab65cdaca3367552e03d895123e04f", + "file_size_bytes": 759212 + }, + { + "name": "Gp0127656_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127656", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_gottcha2_krona.html", + "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", + "id": "nmdc:0b3ff6503723d6ea9b84552f68ed4270", + "file_size_bytes": 231563 + }, + { + "name": "Gp0127656_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127656", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_classification.tsv", + "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", + "id": "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", + "file_size_bytes": 1950007455 + }, + { + "name": "Gp0127656_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127656", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_report.tsv", + "md5_checksum": "a3255df52cd6150f03bbf7cbd655ec76", + "id": "nmdc:a3255df52cd6150f03bbf7cbd655ec76", + "file_size_bytes": 255724 + }, + { + "name": "Gp0127656_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127656", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_centrifuge_krona.html", + "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", + "id": 
"nmdc:a25a5d7e399624e5e5735b65a9dd322a", + "file_size_bytes": 2337553 + }, + { + "name": "Gp0127656_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127656", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_classification.tsv", + "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", + "id": "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", + "file_size_bytes": 1555636513 + }, + { + "name": "Gp0127656_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127656", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_report.tsv", + "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", + "id": "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", + "file_size_bytes": 647090 + }, + { + "name": "Gp0127656_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127656", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/ReadbasedAnalysis/nmdc_mga00hh562_kraken2_krona.html", + "md5_checksum": "ae369194e4b24e137fc23da0412277a6", + "id": "nmdc:ae369194e4b24e137fc23da0412277a6", + "file_size_bytes": 3939982 + }, + { + "name": "Gp0127656_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127656", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_contigs.fna", + "md5_checksum": "8106808f8e245ef9a46a4e31561eba7f", + "id": "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "file_size_bytes": 78938478 + }, + { + "name": "Gp0127656_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127656", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_scaffolds.fna", + 
"md5_checksum": "55385159fa8361d7ff747cdc1155512b", + "id": "nmdc:55385159fa8361d7ff747cdc1155512b", + "file_size_bytes": 78428743 + }, + { + "name": "Gp0127656_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_covstats.txt", + "md5_checksum": "4741908a5b07eaa2312ff3e6d2d991aa", + "id": "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", + "file_size_bytes": 13384382 + }, + { + "name": "Gp0127656_Assembled AGP file", + "description": "Assembled AGP file for Gp0127656", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_assembly.agp", + "md5_checksum": "172e5cf3b5c5bf8e4896058dad3e814a", + "id": "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", + "file_size_bytes": 12508060 + }, + { + "name": "Gp0127656_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127656", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/assembly/nmdc_mga00hh562_pairedMapped_sorted.bam", + "md5_checksum": "941f749a92155321c5ce7e5aa32d3b55", + "id": "nmdc:941f749a92155321c5ce7e5aa32d3b55", + "file_size_bytes": 2375706529 + }, + { + "name": "Gp0127656_Protein FAA", + "description": "Protein FAA for Gp0127656", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_proteins.faa", + "md5_checksum": "18f68cc8acda8d33d5fd6f21a9166aa8", + "id": "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", + "file_size_bytes": 46951183 + }, + { + "name": "Gp0127656_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127656", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_structural_annotation.gff", + "md5_checksum": 
"87d5f3a505d23c1aa2deea960702d55b", + "id": "nmdc:87d5f3a505d23c1aa2deea960702d55b", + "file_size_bytes": 2511 + }, + { + "name": "Gp0127656_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127656", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_functional_annotation.gff", + "md5_checksum": "8e8be343bbb1ba11f3e15867b419d05d", + "id": "nmdc:8e8be343bbb1ba11f3e15867b419d05d", + "file_size_bytes": 54902900 + }, + { + "name": "Gp0127656_KO TSV file", + "description": "KO TSV file for Gp0127656", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko.tsv", + "md5_checksum": "91c2485c0ebf683aed3e7935ec60b7d1", + "id": "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", + "file_size_bytes": 6468844 + }, + { + "name": "Gp0127656_EC TSV file", + "description": "EC TSV file for Gp0127656", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ec.tsv", + "md5_checksum": "fb6740e86534daeea41ab6d5cf9d91d2", + "id": "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", + "file_size_bytes": 4308547 + }, + { + "name": "Gp0127656_COG GFF file", + "description": "COG GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cog.gff", + "md5_checksum": "19da9b3f211164643f276bc74604c9b0", + "id": "nmdc:19da9b3f211164643f276bc74604c9b0", + "file_size_bytes": 32139189 + }, + { + "name": "Gp0127656_PFAM GFF file", + "description": "PFAM GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_pfam.gff", + "md5_checksum": "19905547dfa37274a9f91c9caaf6bacc", + "id": "nmdc:19905547dfa37274a9f91c9caaf6bacc", + "file_size_bytes": 23590201 + }, + { + "name": "Gp0127656_TigrFam GFF file", + 
"description": "TigrFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_tigrfam.gff", + "md5_checksum": "30c2b0722d225938975243ab1041ed12", + "id": "nmdc:30c2b0722d225938975243ab1041ed12", + "file_size_bytes": 2485400 + }, + { + "name": "Gp0127656_SMART GFF file", + "description": "SMART GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_smart.gff", + "md5_checksum": "623e913fa98f88f6037754daf5d9ffc5", + "id": "nmdc:623e913fa98f88f6037754daf5d9ffc5", + "file_size_bytes": 6932331 + }, + { + "name": "Gp0127656_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_supfam.gff", + "md5_checksum": "ec56df16785bc67e073128f09366ec43", + "id": "nmdc:ec56df16785bc67e073128f09366ec43", + "file_size_bytes": 39880284 + }, + { + "name": "Gp0127656_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_cath_funfam.gff", + "md5_checksum": "2831d1ead0af4681b2ae1a9f21733637", + "id": "nmdc:2831d1ead0af4681b2ae1a9f21733637", + "file_size_bytes": 29872897 + }, + { + "name": "Gp0127656_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/annotation/nmdc_mga00hh562_ko_ec.gff", + "md5_checksum": "53f225f74011f7d30fcfd5c60b3693ae", + "id": "nmdc:53f225f74011f7d30fcfd5c60b3693ae", + "file_size_bytes": 20564625 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + 
"id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127656_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.tooShort.fa", + "md5_checksum": "313c88df1890a33d388bdb23c7ad37c3", + "id": "nmdc:313c88df1890a33d388bdb23c7ad37c3", + "file_size_bytes": 69332992 + }, + { + "name": "Gp0127656_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_bins.unbinned.fa", + "md5_checksum": "ae567f55fe899da83831fda23dcd7a20", + "id": "nmdc:ae567f55fe899da83831fda23dcd7a20", + "file_size_bytes": 9275333 + }, + { + "name": "Gp0127656_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127656", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_checkm_qa.out", + "md5_checksum": "5a8dbda6aec0825b4159d5b53481db90", + "id": "nmdc:5a8dbda6aec0825b4159d5b53481db90", + "file_size_bytes": 775 + }, + { + "name": "Gp0127656_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127656", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_hqmq_bin.zip", + "md5_checksum": "060a7f90c5c5123cac41ed946a5482af", + "id": "nmdc:060a7f90c5c5123cac41ed946a5482af", + "file_size_bytes": 182 + }, + { + "name": "Gp0127656_metabat2 bins", + "description": "metabat2 bins for Gp0127656", + "url": "https://data.microbiomedata.org/data/nmdc:mga00hh562/MAGs/nmdc_mga00hh562_metabat_bin.zip", + "md5_checksum": "e9f5d03e8264308ed77da0b63eb738fe", + "id": "nmdc:e9f5d03e8264308ed77da0b63eb738fe", + 
"file_size_bytes": 101752 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee3" + }, + "description": "Assembled scaffold fasta for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/assembly_scaffolds.fna", + "file_size_bytes": 77751067, + "type": "nmdc:DataObject", + "id": "nmdc:cea40db59e6f0f57dfb38ed4339225f7", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee4" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/mapping_stats.txt", + "file_size_bytes": 12706402, + "type": "nmdc:DataObject", + "id": "nmdc:b31206dd7fe7d961882d0654ab5aaffa", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee5" + }, + "description": "Assembled AGP file for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/assembly.agp", + "file_size_bytes": 11151492, + "type": "nmdc:DataObject", + "id": "nmdc:3794d834e9a6e8c1e2acf616a2cc7625", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee6" + }, + "description": "Assembled contigs fasta for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/assembly_contigs.fna", + "file_size_bytes": 78260498, + "type": "nmdc:DataObject", + "id": "nmdc:3dfd278d5e4fc3539b6dfd021acdac76", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee7" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127656", + "url": "https://data.microbiomedata.org/data/1781_100633/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2344270684, + "type": "nmdc:DataObject", + "id": "nmdc:6e0e10b90c8b52db8afc73199c3d6028", + "name": "pairedMapped_sorted.bam", + 
"data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bf4" + }, + "id": "nmdc:daa88ce1c3c1f25b3b19a8c98c255e7c", + "name": "1781_100633.krona.html", + "description": "Gold:Gp0127656 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100633/ReadbasedAnalysis/centrifuge/1781_100633.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bfb" + }, + "id": "nmdc:e47f46c4a96d30e9bc65ded042a90033", + "name": "1781_100633.json", + "description": "Gold:Gp0127656 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100633/ReadbasedAnalysis/1781_100633.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16649" + }, + "id": "nmdc:0f2602d1171d6e2e1a09f0b41f6ded92", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127656", + "file_size_bytes": 9538263, + "url": "https://data.microbiomedata.org/data/1781_100633/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664e" + }, + "id": "nmdc:5184f24c83a7a7b9a0aafb8a934234ac", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127656", + "file_size_bytes": 67308024, + "url": "https://data.microbiomedata.org/data/1781_100633/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d59" + }, + "description": "Functional annotation GFF file for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_functional_annotation.gff", + "md5_checksum": "00f42710ff9df37cd23e5e73d54e4dd1", + "file_size_bytes": 3385, + "id": "nmdc:00f42710ff9df37cd23e5e73d54e4dd1", + "name": "gold:Gp0127656_Functional annotation GFF file", + 
"data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5a" + }, + "description": "Protein FAA for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_proteins.faa", + "md5_checksum": "2819bbb349ca5bdbf311aeae6ada532b", + "file_size_bytes": 3385, + "id": "nmdc:2819bbb349ca5bdbf311aeae6ada532b", + "name": "gold:Gp0127656_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5d" + }, + "description": "Structural annotation GFF file for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_structural_annotation.gff", + "md5_checksum": "0c2ae5a86d4840a0b324d73977170f1e", + "file_size_bytes": 3385, + "id": "nmdc:0c2ae5a86d4840a0b324d73977170f1e", + "name": "gold:Gp0127656_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5f" + }, + "description": "EC TSV File for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_ec.tsv", + "md5_checksum": "33e0f5ff7c448ded210f04798894a031", + "file_size_bytes": 3385, + "id": "nmdc:33e0f5ff7c448ded210f04798894a031", + "name": "gold:Gp0127656_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d60" + }, + "description": "KO TSV File for gold:Gp0127656", + "url": "https://data.microbiomedata.org/1781_100633/img_annotation/Ga0482215_ko.tsv", + "md5_checksum": "8d230dd7948d2b08c4de1adc0d0002b8", + "file_size_bytes": 3385, + "id": "nmdc:8d230dd7948d2b08c4de1adc0d0002b8", + "name": "gold:Gp0127656_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + 
"$oid": "649b0052ec087f6bbab34726" + }, + "has_input": [ + "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "nmdc:941f749a92155321c5ce7e5aa32d3b55", + "nmdc:8e8be343bbb1ba11f3e15867b419d05d" + ], + "too_short_contig_num": 163283, + "part_of": [ + "nmdc:mga00hh562" + ], + "binned_contig_num": 83, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:313c88df1890a33d388bdb23c7ad37c3", + "nmdc:ae567f55fe899da83831fda23dcd7a20", + "nmdc:5a8dbda6aec0825b4159d5b53481db90", + "nmdc:060a7f90c5c5123cac41ed946a5482af", + "nmdc:e9f5d03e8264308ed77da0b63eb738fe" + ], + "was_informed_by": "gold:Gp0127656", + "input_contig_num": 169495, + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga00hh562", + "mags_list": [ + { + "number_of_contig": 83, + "completeness": 14.35, + "bin_name": "bins.1", + "gene_count": 388, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 6129, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c6" + }, + "has_input": [ + "nmdc:8106808f8e245ef9a46a4e31561eba7f" + ], + "part_of": [ + "nmdc:mga00hh562" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:18f68cc8acda8d33d5fd6f21a9166aa8", + "nmdc:87d5f3a505d23c1aa2deea960702d55b", + "nmdc:8e8be343bbb1ba11f3e15867b419d05d", + "nmdc:91c2485c0ebf683aed3e7935ec60b7d1", + "nmdc:fb6740e86534daeea41ab6d5cf9d91d2", + "nmdc:19da9b3f211164643f276bc74604c9b0", + 
"nmdc:19905547dfa37274a9f91c9caaf6bacc", + "nmdc:30c2b0722d225938975243ab1041ed12", + "nmdc:623e913fa98f88f6037754daf5d9ffc5", + "nmdc:ec56df16785bc67e073128f09366ec43", + "nmdc:2831d1ead0af4681b2ae1a9f21733637", + "nmdc:53f225f74011f7d30fcfd5c60b3693ae" + ], + "was_informed_by": "gold:Gp0127656", + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga00hh562", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb2" + }, + "has_input": [ + "nmdc:cec95659bb04ae095f51821ddaa9fa59" + ], + "part_of": [ + "nmdc:mga00hh562" + ], + "ctg_logsum": 98556, + "scaf_logsum": 99077, + "gap_pct": 0.00105, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8106808f8e245ef9a46a4e31561eba7f", + "nmdc:55385159fa8361d7ff747cdc1155512b", + "nmdc:4741908a5b07eaa2312ff3e6d2d991aa", + "nmdc:172e5cf3b5c5bf8e4896058dad3e814a", + "nmdc:941f749a92155321c5ce7e5aa32d3b55" + ], + "asm_score": 2.914, + "was_informed_by": "gold:Gp0127656", + "ctg_powsum": 10453, + "scaf_max": 9079, + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "scaf_powsum": 10508, + "execution_resource": "NERSC-Cori", + "contigs": 169495, + "name": "Assembly Activity for nmdc:mga00hh562", + "ctg_max": 9079, + "gc_std": 0.09653, + "contig_bp": 72511508, + "gc_avg": 0.62989, + "started_at_time": "2021-10-11T02:23:35Z", + "scaf_bp": 72512268, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169419, + "ended_at_time": "2021-10-11T03:58:56+00:00", + "ctg_l50": 399, + "ctg_l90": 286, + "ctg_n50": 54638, + "ctg_n90": 144448, + "scaf_l50": 399, + "scaf_l90": 286, + "scaf_n50": 54616, + "scaf_n90": 144376 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b61" + }, + "id": "nmdc:omprc-11-qrsway30", + 
"name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-mmr87q87" + ], + "has_output": [ + "jgi:574fe0b47ded5e3df1ee1496" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-18", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127656" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c87c" + }, + "has_input": [ + "nmdc:a604c87c632165bb5223eebda60801d0" + ], + "part_of": [ + "nmdc:mga00hh562" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:cec95659bb04ae095f51821ddaa9fa59", + "nmdc:7b4f365bbe942a523890abf13d1b6436" + ], + "was_informed_by": "gold:Gp0127656", + "input_read_count": 27317020, + "output_read_bases": 3960490395, + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4124870020, + "name": "Read QC Activity for nmdc:mga00hh562", + "output_read_count": 26481746, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf3e" + }, + "has_input": [ + "nmdc:cec95659bb04ae095f51821ddaa9fa59" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ccbe419157d8286626330fd0eb0dd0e0", + "nmdc:92ab65cdaca3367552e03d895123e04f", + "nmdc:0b3ff6503723d6ea9b84552f68ed4270", + "nmdc:8e5ad12b7fa8873463088d7bf361f7c5", + "nmdc:a3255df52cd6150f03bbf7cbd655ec76", + "nmdc:a25a5d7e399624e5e5735b65a9dd322a", + "nmdc:dd953aebfd5cf624a5ffa8c6d6b64b08", + "nmdc:96f47f6cd2350fb1c7c7b746d2e9d811", + "nmdc:ae369194e4b24e137fc23da0412277a6" + ], + "was_informed_by": "gold:Gp0127656", + "id": "nmdc:8e2d8da1d05b292a52a33732a6bc4391", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga00hh562", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:58:56+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2103957707, + "type": "nmdc:DataObject", + "id": "jgi:574fde907ded5e3df1ee1426", + "name": "10533.2.165322.GTGAGCT-AAGCTCA.fastq.gz" + }, + { + "name": "Gp0127651_Filtered Reads", + "description": "Filtered Reads for Gp0127651", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filtered.fastq.gz", + "md5_checksum": "2791a196017767af3b5b21a3029799c0", + "id": "nmdc:2791a196017767af3b5b21a3029799c0", + "file_size_bytes": 1856919615 + }, + { + "name": "Gp0127651_Filtered Stats", + "description": "Filtered Stats for Gp0127651", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/qa/nmdc_mga08hnt47_filterStats.txt", + "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", + "id": "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f", + "file_size_bytes": 283 + }, + { + "name": "Gp0127651_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report.tsv", + 
"md5_checksum": "53ee263960c39126e039656a121deb96", + "id": "nmdc:53ee263960c39126e039656a121deb96", + "file_size_bytes": 1199 + }, + { + "name": "Gp0127651_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_report_full.tsv", + "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", + "id": "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "file_size_bytes": 703299 + }, + { + "name": "Gp0127651_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127651", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_gottcha2_krona.html", + "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", + "id": "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "file_size_bytes": 229311 + }, + { + "name": "Gp0127651_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127651", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_classification.tsv", + "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", + "id": "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "file_size_bytes": 1642196063 + }, + { + "name": "Gp0127651_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127651", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_report.tsv", + "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", + "id": "nmdc:b4cbc81c986c67c1037c8b7280924683", + "file_size_bytes": 254418 + }, + { + "name": "Gp0127651_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127651", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_centrifuge_krona.html", + "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", + "id": "nmdc:e0c61a191258597984a05d86eaf4d71f", + "file_size_bytes": 2333132 + }, + { + "name": "Gp0127651_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127651", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_classification.tsv", + "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", + "id": "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "file_size_bytes": 1309125719 + }, + { + "name": "Gp0127651_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127651", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_report.tsv", + "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", + "id": "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "file_size_bytes": 639737 + }, + { + "name": "Gp0127651_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127651", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/ReadbasedAnalysis/nmdc_mga08hnt47_kraken2_krona.html", + "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", + "id": "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8", + "file_size_bytes": 3988966 + }, + { + "name": "Gp0127651_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127651", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_contigs.fna", + "md5_checksum": "8483663a943ff4c0fc0249353676bfc1", + "id": "nmdc:8483663a943ff4c0fc0249353676bfc1", + "file_size_bytes": 95957530 + }, + { + "name": "Gp0127651_Assembled scaffold fasta", + "description": "Assembled scaffold 
fasta for Gp0127651", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_scaffolds.fna", + "md5_checksum": "ccca920c56ad3d050e2d8801bcbe4855", + "id": "nmdc:ccca920c56ad3d050e2d8801bcbe4855", + "file_size_bytes": 95414704 + }, + { + "name": "Gp0127651_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_covstats.txt", + "md5_checksum": "f21e374c1c31c02bd0e41228cc7895c3", + "id": "nmdc:f21e374c1c31c02bd0e41228cc7895c3", + "file_size_bytes": 14289388 + }, + { + "name": "Gp0127651_Assembled AGP file", + "description": "Assembled AGP file for Gp0127651", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_assembly.agp", + "md5_checksum": "f43ae7935184d10ba65961171efcac34", + "id": "nmdc:f43ae7935184d10ba65961171efcac34", + "file_size_bytes": 13343603 + }, + { + "name": "Gp0127651_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127651", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/assembly/nmdc_mga08hnt47_pairedMapped_sorted.bam", + "md5_checksum": "838162ead3f121f5bc02bc1234a32a55", + "id": "nmdc:838162ead3f121f5bc02bc1234a32a55", + "file_size_bytes": 2037589818 + }, + { + "name": "Gp0127651_Protein FAA", + "description": "Protein FAA for Gp0127651", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_proteins.faa", + "md5_checksum": "d8dc4f31293c549b12bbcab915d708cc", + "id": "nmdc:d8dc4f31293c549b12bbcab915d708cc", + "file_size_bytes": 54370216 + }, + { + "name": "Gp0127651_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127651", + 
"data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_structural_annotation.gff", + "md5_checksum": "415256907dcafaa68778a2ba358d9ac5", + "id": "nmdc:415256907dcafaa68778a2ba358d9ac5", + "file_size_bytes": 2517 + }, + { + "name": "Gp0127651_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127651", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_functional_annotation.gff", + "md5_checksum": "f0c60a537e6867bf62fde15577669453", + "id": "nmdc:f0c60a537e6867bf62fde15577669453", + "file_size_bytes": 61364019 + }, + { + "name": "Gp0127651_KO TSV file", + "description": "KO TSV file for Gp0127651", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko.tsv", + "md5_checksum": "e0f16b60c50581799b7ecb254e61e537", + "id": "nmdc:e0f16b60c50581799b7ecb254e61e537", + "file_size_bytes": 6908291 + }, + { + "name": "Gp0127651_EC TSV file", + "description": "EC TSV file for Gp0127651", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ec.tsv", + "md5_checksum": "6eb21304f0762bd8c11b98826d310321", + "id": "nmdc:6eb21304f0762bd8c11b98826d310321", + "file_size_bytes": 4650091 + }, + { + "name": "Gp0127651_COG GFF file", + "description": "COG GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cog.gff", + "md5_checksum": "4ea7982c99cbb6d8ccc9fd949bee09ec", + "id": "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", + "file_size_bytes": 36137856 + }, + { + "name": "Gp0127651_PFAM GFF file", + "description": "PFAM GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_pfam.gff", + 
"md5_checksum": "f389dc8a93de9f21322db385b2788f5f", + "id": "nmdc:f389dc8a93de9f21322db385b2788f5f", + "file_size_bytes": 27173740 + }, + { + "name": "Gp0127651_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_tigrfam.gff", + "md5_checksum": "8e6659ce96dfa72ceefda39c74fb1dce", + "id": "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", + "file_size_bytes": 2943355 + }, + { + "name": "Gp0127651_SMART GFF file", + "description": "SMART GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_smart.gff", + "md5_checksum": "89bc9cf9183fed6700cde44fad41b830", + "id": "nmdc:89bc9cf9183fed6700cde44fad41b830", + "file_size_bytes": 7927726 + }, + { + "name": "Gp0127651_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_supfam.gff", + "md5_checksum": "84aae368e77c1d07c6b6e8deecbc3f3b", + "id": "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", + "file_size_bytes": 45499652 + }, + { + "name": "Gp0127651_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_cath_funfam.gff", + "md5_checksum": "ee5612e5ee82ec2d57029d1bc4e1756f", + "id": "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", + "file_size_bytes": 34280847 + }, + { + "name": "Gp0127651_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/annotation/nmdc_mga08hnt47_ko_ec.gff", + "md5_checksum": "68c06be8d27d1697b4a6955537b318c8", + "id": "nmdc:68c06be8d27d1697b4a6955537b318c8", + "file_size_bytes": 21943549 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + 
"data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127651_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.tooShort.fa", + "md5_checksum": "6f012bfca6cb653f92eaf927003de0fa", + "id": "nmdc:6f012bfca6cb653f92eaf927003de0fa", + "file_size_bytes": 77381118 + }, + { + "name": "Gp0127651_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127651", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_bins.unbinned.fa", + "md5_checksum": "298e0a0c98ebe4fb673da7de9fcb03a2", + "id": "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", + "file_size_bytes": 17278743 + }, + { + "name": "Gp0127651_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127651", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_checkm_qa.out", + "md5_checksum": "66fd77d80cc9257da98c5bce4cb30626", + "id": "nmdc:66fd77d80cc9257da98c5bce4cb30626", + "file_size_bytes": 760 + }, + { + "name": "Gp0127651_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127651", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_hqmq_bin.zip", + "md5_checksum": "06caec963e007225d1d9411078829100", + "id": "nmdc:06caec963e007225d1d9411078829100", + "file_size_bytes": 182 + }, + { + "name": "Gp0127651_metabat2 bins", + "description": "metabat2 bins for Gp0127651", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08hnt47/MAGs/nmdc_mga08hnt47_metabat_bin.zip", + "md5_checksum": "eb5216cc4e09d88c4c59a76c4808a693", + "id": "nmdc:eb5216cc4e09d88c4c59a76c4808a693", + "file_size_bytes": 397044 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ec9" + }, + "description": "Assembled contigs fasta for gold:Gp0127651", + "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly_contigs.fna", + "file_size_bytes": 95235782, + "type": "nmdc:DataObject", + "id": "nmdc:49c49b255b8db84f4b79e0ad5a963c82", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14eca" + }, + "description": "Assembled scaffold fasta for gold:Gp0127651", + "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly_scaffolds.fna", + "file_size_bytes": 94693464, + "type": "nmdc:DataObject", + "id": "nmdc:6b1d7af20d7a316f3b13f1707ce7c518", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ecb" + }, + "description": "Assembled AGP file for gold:Gp0127651", + "url": "https://data.microbiomedata.org/data/1781_100353/assembly/assembly.agp", + "file_size_bytes": 11899059, + "type": "nmdc:DataObject", + "id": "nmdc:36f080b0d13effe19b1f18dfc041a341", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ecd" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127651", + "url": "https://data.microbiomedata.org/data/1781_100353/assembly/mapping_stats.txt", + "file_size_bytes": 13567640, + "type": "nmdc:DataObject", + "id": "nmdc:e1e806d81cc6cd9f22702e75849f5e31", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed6" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127651", + "url": 
"https://data.microbiomedata.org/data/1781_100353/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2010414032, + "type": "nmdc:DataObject", + "id": "nmdc:b041d1ee91abbe2d6ade41bc46c67ab9", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bd4" + }, + "id": "nmdc:4ee6b6c602c6f2c054154f48da58b304", + "name": "1781_100353.krona.html", + "description": "Gold:Gp0127651 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100353/ReadbasedAnalysis/centrifuge/1781_100353.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15bd9" + }, + "id": "nmdc:fba8766b1f1e3e5375ac56ecde508e96", + "name": "1781_100353.json", + "description": "Gold:Gp0127651 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100353/ReadbasedAnalysis/1781_100353.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16638" + }, + "id": "nmdc:2574b731c1f785d106e9033639833750", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127651", + "file_size_bytes": 75274019, + "url": "https://data.microbiomedata.org/data/1781_100353/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16644" + }, + "id": "nmdc:3fc9dca08829f51d49a574f916099e20", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127651", + "file_size_bytes": 18449153, + "url": "https://data.microbiomedata.org/data/1781_100353/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d44" + }, + "description": "EC TSV File for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_ec.tsv", + 
"md5_checksum": "2a7c5ba82dff4dd5d996ad5bc824103c", + "file_size_bytes": 3385, + "id": "nmdc:2a7c5ba82dff4dd5d996ad5bc824103c", + "name": "gold:Gp0127651_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d45" + }, + "description": "KO TSV File for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_ko.tsv", + "md5_checksum": "84dc1abc2d39254da6c3d2cd6cff6d9d", + "file_size_bytes": 3385, + "id": "nmdc:84dc1abc2d39254da6c3d2cd6cff6d9d", + "name": "gold:Gp0127651_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d46" + }, + "description": "Protein FAA for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_proteins.faa", + "md5_checksum": "67dfacdfc27cb6b0ec4787e1a40d9547", + "file_size_bytes": 3385, + "id": "nmdc:67dfacdfc27cb6b0ec4787e1a40d9547", + "name": "gold:Gp0127651_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d48" + }, + "description": "Structural annotation GFF file for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_structural_annotation.gff", + "md5_checksum": "714fb73a8b3011d0b2faea98eda477c3", + "file_size_bytes": 3385, + "id": "nmdc:714fb73a8b3011d0b2faea98eda477c3", + "name": "gold:Gp0127651_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4c" + }, + "description": "Functional annotation GFF file for gold:Gp0127651", + "url": "https://data.microbiomedata.org/1781_100353/img_annotation/Ga0482220_functional_annotation.gff", + "md5_checksum": "e25cb289f398c007806c72c080724872", + "file_size_bytes": 3385, + "id": 
"nmdc:e25cb289f398c007806c72c080724872", + "name": "gold:Gp0127651_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34721" + }, + "has_input": [ + "nmdc:8483663a943ff4c0fc0249353676bfc1", + "nmdc:838162ead3f121f5bc02bc1234a32a55", + "nmdc:f0c60a537e6867bf62fde15577669453" + ], + "too_short_contig_num": 168908, + "part_of": [ + "nmdc:mga08hnt47" + ], + "binned_contig_num": 216, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:6f012bfca6cb653f92eaf927003de0fa", + "nmdc:298e0a0c98ebe4fb673da7de9fcb03a2", + "nmdc:66fd77d80cc9257da98c5bce4cb30626", + "nmdc:06caec963e007225d1d9411078829100", + "nmdc:eb5216cc4e09d88c4c59a76c4808a693" + ], + "was_informed_by": "gold:Gp0127651", + "input_contig_num": 180437, + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga08hnt47", + "mags_list": [ + { + "number_of_contig": 216, + "completeness": 36.79, + "bin_name": "bins.1", + "gene_count": 1612, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 28 + } + ], + "unbinned_contig_num": 11313, + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:57:48+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9bf" + }, + "has_input": [ + "nmdc:8483663a943ff4c0fc0249353676bfc1" + ], + "part_of": [ + "nmdc:mga08hnt47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d8dc4f31293c549b12bbcab915d708cc", + 
"nmdc:415256907dcafaa68778a2ba358d9ac5", + "nmdc:f0c60a537e6867bf62fde15577669453", + "nmdc:e0f16b60c50581799b7ecb254e61e537", + "nmdc:6eb21304f0762bd8c11b98826d310321", + "nmdc:4ea7982c99cbb6d8ccc9fd949bee09ec", + "nmdc:f389dc8a93de9f21322db385b2788f5f", + "nmdc:8e6659ce96dfa72ceefda39c74fb1dce", + "nmdc:89bc9cf9183fed6700cde44fad41b830", + "nmdc:84aae368e77c1d07c6b6e8deecbc3f3b", + "nmdc:ee5612e5ee82ec2d57029d1bc4e1756f", + "nmdc:68c06be8d27d1697b4a6955537b318c8" + ], + "was_informed_by": "gold:Gp0127651", + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga08hnt47", + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:57:48+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fab" + }, + "has_input": [ + "nmdc:2791a196017767af3b5b21a3029799c0" + ], + "part_of": [ + "nmdc:mga08hnt47" + ], + "ctg_logsum": 192880, + "scaf_logsum": 193641, + "gap_pct": 0.00165, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8483663a943ff4c0fc0249353676bfc1", + "nmdc:ccca920c56ad3d050e2d8801bcbe4855", + "nmdc:f21e374c1c31c02bd0e41228cc7895c3", + "nmdc:f43ae7935184d10ba65961171efcac34", + "nmdc:838162ead3f121f5bc02bc1234a32a55" + ], + "asm_score": 4.164, + "was_informed_by": "gold:Gp0127651", + "ctg_powsum": 20759, + "scaf_max": 29106, + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "scaf_powsum": 20844, + "execution_resource": "NERSC-Cori", + "contigs": 180439, + "name": "Assembly Activity for nmdc:mga08hnt47", + "ctg_max": 29106, + "gc_std": 0.109, + "contig_bp": 88911005, + "gc_avg": 0.62144, + "started_at_time": "2021-10-11T02:27:15Z", + "scaf_bp": 88912475, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 180310, + "ended_at_time": "2021-10-11T03:57:48+00:00", + "ctg_l50": 492, + "ctg_l90": 292, + "ctg_n50": 51430, + "ctg_n90": 
149085, + "scaf_l50": 493, + "scaf_l90": 292, + "scaf_n50": 51225, + "scaf_n90": 148971 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b62" + }, + "id": "nmdc:omprc-11-nry91b19", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jqzk1523" + ], + "has_output": [ + "jgi:574fde907ded5e3df1ee1426" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127651" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c877" + }, + "has_input": [ + "nmdc:8254ce50b88be8c384fd37fe21e0d0c4" + ], + "part_of": [ + "nmdc:mga08hnt47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2791a196017767af3b5b21a3029799c0", + "nmdc:92cb49efbff5d5977e00dbad1c4d0d9f" + ], + "was_informed_by": "gold:Gp0127651", + "input_read_count": 23728904, + "output_read_bases": 3352071049, + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3583064504, + "name": "Read QC Activity for nmdc:mga08hnt47", + "output_read_count": 22416634, + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:57:48+00:00" + } + ], + 
"read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf42" + }, + "has_input": [ + "nmdc:2791a196017767af3b5b21a3029799c0" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:53ee263960c39126e039656a121deb96", + "nmdc:2781b9269b8e24f49a1a301d44d0e3d5", + "nmdc:0ed808b8ce29d39c3b555e7d5bf4c274", + "nmdc:a7d8f038b87bd28843e30c5dd115704b", + "nmdc:b4cbc81c986c67c1037c8b7280924683", + "nmdc:e0c61a191258597984a05d86eaf4d71f", + "nmdc:e1cbcfa86444a4ff4e992bcb6653d18f", + "nmdc:d2e10038a40e81e81ba94f75ed1ec52c", + "nmdc:ddba84cd45462d3a55df4ac62bb4eeb8" + ], + "was_informed_by": "gold:Gp0127651", + "id": "nmdc:158a1d28cb7b14fdffb0f092a644b0f6", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga08hnt47", + "started_at_time": "2021-10-11T02:27:15Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:57:48+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2116898122, + "type": "nmdc:DataObject", + "id": "jgi:574fde947ded5e3df1ee1429", + "name": "10533.2.165322.GTCTCCT-AAGGAGA.fastq.gz" + }, + { + "name": "Gp0127655_Filtered Reads", + "description": "Filtered Reads for Gp0127655", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filtered.fastq.gz", + "md5_checksum": "04b9014981f7035c39bd7f870613ed93", + "id": "nmdc:04b9014981f7035c39bd7f870613ed93", + "file_size_bytes": 1880069213 + }, + { + "name": "Gp0127655_Filtered Stats", + "description": "Filtered Stats for Gp0127655", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/qa/nmdc_mga0317978_filterStats.txt", + "md5_checksum": "b66266969ab3df4c1cb2b16c1fa7d098", + "id": "nmdc:b66266969ab3df4c1cb2b16c1fa7d098", + "file_size_bytes": 286 + }, + { + "name": "Gp0127655_Gottcha2 TSV 
report", + "description": "Gottcha2 TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report.tsv", + "md5_checksum": "46371c7bc8259e459f975f915aaac26f", + "id": "nmdc:46371c7bc8259e459f975f915aaac26f", + "file_size_bytes": 2178 + }, + { + "name": "Gp0127655_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_report_full.tsv", + "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", + "id": "nmdc:5dd9bc51105920f3f629e8106235af3b", + "file_size_bytes": 697690 + }, + { + "name": "Gp0127655_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127655", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_gottcha2_krona.html", + "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", + "id": "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "file_size_bytes": 231103 + }, + { + "name": "Gp0127655_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127655", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_classification.tsv", + "md5_checksum": "e3f410adc2347396abfdec2a848000d9", + "id": "nmdc:e3f410adc2347396abfdec2a848000d9", + "file_size_bytes": 1676897166 + }, + { + "name": "Gp0127655_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127655", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_report.tsv", + "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", + "id": "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "file_size_bytes": 253692 + }, + { + 
"name": "Gp0127655_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127655", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_centrifuge_krona.html", + "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", + "id": "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "file_size_bytes": 2329422 + }, + { + "name": "Gp0127655_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127655", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_classification.tsv", + "md5_checksum": "1d4f5a605d4549801fda16da567efe56", + "id": "nmdc:1d4f5a605d4549801fda16da567efe56", + "file_size_bytes": 1336793184 + }, + { + "name": "Gp0127655_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127655", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_report.tsv", + "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", + "id": "nmdc:8bb5c66575c7c953719ae9947600ad49", + "file_size_bytes": 632192 + }, + { + "name": "Gp0127655_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127655", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/ReadbasedAnalysis/nmdc_mga0317978_kraken2_krona.html", + "md5_checksum": "157f7672690ba8207808cc4386ff10a4", + "id": "nmdc:157f7672690ba8207808cc4386ff10a4", + "file_size_bytes": 3946317 + }, + { + "name": "Gp0127655_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127655", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_contigs.fna", + "md5_checksum": "98bc1e8aa3703e255a930f6c6f923453", + 
"id": "nmdc:98bc1e8aa3703e255a930f6c6f923453", + "file_size_bytes": 93445462 + }, + { + "name": "Gp0127655_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0127655", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_scaffolds.fna", + "md5_checksum": "769bd168524b84f2d10dfdb2a42a909d", + "id": "nmdc:769bd168524b84f2d10dfdb2a42a909d", + "file_size_bytes": 92895420 + }, + { + "name": "Gp0127655_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_covstats.txt", + "md5_checksum": "5bd5f8108ae1d767ea5a79ebde3d83de", + "id": "nmdc:5bd5f8108ae1d767ea5a79ebde3d83de", + "file_size_bytes": 14474338 + }, + { + "name": "Gp0127655_Assembled AGP file", + "description": "Assembled AGP file for Gp0127655", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_assembly.agp", + "md5_checksum": "933de420870147e58137b328e0d54d87", + "id": "nmdc:933de420870147e58137b328e0d54d87", + "file_size_bytes": 13523380 + }, + { + "name": "Gp0127655_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127655", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/assembly/nmdc_mga0317978_pairedMapped_sorted.bam", + "md5_checksum": "2b699163734ee73cbccc94e4767d36c0", + "id": "nmdc:2b699163734ee73cbccc94e4767d36c0", + "file_size_bytes": 2057808015 + }, + { + "name": "Gp0127655_Protein FAA", + "description": "Protein FAA for Gp0127655", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_proteins.faa", + "md5_checksum": "9b57eb78fd2e8f0af8b55cf5fb3a2bab", + "id": "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", + 
"file_size_bytes": 53898203 + }, + { + "name": "Gp0127655_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127655", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_structural_annotation.gff", + "md5_checksum": "6b11bf4eaf9723559b6015296b802252", + "id": "nmdc:6b11bf4eaf9723559b6015296b802252", + "file_size_bytes": 2515 + }, + { + "name": "Gp0127655_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127655", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_functional_annotation.gff", + "md5_checksum": "0940fbdf18becd76e7dd3abcfaba12b5", + "id": "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", + "file_size_bytes": 61535970 + }, + { + "name": "Gp0127655_KO TSV file", + "description": "KO TSV file for Gp0127655", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko.tsv", + "md5_checksum": "a1cd7e1382fd1818c42860a0555f1f57", + "id": "nmdc:a1cd7e1382fd1818c42860a0555f1f57", + "file_size_bytes": 6994761 + }, + { + "name": "Gp0127655_EC TSV file", + "description": "EC TSV file for Gp0127655", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ec.tsv", + "md5_checksum": "3a27c2da0a3d05e4c44547afb2875195", + "id": "nmdc:3a27c2da0a3d05e4c44547afb2875195", + "file_size_bytes": 4598688 + }, + { + "name": "Gp0127655_COG GFF file", + "description": "COG GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cog.gff", + "md5_checksum": "1c8529ca35ee0b275b8ca3d2b5c565ec", + "id": "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", + "file_size_bytes": 36290392 + }, + { + "name": "Gp0127655_PFAM GFF 
file", + "description": "PFAM GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_pfam.gff", + "md5_checksum": "8bf1c44c4a9fc7f55dcf58be1273b46f", + "id": "nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", + "file_size_bytes": 27016921 + }, + { + "name": "Gp0127655_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_tigrfam.gff", + "md5_checksum": "acb8325b4800ff62e3fda52b21b92ecc", + "id": "nmdc:acb8325b4800ff62e3fda52b21b92ecc", + "file_size_bytes": 2768301 + }, + { + "name": "Gp0127655_SMART GFF file", + "description": "SMART GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_smart.gff", + "md5_checksum": "a044873e470ce9f2be06ae99cd1cc242", + "id": "nmdc:a044873e470ce9f2be06ae99cd1cc242", + "file_size_bytes": 7806208 + }, + { + "name": "Gp0127655_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_supfam.gff", + "md5_checksum": "40f0627934454a354886609d7068a12c", + "id": "nmdc:40f0627934454a354886609d7068a12c", + "file_size_bytes": 45276498 + }, + { + "name": "Gp0127655_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_cath_funfam.gff", + "md5_checksum": "60255b31e223a7b5bad8f186b6f65d7c", + "id": "nmdc:60255b31e223a7b5bad8f186b6f65d7c", + "file_size_bytes": 33794110 + }, + { + "name": "Gp0127655_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/annotation/nmdc_mga0317978_ko_ec.gff", + "md5_checksum": "b8d559d4ea779c4076e3c9e1e92bddcf", + "id": "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf", + "file_size_bytes": 22249696 + }, + { + "name": 
"gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127655_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.tooShort.fa", + "md5_checksum": "58f2cc63798346be853bccacdd7ca30d", + "id": "nmdc:58f2cc63798346be853bccacdd7ca30d", + "file_size_bytes": 77075570 + }, + { + "name": "Gp0127655_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_bins.unbinned.fa", + "md5_checksum": "8b2dbaba9c1219096831ad99d8b7c056", + "id": "nmdc:8b2dbaba9c1219096831ad99d8b7c056", + "file_size_bytes": 14551969 + }, + { + "name": "Gp0127655_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0127655", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_checkm_qa.out", + "md5_checksum": "c562d8d5ccc986d672b4e48e006fafab", + "id": "nmdc:c562d8d5ccc986d672b4e48e006fafab", + "file_size_bytes": 775 + }, + { + "name": "Gp0127655_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127655", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_hqmq_bin.zip", + "md5_checksum": "2eaf0a7d519ac7c034d63797d735080c", + "id": 
"nmdc:2eaf0a7d519ac7c034d63797d735080c", + "file_size_bytes": 182 + }, + { + "name": "Gp0127655_metabat2 bins", + "description": "metabat2 bins for Gp0127655", + "url": "https://data.microbiomedata.org/data/nmdc:mga0317978/MAGs/nmdc_mga0317978_metabat_bin.zip", + "md5_checksum": "668a0a6dbd840dd2178a00c2af4c2237", + "id": "nmdc:668a0a6dbd840dd2178a00c2af4c2237", + "file_size_bytes": 527634 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14edb" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/mapping_stats.txt", + "file_size_bytes": 13742582, + "type": "nmdc:DataObject", + "id": "nmdc:f4e6a47ebd604f90384f130eca3e401e", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14edf" + }, + "description": "Assembled contigs fasta for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/assembly_contigs.fna", + "file_size_bytes": 92713706, + "type": "nmdc:DataObject", + "id": "nmdc:2860c363baa5fd6e5bbdc96a8d54b56b", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee0" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2031000089, + "type": "nmdc:DataObject", + "id": "nmdc:a20a83922a21eba2ec447dacc259c083", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee2" + }, + "description": "Assembled scaffold fasta for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/assembly_scaffolds.fna", + "file_size_bytes": 92163960, + "type": "nmdc:DataObject", + "id": "nmdc:07f2db98361b0d4e4d4c6a89294348ce", + "name": "assembly_scaffolds.fna", + 
"data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ee9" + }, + "description": "Assembled AGP file for gold:Gp0127655", + "url": "https://data.microbiomedata.org/data/1781_100357/assembly/assembly.agp", + "file_size_bytes": 12059276, + "type": "nmdc:DataObject", + "id": "nmdc:4f8b4cfdd8cbff990d5f4c5b932beb96", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15c0a" + }, + "id": "nmdc:8efdeab08615731f46e30a1cdc6bcb2d", + "name": "1781_100357.krona.html", + "description": "Gold:Gp0127655 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100357/ReadbasedAnalysis/centrifuge/1781_100357.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15c0e" + }, + "id": "nmdc:4df533784cd9ca8514f9622ba3ae0036", + "name": "1781_100357.json", + "description": "Gold:Gp0127655 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100357/ReadbasedAnalysis/1781_100357.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16641" + }, + "id": "nmdc:57a6185b7ec704380a4856d0083dbd1d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127655", + "file_size_bytes": 74915065, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16643" + }, + "id": "nmdc:7432d75e55847cf9a3c66589024e342c", + "name": "gold:Gp0127655.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0127655", + "file_size_bytes": 210579, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + 
"$oid": "649b003f1ae706d7b5b16646" + }, + "id": "nmdc:a9494cde349debe8557cbd59c43138fe", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0127655", + "file_size_bytes": 1085, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664a" + }, + "id": "nmdc:299ed9ebee6f2a5e5c202a11b9e5536a", + "name": "gold:Gp0127655.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0127655", + "file_size_bytes": 216122, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664b" + }, + "id": "nmdc:61c5dd80a3ac06408612da5aa2ad8bc1", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0127655", + "file_size_bytes": 15144355, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664c" + }, + "id": "nmdc:556fbc4cc2220b73f70dce6b46ff34c7", + "name": "gold:Gp0127655.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0127655", + "file_size_bytes": 692364, + "url": "https://data.microbiomedata.org/data/1781_100357/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d52" + }, + "description": "KO TSV File for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_ko.tsv", + "md5_checksum": "6d1185f4034e364b74109d40326a450a", + "file_size_bytes": 3385, + "id": "nmdc:6d1185f4034e364b74109d40326a450a", + "name": "gold:Gp0127655_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": 
"nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d53" + }, + "description": "Structural annotation GFF file for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_structural_annotation.gff", + "md5_checksum": "05e2702ecae6ba0ba0b0898132850b9f", + "file_size_bytes": 3385, + "id": "nmdc:05e2702ecae6ba0ba0b0898132850b9f", + "name": "gold:Gp0127655_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d56" + }, + "description": "Functional annotation GFF file for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_functional_annotation.gff", + "md5_checksum": "0b4a5dc91c42b7fea3fd514d5cb3138b", + "file_size_bytes": 3385, + "id": "nmdc:0b4a5dc91c42b7fea3fd514d5cb3138b", + "name": "gold:Gp0127655_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d57" + }, + "description": "EC TSV File for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_ec.tsv", + "md5_checksum": "32c6c6dbce4a1c6ab92810a86f90c574", + "file_size_bytes": 3385, + "id": "nmdc:32c6c6dbce4a1c6ab92810a86f90c574", + "name": "gold:Gp0127655_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d58" + }, + "description": "Protein FAA for gold:Gp0127655", + "url": "https://data.microbiomedata.org/1781_100357/img_annotation/Ga0482216_proteins.faa", + "md5_checksum": "a31096eb3e473fd0c68d09096bc3fd85", + "file_size_bytes": 3385, + "id": "nmdc:a31096eb3e473fd0c68d09096bc3fd85", + "name": "gold:Gp0127655_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + 
"$oid": "649b0052ec087f6bbab34725" + }, + "has_input": [ + "nmdc:98bc1e8aa3703e255a930f6c6f923453", + "nmdc:2b699163734ee73cbccc94e4767d36c0", + "nmdc:0940fbdf18becd76e7dd3abcfaba12b5" + ], + "too_short_contig_num": 173159, + "part_of": [ + "nmdc:mga0317978" + ], + "binned_contig_num": 412, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:58f2cc63798346be853bccacdd7ca30d", + "nmdc:8b2dbaba9c1219096831ad99d8b7c056", + "nmdc:c562d8d5ccc986d672b4e48e006fafab", + "nmdc:2eaf0a7d519ac7c034d63797d735080c", + "nmdc:668a0a6dbd840dd2178a00c2af4c2237" + ], + "was_informed_by": "gold:Gp0127655", + "input_contig_num": 182939, + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0317978", + "mags_list": [ + { + "number_of_contig": 412, + "completeness": 27.84, + "bin_name": "bins.1", + "gene_count": 2086, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 + } + ], + "unbinned_contig_num": 9368, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9c4" + }, + "has_input": [ + "nmdc:98bc1e8aa3703e255a930f6c6f923453" + ], + "part_of": [ + "nmdc:mga0317978" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9b57eb78fd2e8f0af8b55cf5fb3a2bab", + "nmdc:6b11bf4eaf9723559b6015296b802252", + "nmdc:0940fbdf18becd76e7dd3abcfaba12b5", + "nmdc:a1cd7e1382fd1818c42860a0555f1f57", + "nmdc:3a27c2da0a3d05e4c44547afb2875195", + "nmdc:1c8529ca35ee0b275b8ca3d2b5c565ec", + 
"nmdc:8bf1c44c4a9fc7f55dcf58be1273b46f", + "nmdc:acb8325b4800ff62e3fda52b21b92ecc", + "nmdc:a044873e470ce9f2be06ae99cd1cc242", + "nmdc:40f0627934454a354886609d7068a12c", + "nmdc:60255b31e223a7b5bad8f186b6f65d7c", + "nmdc:b8d559d4ea779c4076e3c9e1e92bddcf" + ], + "was_informed_by": "gold:Gp0127655", + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0317978", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb3" + }, + "has_input": [ + "nmdc:04b9014981f7035c39bd7f870613ed93" + ], + "part_of": [ + "nmdc:mga0317978" + ], + "ctg_logsum": 170806, + "scaf_logsum": 171254, + "gap_pct": 0.00086, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:98bc1e8aa3703e255a930f6c6f923453", + "nmdc:769bd168524b84f2d10dfdb2a42a909d", + "nmdc:5bd5f8108ae1d767ea5a79ebde3d83de", + "nmdc:933de420870147e58137b328e0d54d87", + "nmdc:2b699163734ee73cbccc94e4767d36c0" + ], + "asm_score": 3.393, + "was_informed_by": "gold:Gp0127655", + "ctg_powsum": 18408, + "scaf_max": 16317, + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "scaf_powsum": 18458, + "execution_resource": "NERSC-Cori", + "contigs": 182939, + "name": "Assembly Activity for nmdc:mga0317978", + "ctg_max": 16317, + "gc_std": 0.09607, + "contig_bp": 86362605, + "gc_avg": 0.63666, + "started_at_time": "2021-10-11T02:23:42Z", + "scaf_bp": 86363345, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 182865, + "ended_at_time": "2021-10-11T03:21:25+00:00", + "ctg_l50": 456, + "ctg_l90": 289, + "ctg_n50": 53760, + "ctg_n90": 154881, + "scaf_l50": 457, + "scaf_l90": 289, + "scaf_n50": 53484, + "scaf_n90": 154812 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b63" + }, + "id": "nmdc:omprc-11-0n8y1d07", + 
"name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-a7fxtx60" + ], + "has_output": [ + "jgi:574fde947ded5e3df1ee1429" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127655" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c880" + }, + "has_input": [ + "nmdc:898017d076d5d2daaf902e9141f0600a" + ], + "part_of": [ + "nmdc:mga0317978" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:04b9014981f7035c39bd7f870613ed93", + "nmdc:b66266969ab3df4c1cb2b16c1fa7d098" + ], + "was_informed_by": "gold:Gp0127655", + "input_read_count": 23985924, + "output_read_bases": 3400452550, + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3621874524, + "name": "Read QC Activity for nmdc:mga0317978", + "output_read_count": 22751496, + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:21:25+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf47" + }, + "has_input": [ + "nmdc:04b9014981f7035c39bd7f870613ed93" + ], + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:46371c7bc8259e459f975f915aaac26f", + "nmdc:5dd9bc51105920f3f629e8106235af3b", + "nmdc:1879e0e9af6d568ac9c7ffdb47fc7f12", + "nmdc:e3f410adc2347396abfdec2a848000d9", + "nmdc:ed6c4f17d6ae759487164ca8ed5edf45", + "nmdc:6d54f73f251de1bd5c4ca8665f098ac0", + "nmdc:1d4f5a605d4549801fda16da567efe56", + "nmdc:8bb5c66575c7c953719ae9947600ad49", + "nmdc:157f7672690ba8207808cc4386ff10a4" + ], + "was_informed_by": "gold:Gp0127655", + "id": "nmdc:65af38817454a315aeb8c67ab27e1469", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0317978", + "started_at_time": "2021-10-11T02:23:42Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:21:25+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 1827996307, + "type": "nmdc:DataObject", + "id": "jgi:574fde937ded5e3df1ee1428", + "name": "10533.2.165322.CCTTCCT-AAGGAAG.fastq.gz" + }, + { + "name": "Gp0127653_Filtered Reads", + "description": "Filtered Reads for Gp0127653", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filtered.fastq.gz", + "md5_checksum": "8eec0e9c14abb418b906504d1675ecc5", + "id": "nmdc:8eec0e9c14abb418b906504d1675ecc5", + "file_size_bytes": 1661017378 + }, + { + "name": "Gp0127653_Filtered Stats", + "description": "Filtered Stats for Gp0127653", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/qa/nmdc_mga079y988_filterStats.txt", + "md5_checksum": "5d07358bbc48f25e157ffc91ea7ae3e0", + "id": "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0", + "file_size_bytes": 286 + }, + { + "name": "Gp0127653_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report.tsv", + 
"md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", + "id": "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "file_size_bytes": 3812 + }, + { + "name": "Gp0127653_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_report_full.tsv", + "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", + "id": "nmdc:dbf03e26f7e1529762830161fe1f1906", + "file_size_bytes": 857087 + }, + { + "name": "Gp0127653_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0127653", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_gottcha2_krona.html", + "md5_checksum": "284ce1b28b8964cb525025d678277dba", + "id": "nmdc:284ce1b28b8964cb525025d678277dba", + "file_size_bytes": 235621 + }, + { + "name": "Gp0127653_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0127653", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_classification.tsv", + "md5_checksum": "a379527f61806391e42b3512146013a8", + "id": "nmdc:a379527f61806391e42b3512146013a8", + "file_size_bytes": 1437707313 + }, + { + "name": "Gp0127653_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0127653", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_report.tsv", + "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", + "id": "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "file_size_bytes": 255105 + }, + { + "name": "Gp0127653_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0127653", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_centrifuge_krona.html", + "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", + "id": "nmdc:3219058371bf2f8081b2dd2b434ec145", + "file_size_bytes": 2327985 + }, + { + "name": "Gp0127653_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0127653", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_classification.tsv", + "md5_checksum": "be29ebcd7358653afec7381f9ca43431", + "id": "nmdc:be29ebcd7358653afec7381f9ca43431", + "file_size_bytes": 1164013677 + }, + { + "name": "Gp0127653_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0127653", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_report.tsv", + "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", + "id": "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "file_size_bytes": 638368 + }, + { + "name": "Gp0127653_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0127653", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/ReadbasedAnalysis/nmdc_mga079y988_kraken2_krona.html", + "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", + "id": "nmdc:4c1aae1a46e51359f9146e48fff0e7f0", + "file_size_bytes": 3982485 + }, + { + "name": "Gp0127653_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0127653", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_contigs.fna", + "md5_checksum": "0f2b82878f54787c127bf03338d5c605", + "id": "nmdc:0f2b82878f54787c127bf03338d5c605", + "file_size_bytes": 18722308 + }, + { + "name": "Gp0127653_Assembled scaffold fasta", + "description": "Assembled scaffold 
fasta for Gp0127653", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_scaffolds.fna", + "md5_checksum": "02f8e7222e9e6f45c388a189ca66e1f9", + "id": "nmdc:02f8e7222e9e6f45c388a189ca66e1f9", + "file_size_bytes": 18575622 + }, + { + "name": "Gp0127653_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_covstats.txt", + "md5_checksum": "eea8a4b58ca07019d0050b030be3a3d1", + "id": "nmdc:eea8a4b58ca07019d0050b030be3a3d1", + "file_size_bytes": 3824141 + }, + { + "name": "Gp0127653_Assembled AGP file", + "description": "Assembled AGP file for Gp0127653", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_assembly.agp", + "md5_checksum": "44b1ad59bd14c3367ac0fa2ca37aa057", + "id": "nmdc:44b1ad59bd14c3367ac0fa2ca37aa057", + "file_size_bytes": 3551123 + }, + { + "name": "Gp0127653_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0127653", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/assembly/nmdc_mga079y988_pairedMapped_sorted.bam", + "md5_checksum": "ccd5ba8558a92751c59989aa81054e1a", + "id": "nmdc:ccd5ba8558a92751c59989aa81054e1a", + "file_size_bytes": 1757373378 + }, + { + "name": "Gp0127653_Protein FAA", + "description": "Protein FAA for Gp0127653", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_proteins.faa", + "md5_checksum": "81f16ca99f73a3314a66e6b24d23376f", + "id": "nmdc:81f16ca99f73a3314a66e6b24d23376f", + "file_size_bytes": 11129064 + }, + { + "name": "Gp0127653_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0127653", + "data_object_type": 
"Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_structural_annotation.gff", + "md5_checksum": "66bb16ef28196379647d319da50426dd", + "id": "nmdc:66bb16ef28196379647d319da50426dd", + "file_size_bytes": 8094827 + }, + { + "name": "Gp0127653_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0127653", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_functional_annotation.gff", + "md5_checksum": "1e7dac5f12cc086509ff905f7133b15a", + "id": "nmdc:1e7dac5f12cc086509ff905f7133b15a", + "file_size_bytes": 13821021 + }, + { + "name": "Gp0127653_KO TSV file", + "description": "KO TSV file for Gp0127653", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko.tsv", + "md5_checksum": "2a7343eb6364d769a1c43aa5c94daee8", + "id": "nmdc:2a7343eb6364d769a1c43aa5c94daee8", + "file_size_bytes": 1578987 + }, + { + "name": "Gp0127653_EC TSV file", + "description": "EC TSV file for Gp0127653", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ec.tsv", + "md5_checksum": "b2cee4d35f68d1f5731bff3af5904fa4", + "id": "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", + "file_size_bytes": 1029657 + }, + { + "name": "Gp0127653_COG GFF file", + "description": "COG GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cog.gff", + "md5_checksum": "1d45960b1ba5e27af42c736ec583ecd4", + "id": "nmdc:1d45960b1ba5e27af42c736ec583ecd4", + "file_size_bytes": 7241411 + }, + { + "name": "Gp0127653_PFAM GFF file", + "description": "PFAM GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_pfam.gff", + "md5_checksum": 
"3dec47a0a04865ecdcd9ed7cbc78eca4", + "id": "nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", + "file_size_bytes": 5221877 + }, + { + "name": "Gp0127653_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_tigrfam.gff", + "md5_checksum": "043322f3cd31d50faf4d4e0ffd1c8427", + "id": "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", + "file_size_bytes": 472233 + }, + { + "name": "Gp0127653_SMART GFF file", + "description": "SMART GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_smart.gff", + "md5_checksum": "6bed0fc7a7be284936c69fc1faac4be6", + "id": "nmdc:6bed0fc7a7be284936c69fc1faac4be6", + "file_size_bytes": 1586537 + }, + { + "name": "Gp0127653_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_supfam.gff", + "md5_checksum": "052d3fb0080390255df5772f79e5ef2c", + "id": "nmdc:052d3fb0080390255df5772f79e5ef2c", + "file_size_bytes": 9232981 + }, + { + "name": "Gp0127653_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_cath_funfam.gff", + "md5_checksum": "e66a3b85c713e8766e5181da2e393984", + "id": "nmdc:e66a3b85c713e8766e5181da2e393984", + "file_size_bytes": 6697496 + }, + { + "name": "Gp0127653_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0127653", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/annotation/nmdc_mga079y988_ko_ec.gff", + "md5_checksum": "949e3b137b3a0591ed9de493ee5c530b", + "id": "nmdc:949e3b137b3a0591ed9de493ee5c530b", + "file_size_bytes": 5035400 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0127653_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0127653", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga079y988/MAGs/nmdc_mga079y988_hqmq_bin.zip", + "md5_checksum": "1029b97dba32dab780f4267f8224619f", + "id": "nmdc:1029b97dba32dab780f4267f8224619f", + "file_size_bytes": 182 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed2" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/mapping_stats.txt", + "file_size_bytes": 3628417, + "type": "nmdc:DataObject", + "id": "nmdc:59f6f5bd2480f717a09946125a0cac46", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed3" + }, + "description": "Assembled contigs fasta for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/assembly_contigs.fna", + "file_size_bytes": 18526584, + "type": "nmdc:DataObject", + "id": "nmdc:c9708409d9e8f45dcc89e688b3482e5e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed4" + }, + "description": "Assembled scaffold fasta for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/assembly_scaffolds.fna", + "file_size_bytes": 18379922, + "type": "nmdc:DataObject", + "id": "nmdc:c76c1fdcd6be23a0d7add5ea3a23f754", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ed9" + }, + "description": "Assembled AGP file for gold:Gp0127653", + "url": 
"https://data.microbiomedata.org/data/1781_100355/assembly/assembly.agp", + "file_size_bytes": 3159611, + "type": "nmdc:DataObject", + "id": "nmdc:ac8be882728344819f210f42d5ea8577", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14edd" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0127653", + "url": "https://data.microbiomedata.org/data/1781_100355/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 1732533897, + "type": "nmdc:DataObject", + "id": "nmdc:fadc083a0534b4961c902c8af8a8ebba", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15be6" + }, + "id": "nmdc:2d6f886bc9561f305d3b15be14bc192f", + "name": "1781_100355.krona.html", + "description": "Gold:Gp0127653 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_100355/ReadbasedAnalysis/centrifuge/1781_100355.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15c08" + }, + "id": "nmdc:bcf6968cd97d5db72fbe6d048a638fd7", + "name": "1781_100355.json", + "description": "Gold:Gp0127653 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_100355/ReadbasedAnalysis/1781_100355.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16642" + }, + "id": "nmdc:9429c50986c3904bdd48e585bfc74dfd", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0127653", + "file_size_bytes": 17290021, + "url": "https://data.microbiomedata.org/data/1781_100355/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16647" + }, + "id": "nmdc:eb6bfb2af22e43df303aa691a87889bc", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file 
from metabat2 for gold:Gp0127653", + "file_size_bytes": 807307, + "url": "https://data.microbiomedata.org/data/1781_100355/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4d" + }, + "description": "EC TSV File for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_ec.tsv", + "md5_checksum": "05cc9ce5321d6bc909ab63b8cbc59d02", + "file_size_bytes": 3385, + "id": "nmdc:05cc9ce5321d6bc909ab63b8cbc59d02", + "name": "gold:Gp0127653_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d4f" + }, + "description": "Protein FAA for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_proteins.faa", + "md5_checksum": "658231efecf9d087ec2a6e9467f4e968", + "file_size_bytes": 3385, + "id": "nmdc:658231efecf9d087ec2a6e9467f4e968", + "name": "gold:Gp0127653_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d51" + }, + "description": "Functional annotation GFF file for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_functional_annotation.gff", + "md5_checksum": "8d83e502a533b5db8cd3bc943ae8b18b", + "file_size_bytes": 3385, + "id": "nmdc:8d83e502a533b5db8cd3bc943ae8b18b", + "name": "gold:Gp0127653_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d54" + }, + "description": "KO TSV File for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_ko.tsv", + "md5_checksum": "44f8e708349a1effdff745880f4fdd12", + "file_size_bytes": 3385, + "id": "nmdc:44f8e708349a1effdff745880f4fdd12", + "name": "gold:Gp0127653_KO TSV File", + "data_object_type": 
"Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d5b" + }, + "description": "Structural annotation GFF file for gold:Gp0127653", + "url": "https://data.microbiomedata.org/1781_100355/img_annotation/Ga0482218_structural_annotation.gff", + "md5_checksum": "511ae319ddff2bdcbc3296d951e42d7e", + "file_size_bytes": 3385, + "id": "nmdc:511ae319ddff2bdcbc3296d951e42d7e", + "name": "gold:Gp0127653_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34720" + }, + "has_input": [ + "nmdc:0f2b82878f54787c127bf03338d5c605", + "nmdc:ccd5ba8558a92751c59989aa81054e1a", + "nmdc:1e7dac5f12cc086509ff905f7133b15a" + ], + "too_short_contig_num": 48540, + "part_of": [ + "nmdc:mga079y988" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:1029b97dba32dab780f4267f8224619f" + ], + "was_informed_by": "gold:Gp0127653", + "input_contig_num": 48931, + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga079y988", + "unbinned_contig_num": 391, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9be" + }, + "has_input": [ + "nmdc:0f2b82878f54787c127bf03338d5c605" + ], + "part_of": [ + "nmdc:mga079y988" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:81f16ca99f73a3314a66e6b24d23376f", + "nmdc:66bb16ef28196379647d319da50426dd", + "nmdc:1e7dac5f12cc086509ff905f7133b15a", + "nmdc:2a7343eb6364d769a1c43aa5c94daee8", + "nmdc:b2cee4d35f68d1f5731bff3af5904fa4", + "nmdc:1d45960b1ba5e27af42c736ec583ecd4", + 
"nmdc:3dec47a0a04865ecdcd9ed7cbc78eca4", + "nmdc:043322f3cd31d50faf4d4e0ffd1c8427", + "nmdc:6bed0fc7a7be284936c69fc1faac4be6", + "nmdc:052d3fb0080390255df5772f79e5ef2c", + "nmdc:e66a3b85c713e8766e5181da2e393984", + "nmdc:949e3b137b3a0591ed9de493ee5c530b" + ], + "was_informed_by": "gold:Gp0127653", + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga079y988", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139faa" + }, + "has_input": [ + "nmdc:8eec0e9c14abb418b906504d1675ecc5" + ], + "part_of": [ + "nmdc:mga079y988" + ], + "ctg_logsum": 9125.582, + "scaf_logsum": 9156.336, + "gap_pct": 0.00094, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0f2b82878f54787c127bf03338d5c605", + "nmdc:02f8e7222e9e6f45c388a189ca66e1f9", + "nmdc:eea8a4b58ca07019d0050b030be3a3d1", + "nmdc:44b1ad59bd14c3367ac0fa2ca37aa057", + "nmdc:ccd5ba8558a92751c59989aa81054e1a" + ], + "asm_score": 13.921, + "was_informed_by": "gold:Gp0127653", + "ctg_powsum": 1096.518, + "scaf_max": 58655, + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "scaf_powsum": 1101.795, + "execution_resource": "NERSC-Cori", + "contigs": 48932, + "name": "Assembly Activity for nmdc:mga079y988", + "ctg_max": 58655, + "gc_std": 0.10928, + "gc_avg": 0.57867, + "contig_bp": 16963869, + "started_at_time": "2021-10-11T02:23:35Z", + "scaf_bp": 16964029, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 48925, + "ended_at_time": "2021-11-13T18:52:13+00:00", + "ctg_l50": 309, + "ctg_l90": 281, + "ctg_n50": 19544, + "ctg_n90": 43034, + "scaf_l50": 309, + "scaf_l90": 281, + "scaf_n50": 19539, + "scaf_n90": 43028, + "scaf_l_gt50k": 58655, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.34576103 + } + ], + "omics_processing_set": [ + { + 
"_id": { + "$oid": "649b009773e8249959349b64" + }, + "id": "nmdc:omprc-11-p1735e67", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-k4wa0808" + ], + "has_output": [ + "jgi:574fde937ded5e3df1ee1428" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2016-01-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127653" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c878" + }, + "has_input": [ + "nmdc:84ffabc3fbd7e759cd2352ec513b89a0" + ], + "part_of": [ + "nmdc:mga079y988" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8eec0e9c14abb418b906504d1675ecc5", + "nmdc:5d07358bbc48f25e157ffc91ea7ae3e0" + ], + "was_informed_by": "gold:Gp0127653", + "input_read_count": 20780788, + "output_read_bases": 2918466866, + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 3137898988, + "name": "Read QC Activity for nmdc:mga079y988", + "output_read_count": 19516330, + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-11-13T18:52:13+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf3d" + }, + "has_input": [ + 
"nmdc:8eec0e9c14abb418b906504d1675ecc5" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fbbad3e21108a372e3d53c9ee8fc3cd5", + "nmdc:dbf03e26f7e1529762830161fe1f1906", + "nmdc:284ce1b28b8964cb525025d678277dba", + "nmdc:a379527f61806391e42b3512146013a8", + "nmdc:3659ac6c99dea0fb1385c58eac8b1335", + "nmdc:3219058371bf2f8081b2dd2b434ec145", + "nmdc:be29ebcd7358653afec7381f9ca43431", + "nmdc:a9e6ab6db23ddce02317e3e21ea3f618", + "nmdc:4c1aae1a46e51359f9146e48fff0e7f0" + ], + "was_informed_by": "gold:Gp0127653", + "id": "nmdc:676183a611b251dc6b8f0b8ca600181b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga079y988", + "started_at_time": "2021-10-11T02:23:35Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-11-13T18:52:13+00:00" + } + ] + } +] \ No newline at end of file diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 26df0e81..e784244a 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -10,6 +10,8 @@ import json import click +from linkml_runtime.dumpers import json_dumper + from nmdc_automation.api import NmdcRuntimeApi, NmdcRuntimeUserApi from nmdc_automation.config import Config import nmdc_schema.nmdc as nmdc @@ -148,10 +150,10 @@ def extract_records(ctx, study_id): retrieved_databases.append(db) - with open(f"{study_id}_assocated_record_dump.json", 'w') as json_file: - json.dump( - [o.__dict__ for o in retrieved_databases], json_file, indent=4 - ) + json_data = json.loads(json_dumper.dumps(retrieved_databases, inject_type=False)) + db_outfile = DATA_DIR.joinpath(f"{study_id}_associated_record_dump.json") + with open(db_outfile, "w") as f: + f.write(json.dumps(json_data, indent=4)) @cli.command() @@ -171,6 +173,8 @@ def process_records(ctx, dryrun, study_id, data_dir): """ start_time = time.time() logging.info(f"Processing 
workflow records for study_id: {study_id}") + if dryrun: + logging.info("Running in dryrun mode") # Get API client config = ctx.obj['site_config'] @@ -180,6 +184,7 @@ def process_records(ctx, dryrun, study_id, data_dir): # Get Database dump file paths and the data directory db_infile, db_outfile = _get_database_paths(study_id, dryrun) data_dir = _get_data_dir(data_dir, dryrun) + logging.info(f"Using data_dir: {data_dir}") # Initialize re-ID tool reid_tool = ReIdTool(api_client, data_dir) @@ -212,10 +217,10 @@ def process_records(ctx, dryrun, study_id, data_dir): re_ided_db_records.append(new_db) - json_data = json.dumps(re_ided_db_records, default=lambda o: o.__dict__, indent=4) - logging.info(f"Writing re_ided_db_records to {db_outfile}") + json_data = json.loads(json_dumper.dumps(re_ided_db_records, + inject_type=False)) with open(db_outfile, "w") as f: - f.write(json_data) + f.write(json.dumps(json_data, indent=4)) def _get_data_dir(data_dir, dryrun): From c38070af6e6b97c146cbe26263aa3e443f6e5146 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 15 Nov 2023 09:57:33 -0800 Subject: [PATCH 61/91] Create dryrun_associated_record_dump.json --- .../data/dryrun_associated_record_dump.json | 835 ++++++++++++++++++ 1 file changed, 835 insertions(+) create mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json new file mode 100644 index 00000000..64fa1542 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/data/dryrun_associated_record_dump.json @@ -0,0 +1,835 @@ +[ + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2861414297, + "type": "nmdc:DataObject", + "id": "jgi:55d740280d8785342fcf7e39", + "name": "9422.8.132674.GTTTCG.fastq.gz" + }, + { + "name": "Gp0115663_Filtered Reads", + "description": "Filtered Reads for 
Gp0115663", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "id": "nmdc:7bf778baef033d36f118f8591256d6ef", + "file_size_bytes": 2571324879 + }, + { + "name": "Gp0115663_Filtered Stats", + "description": "Filtered Stats for Gp0115663", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filterStats.txt", + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "id": "nmdc:b99ce8adc125c95f0bfdadf36a3f6848", + "file_size_bytes": 290 + }, + { + "name": "Gp0115663_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report.tsv", + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "id": "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "file_size_bytes": 13174 + }, + { + "name": "Gp0115663_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_report_full.tsv", + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "id": "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "file_size_bytes": 1035818 + }, + { + "name": "Gp0115663_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0115663", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_gottcha2_krona.html", + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "id": "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "file_size_bytes": 262669 + }, + { + "name": "Gp0115663_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0115663", + "data_object_type": "Centrifuge Taxonomic 
Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_classification.tsv", + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + "id": "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "file_size_bytes": 2189843623 + }, + { + "name": "Gp0115663_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0115663", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_report.tsv", + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "id": "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "file_size_bytes": 260134 + }, + { + "name": "Gp0115663_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0115663", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_centrifuge_krona.html", + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "id": "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "file_size_bytes": 2343980 + }, + { + "name": "Gp0115663_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0115663", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_classification.tsv", + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "id": "nmdc:7ca01ea379f0baed96f87d1435925f95", + "file_size_bytes": 1785563917 + }, + { + "name": "Gp0115663_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0115663", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_report.tsv", + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "id": "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "file_size_bytes": 699896 + }, + { + 
"name": "Gp0115663_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0115663", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/ReadbasedAnalysis/nmdc_mga0h9dt75_kraken2_krona.html", + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "id": "nmdc:94ee1bc2dc74830a21d5c3471d6cf223", + "file_size_bytes": 4221977 + }, + { + "name": "Gp0115663_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0115663", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna", + "md5_checksum": "deddd162bf0128fba13b3bc1ca38d1aa", + "id": "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "file_size_bytes": 90115831 + }, + { + "name": "Gp0115663_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0115663", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna", + "md5_checksum": "b3573e3cda5a06611de71ca04c5c14cc", + "id": "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "file_size_bytes": 89604715 + }, + { + "name": "Gp0115663_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt", + "md5_checksum": "c6d0d4cea985ca6fb50a060e15b4a856", + "id": "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "file_size_bytes": 13412363 + }, + { + "name": "Gp0115663_Assembled AGP file", + "description": "Assembled AGP file for Gp0115663", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp", + "md5_checksum": "f450e3800e17691d5874c89fc46c186a", + "id": "nmdc:f450e3800e17691d5874c89fc46c186a", + "file_size_bytes": 12542171 + }, + { + "name": "Gp0115663_Metagenome Alignment BAM file", + 
"description": "Metagenome Alignment BAM file for Gp0115663", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_pairedMapped_sorted.bam", + "md5_checksum": "31dc958d116d02122509e90b0883954f", + "id": "nmdc:31dc958d116d02122509e90b0883954f", + "file_size_bytes": 2773429299 + }, + { + "name": "Gp0115663_Protein FAA", + "description": "Protein FAA for Gp0115663", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_proteins.faa", + "md5_checksum": "879988d212ecec46928b8598e2f8391f", + "id": "nmdc:879988d212ecec46928b8598e2f8391f", + "file_size_bytes": 50165060 + }, + { + "name": "Gp0115663_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0115663", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_structural_annotation.gff", + "md5_checksum": "884b95102f5965cc0ee2d9b7f198e5a4", + "id": "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", + "file_size_bytes": 2767 + }, + { + "name": "Gp0115663_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0115663", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_functional_annotation.gff", + "md5_checksum": "002e4ebc728f8b91cb5f298d340ab013", + "id": "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "file_size_bytes": 55139586 + }, + { + "name": "Gp0115663_KO TSV file", + "description": "KO TSV file for Gp0115663", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko.tsv", + "md5_checksum": "6851078f29716d89e3f41f0969ae7bf0", + "id": "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "file_size_bytes": 6023696 + }, + { + "name": "Gp0115663_EC 
TSV file", + "description": "EC TSV file for Gp0115663", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ec.tsv", + "md5_checksum": "4f88c89459f36655eb7c1eceec19602a", + "id": "nmdc:4f88c89459f36655eb7c1eceec19602a", + "file_size_bytes": 3982918 + }, + { + "name": "Gp0115663_COG GFF file", + "description": "COG GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cog.gff", + "md5_checksum": "a068b9ce6ebb7deb15ff932b513817a9", + "id": "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "file_size_bytes": 27362917 + }, + { + "name": "Gp0115663_PFAM GFF file", + "description": "PFAM GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_pfam.gff", + "md5_checksum": "618b18fa8635c80cc0091371f451a6f0", + "id": "nmdc:618b18fa8635c80cc0091371f451a6f0", + "file_size_bytes": 21572048 + }, + { + "name": "Gp0115663_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_tigrfam.gff", + "md5_checksum": "17e55a1a1a133ffbf8cbe4024d997a6f", + "id": "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "file_size_bytes": 2900068 + }, + { + "name": "Gp0115663_SMART GFF file", + "description": "SMART GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_smart.gff", + "md5_checksum": "8f80142c0f5723af5a3b44b7ff4e4339", + "id": "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "file_size_bytes": 6905519 + }, + { + "name": "Gp0115663_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_supfam.gff", + "md5_checksum": "fdd2e8741ffef40db383674a10bb4d11", + "id": "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "file_size_bytes": 38787856 
+ }, + { + "name": "Gp0115663_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_cath_funfam.gff", + "md5_checksum": "8eb49ac20a6c2721d6db227f4fb3356a", + "id": "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "file_size_bytes": 30134783 + }, + { + "name": "Gp0115663_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/annotation/nmdc_mga0h9dt75_ko_ec.gff", + "md5_checksum": "75f481e0d98793cfb4f9508cb3e31622", + "id": "nmdc:75f481e0d98793cfb4f9508cb3e31622", + "file_size_bytes": 19194308 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0115663_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.tooShort.fa", + "md5_checksum": "c092b018cb4652c4ca0620b37a4b3fad", + "id": "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "file_size_bytes": 70411007 + }, + { + "name": "Gp0115663_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_bins.unbinned.fa", + "md5_checksum": "70d7c8a307f47adb05056bee1b01f9d4", + "id": "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "file_size_bytes": 15998690 + }, + { + "name": "Gp0115663_metabat2 bin checkm quality assessment result", + "description": 
"metabat2 bin checkm quality assessment result for Gp0115663", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_checkm_qa.out", + "md5_checksum": "4545ab2039ae70f4439a93316f4fb7bc", + "id": "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "file_size_bytes": 1530 + }, + { + "name": "Gp0115663_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0115663", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_hqmq_bin.zip", + "md5_checksum": "280b63ae1cc1fa8d6154a0681d47c399", + "id": "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "file_size_bytes": 182 + }, + { + "name": "Gp0115663_metabat2 bins", + "description": "metabat2 bins for Gp0115663", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/MAGs/nmdc_mga0h9dt75_metabat_bin.zip", + "md5_checksum": "27c07072f175571200b5931550adb8aa", + "id": "nmdc:27c07072f175571200b5931550adb8aa", + "file_size_bytes": 1114314 + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5a" + }, + "description": "Assembled scaffold fasta for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly_scaffolds.fna", + "file_size_bytes": 88756490, + "type": "nmdc:DataObject", + "id": "nmdc:321a497bc1c3cf25affc8e659b746ba5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5d" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/mapping_stats.txt", + "file_size_bytes": 12563453, + "type": "nmdc:DataObject", + "id": "nmdc:ad47215b9b079c1d94a8fc56385dee36", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d5f" + }, + "description": "Assembled contigs fasta for 
gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly_contigs.fna", + "file_size_bytes": 89266921, + "type": "nmdc:DataObject", + "id": "nmdc:0a3d00715d01ad7b8f3aee59b674dfe9", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d61" + }, + "description": "Assembled AGP file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/assembly.agp", + "file_size_bytes": 10842941, + "type": "nmdc:DataObject", + "id": "nmdc:bc01f0f507c9dac65d8a8e40e41a8c48", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003c1ae706d7b5b14d63" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/data/1781_86101/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2729039400, + "type": "nmdc:DataObject", + "id": "nmdc:668d207be5ea844f988fbfb2813564cc", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15979" + }, + "id": "nmdc:9509adb5a013006dfda9754429cfc968", + "name": "1781_86101.krona.html", + "description": "Gold:Gp0115663 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1781_86101/ReadbasedAnalysis/centrifuge/1781_86101.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15980" + }, + "id": "nmdc:6dea4c58f402b5c3935e8f1a545bec47", + "name": "1781_86101.json", + "description": "Gold:Gp0115663 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1781_86101/ReadbasedAnalysis/1781_86101.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16253" + }, + "id": "nmdc:0c4c875e5b10c6b742c14c22e2926751", + "name": "bins.tooShort.fa", + 
"description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0115663", + "file_size_bytes": 68423774, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16254" + }, + "id": "nmdc:c55d6b00aa5d4af8cd46d349e17d4127", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0115663", + "file_size_bytes": 16857267, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16256" + }, + "id": "nmdc:1346fe25b6ff22180eb3a51204e0b1fc", + "name": "gold:Gp0115663.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0115663", + "file_size_bytes": 224772, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16258" + }, + "id": "nmdc:818f5a47d1371295f9313909ea12eb50", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0115663", + "file_size_bytes": 1141, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16259" + }, + "id": "nmdc:a755bb87aded36aefbd8022506a793c7", + "name": "gold:Gp0115663.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0115663", + "file_size_bytes": 2225340, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1625a" + }, + "id": "nmdc:e0b7421514f976cb7ad8c343cf3077a9", + "name": "gold:Gp0115663.bins.3.fa", + "description": "metabat2 binned 
contig file for gold:Gp0115663", + "file_size_bytes": 288873, + "url": "https://data.microbiomedata.org/data/1781_86101/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d91" + }, + "description": "KO TSV File for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_ko.tsv", + "md5_checksum": "8d250650c90956edff8bafccc56fd630", + "file_size_bytes": 3385, + "id": "nmdc:8d250650c90956edff8bafccc56fd630", + "name": "gold:Gp0115663_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d94" + }, + "description": "Functional annotation GFF file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_functional_annotation.gff", + "md5_checksum": "b7e9c8d0bffdd13ace6f862a61fa87d2", + "file_size_bytes": 3385, + "id": "nmdc:b7e9c8d0bffdd13ace6f862a61fa87d2", + "name": "gold:Gp0115663_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d95" + }, + "description": "Protein FAA for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_proteins.faa", + "md5_checksum": "754074d3bcade65aba2a6f8236619ab7", + "file_size_bytes": 3385, + "id": "nmdc:754074d3bcade65aba2a6f8236619ab7", + "name": "gold:Gp0115663_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16d97" + }, + "description": "Structural annotation GFF file for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_structural_annotation.gff", + "md5_checksum": "a4b4c623457aa10161d88a9ac4eef522", + "file_size_bytes": 3385, + "id": 
"nmdc:a4b4c623457aa10161d88a9ac4eef522", + "name": "gold:Gp0115663_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b00401ae706d7b5b16da8" + }, + "description": "EC TSV File for gold:Gp0115663", + "url": "https://data.microbiomedata.org/1781_86101/img_annotation/Ga0482264_ec.tsv", + "md5_checksum": "27319f58c616a07159e1fac12635bd4b", + "file_size_bytes": 3385, + "id": "nmdc:27319f58c616a07159e1fac12635bd4b", + "name": "gold:Gp0115663_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34734" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:31dc958d116d02122509e90b0883954f", + "nmdc:002e4ebc728f8b91cb5f298d340ab013" + ], + "too_short_contig_num": 159810, + "part_of": [ + "nmdc:mga0h9dt75" + ], + "binned_contig_num": 684, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:c092b018cb4652c4ca0620b37a4b3fad", + "nmdc:70d7c8a307f47adb05056bee1b01f9d4", + "nmdc:4545ab2039ae70f4439a93316f4fb7bc", + "nmdc:280b63ae1cc1fa8d6154a0681d47c399", + "nmdc:27c07072f175571200b5931550adb8aa" + ], + "was_informed_by": "gold:Gp0115663", + "input_contig_num": 169782, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0h9dt75", + "mags_list": [ + { + "number_of_contig": 61, + "completeness": 13.82, + "bin_name": "bins.1", + "gene_count": 294, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.62, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 485, + "completeness": 66.03, + 
"bin_name": "bins.2", + "gene_count": 2871, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.87, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 32 + }, + { + "number_of_contig": 56, + "completeness": 34.23, + "bin_name": "bins.3", + "gene_count": 337, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 63, + "completeness": 6.9, + "bin_name": "bins.4", + "gene_count": 276, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 19, + "completeness": 4.45, + "bin_name": "bins.5", + "gene_count": 463, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 9288, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9d6" + }, + "has_input": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:879988d212ecec46928b8598e2f8391f", + "nmdc:884b95102f5965cc0ee2d9b7f198e5a4", 
+ "nmdc:002e4ebc728f8b91cb5f298d340ab013", + "nmdc:6851078f29716d89e3f41f0969ae7bf0", + "nmdc:4f88c89459f36655eb7c1eceec19602a", + "nmdc:a068b9ce6ebb7deb15ff932b513817a9", + "nmdc:618b18fa8635c80cc0091371f451a6f0", + "nmdc:17e55a1a1a133ffbf8cbe4024d997a6f", + "nmdc:8f80142c0f5723af5a3b44b7ff4e4339", + "nmdc:fdd2e8741ffef40db383674a10bb4d11", + "nmdc:8eb49ac20a6c2721d6db227f4fb3356a", + "nmdc:75f481e0d98793cfb4f9508cb3e31622" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139fb9" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "ctg_logsum": 214373, + "scaf_logsum": 215363, + "gap_pct": 0.00188, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:deddd162bf0128fba13b3bc1ca38d1aa", + "nmdc:b3573e3cda5a06611de71ca04c5c14cc", + "nmdc:c6d0d4cea985ca6fb50a060e15b4a856", + "nmdc:f450e3800e17691d5874c89fc46c186a", + "nmdc:31dc958d116d02122509e90b0883954f" + ], + "asm_score": 6.577, + "was_informed_by": "gold:Gp0115663", + "ctg_powsum": 24284, + "scaf_max": 68135, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "scaf_powsum": 24422, + "execution_resource": "NERSC-Cori", + "contigs": 169784, + "name": "Assembly Activity for nmdc:mga0h9dt75", + "ctg_max": 68135, + "gc_std": 0.11726, + "contig_bp": 83494920, + "gc_avg": 0.46001, + "started_at_time": "2021-10-11T02:28:26Z", + "scaf_bp": 83496490, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 169645, + "ended_at_time": "2021-10-11T04:56:04+00:00", + "ctg_l50": 470, + "ctg_l90": 290, + "ctg_n50": 45584, + "ctg_n90": 141996, + "scaf_l50": 470, + "scaf_l90": 290, 
+ "scaf_n50": 45550, + "scaf_n90": 141870, + "scaf_l_gt50k": 68135, + "scaf_n_gt50k": 1, + "scaf_pct_gt50k": 0.08160224 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b33" + }, + "id": "nmdc:omprc-11-bn8jcq58", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qq8s6x03" + ], + "has_output": [ + "jgi:55d740280d8785342fcf7e39" + ], + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "add_date": "2015-05-28", + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115663" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c88b" + }, + "has_input": [ + "nmdc:30a06664f29cffbbbc49abad86eae6fc" + ], + "part_of": [ + "nmdc:mga0h9dt75" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7bf778baef033d36f118f8591256d6ef", + "nmdc:b99ce8adc125c95f0bfdadf36a3f6848" + ], + "was_informed_by": "gold:Gp0115663", + "input_read_count": 32238374, + "output_read_bases": 4608772924, + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4867994474, + "name": "Read QC Activity for nmdc:mga0h9dt75", + "output_read_count": 30774080, + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + 
"$oid": "649b009bff710ae353f8cf4f" + }, + "has_input": [ + "nmdc:7bf778baef033d36f118f8591256d6ef" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc7c1bda004aab357c8f6cf5a42242f9", + "nmdc:9481434cadd0d6c154e2ec4c11ef0e04", + "nmdc:6b5bc6ce7f11c1336a5f85a98fc18541", + "nmdc:933c71bbc2f4a2e84d50f0d3864cf940", + "nmdc:1a208e2519770ef50740ac39f1b9ba9a", + "nmdc:f112a3840464ae7a9cf4a3bf295edd5c", + "nmdc:7ca01ea379f0baed96f87d1435925f95", + "nmdc:c85f2f2b4a518c4adb23970448a5cb45", + "nmdc:94ee1bc2dc74830a21d5c3471d6cf223" + ], + "was_informed_by": "gold:Gp0115663", + "id": "nmdc:b31abf9d7fe53e2f802bb53e2d13542b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0h9dt75", + "started_at_time": "2021-10-11T02:28:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:56:04+00:00" + } + ] + } +] \ No newline at end of file From dcda603c7f427a62fc20ce5b2ae88b6d8a128cdc Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 15 Nov 2023 09:59:05 -0800 Subject: [PATCH 62/91] local input files for dryrun --- .../assembly/nmdc_mga0h9dt75_assembly.agp | 0 .../assembly/nmdc_mga0h9dt75_contigs.fna | 2 + .../assembly/nmdc_mga0h9dt75_covstats.txt | 0 .../assembly/nmdc_mga0h9dt75_scaffolds.fna | 2 + .../data/dryrun_re_ided_record_dump.json | 1015 ++--------------- 5 files changed, 91 insertions(+), 928 deletions(-) create mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp create mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna create mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt create mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna diff --git 
a/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_assembly.agp new file mode 100644 index 00000000..e69de29b diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna new file mode 100644 index 00000000..aae2f9ae --- /dev/null +++ b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_contigs.fna @@ -0,0 +1,2 @@ +>Contig_0001 + diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_covstats.txt new file mode 100644 index 00000000..e69de29b diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna new file mode 100644 index 00000000..0f2c7e5b --- /dev/null +++ b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:mga0h9dt75/assembly/nmdc_mga0h9dt75_scaffolds.fna @@ -0,0 +1,2 @@ +>sequenceID-001 description +ACGT diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index 74e29a57..ad00210f 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -1,950 +1,193 @@ [ { - "functional_annotation_agg": [], - "library_preparation_set": [], - "processed_sample_set": [], - "extraction_set": [], - "activity_set": [], - 
"biosample_set": [], "data_object_set": [ { - "id": "nmdc:dobj-11-bgp6z123", + "id": "nmdc:dobj-11-qadypq22", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", - "alternative_identifiers": [], "file_size_bytes": 2861414297, - "md5_checksum": null, - "data_object_type": null, - "compression_type": null, - "was_generated_by": null, - "url": null, "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-b20kzg40", + "id": "nmdc:dobj-11-r766y759", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 2571324879, "md5_checksum": "7bf778baef033d36f118f8591256d6ef", - "data_object_type": { - "_code": { - "text": "Filtered Sequencing Reads", - "description": "Reads QC result fastq (clean data)", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-5sfwhy50/nmdc_wfrqc-11-5sfwhy50_filtered.fastq.gz", + "data_object_type": "Filtered Sequencing Reads", + "url": 
"./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-vap10912/nmdc_wfrqc-11-vap10912_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-mhccey92", + "id": "nmdc:dobj-11-k0bzwg32", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 290, "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", - "data_object_type": { - "_code": { - "text": "QC Statistics", - "description": "Reads QC summary statistics", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-5sfwhy50/nmdc_wfrqc-11-5sfwhy50_filterStats.txt", + "data_object_type": "QC Statistics", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-vap10912/nmdc_wfrqc-11-vap10912_filterStats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-b27rkd90", + "id": "nmdc:dobj-11-1dqfzb38", "name": "Final assembly contigs fasta", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], - "file_size_bytes": 
90794959, - "md5_checksum": "1a2ade31cf1edad5430ef0939d94ac43", - "data_object_type": { - "_code": { - "text": "Assembly Contigs", - "description": "Final assembly contigs fasta", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_contigs.fna", + "file_size_bytes": 336, + "md5_checksum": "400d8dfa58189f35a3e754526470bf66", + "data_object_type": "Assembly Contigs", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_contigs.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-tcg9he98", + "id": "nmdc:dobj-11-9e1jhw75", "name": "Final assembly scaffolds fasta", "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], - "file_size_bytes": 90283295, - "md5_checksum": "642e3bae5809fa7a2f0be592e0693a10", - "data_object_type": { - "_code": { - "text": "Assembly Scaffolds", - "description": "Final assembly scaffolds fasta", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - 
"annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_scaffolds.fna", + "file_size_bytes": 700, + "md5_checksum": "37c3fbca7955c4095feeada8325a2dc5", + "data_object_type": "Assembly Scaffolds", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_scaffolds.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-z5f6jq78", + "id": "nmdc:dobj-11-7xk5gf96", "name": "Assembled contigs coverage information", "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], - "file_size_bytes": 14091491, - "md5_checksum": "298156a0eddb0aa59ea3e312406fb56f", - "data_object_type": { - "_code": { - "text": "Assembly Coverage Stats", - "description": "Assembled contigs coverage information", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, 
- "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_covstats.txt", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage Stats", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_covstats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-kv8gp304", + "id": "nmdc:dobj-11-w351h143", "name": "An AGP format file that describes the assembly", "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], - "file_size_bytes": 13901555, - "md5_checksum": "c319c47a6c2f2469ecdcaf972abd8f2b", - "data_object_type": { - "_code": { - "text": "Assembly AGP", - "description": "An AGP format file that describes the assembly", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - 
"close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_assembly.agp", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly AGP", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_assembly.agp", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-xkzgxw24", + "id": "nmdc:dobj-11-2jxf0086", "name": "Sorted bam file of reads mapping back to the final assembly", "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", - "data_object_type": { - "_code": { - "text": "Assembly Coverage BAM", - "description": "Sorted bam file of reads mapping back to the final assembly", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - 
"status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-xc17b248/nmdc_wfmgas-11-xc17b248_pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_pairedMapped_sorted.bam", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-vw7fz645", + "id": "nmdc:dobj-11-101fqh28", "name": "GOTTCHA2 classification report file", "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 13174, "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", - "data_object_type": { - "_code": { - "text": "GOTTCHA2 Classification Report", - "description": "GOTTCHA2 classification report file", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_gottcha2_report.tsv", + "data_object_type": "GOTTCHA2 
Classification Report", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-bhnxqw16", + "id": "nmdc:dobj-11-a1vwvh45", "name": "GOTTCHA2 report file", "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 1035818, "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", - "data_object_type": { - "_code": { - "text": "GOTTCHA2 Report Full", - "description": "GOTTCHA2 report file", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_gottcha2_report_full.tsv", + "data_object_type": "GOTTCHA2 Report Full", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_report_full.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-hycy9b80", + "id": "nmdc:dobj-11-28b66z83", "name": "GOTTCHA2 krona plot HTML file", "description": "Gottcha2 Krona HTML report for 
nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 262669, "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", - "data_object_type": { - "_code": { - "text": "GOTTCHA2 Krona Plot", - "description": "GOTTCHA2 krona plot HTML file", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_gottcha2_krona.html", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-8prvd856", + "id": "nmdc:dobj-11-bexrzt65", "name": "Centrifuge output read classification file", "description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 2189843623, "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", - "data_object_type": { - "_code": { - "text": "Centrifuge Taxonomic Classification", - "description": "Centrifuge output read classification file", - "meaning": null, - 
"unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_centrifuge_classification.tsv", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-acryw383", + "id": "nmdc:dobj-11-2ep63x87", "name": "Centrifuge output report file", "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 260134, "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", - "data_object_type": { - "_code": { - "text": "Centrifuge Classification Report", - "description": "Centrifuge output report file", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": 
null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_centrifuge_report.tsv", + "data_object_type": "Centrifuge Classification Report", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-f6j3wm49", + "id": "nmdc:dobj-11-jza95a63", "name": "Centrifug krona plot HTML file", "description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 2343980, "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", - "data_object_type": { - "_code": { - "text": "Centrifuge Krona Plot", - "description": "Centrifuge krona plot HTML file", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], 
- "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_centrifuge_krona.html", + "data_object_type": "Centrifuge Krona Plot", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-xbn3d347", + "id": "nmdc:dobj-11-a0babn50", "name": "Kraken2 output read classification file", "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 1785563917, "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", - "data_object_type": { - "_code": { - "text": "Kraken2 Taxonomic Classification", - "description": "Kraken2 output read classification file", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": 
"/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_kraken2_classification.tsv", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-sf23g974", + "id": "nmdc:dobj-11-1y04j389", "name": "Kraken2 output report file", "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 699896, "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", - "data_object_type": { - "_code": { - "text": "Kraken2 Classification Report", - "description": "Kraken2 output report file", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_kraken2_report.tsv", + "data_object_type": "Kraken2 Classification Report", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_report.tsv", "type": 
"nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-cn18ys57", + "id": "nmdc:dobj-11-0en1hj05", "name": "Kraken2 Krona plot HTML file", "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", - "alternative_identifiers": [], "file_size_bytes": 4221977, "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", - "data_object_type": { - "_code": { - "text": "Kraken2 Krona Plot", - "description": "Kraken2 krona plot HTML file", - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "compression_type": null, - "was_generated_by": null, - "url": "/global/cfs/cdirs/m3408/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-wy6fa249/nmdc_wfrbt-11-wy6fa249_kraken2_krona.html", + "data_object_type": "Kraken2 Krona Plot", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_krona.html", "type": "nmdc:Data_Object" } ], - "dissolving_activity_set": [], - "functional_annotation_set": [], - "genome_feature_set": [], - "mags_activity_set": [], - "material_sample_set": [], - "material_sampling_activity_set": [], - "metabolomics_analysis_activity_set": [], - "metagenome_annotation_activity_set": [], "metagenome_assembly_set": [ { - "id": 
"nmdc:wfmgas-11-xc17b248", + "id": "nmdc:wfmgas-11-7btyf478", "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", "was_informed_by": "nmdc:omprc-11-bn8jcq58", - "used": null, "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/metaAssembly", "has_input": [ - "nmdc:dobj-11-b20kzg40", - "nmdc:dobj-11-mhccey92" + "nmdc:dobj-11-r766y759", + "nmdc:dobj-11-k0bzwg32" ], "has_output": [ - "nmdc:dobj-11-b27rkd90", - "nmdc:dobj-11-tcg9he98", - "nmdc:dobj-11-z5f6jq78", - "nmdc:dobj-11-kv8gp304", - "nmdc:dobj-11-xkzgxw24" + "nmdc:dobj-11-1dqfzb38", + "nmdc:dobj-11-9e1jhw75", + "nmdc:dobj-11-7xk5gf96", + "nmdc:dobj-11-w351h143", + "nmdc:dobj-11-2jxf0086" ], "type": "nmdc:MetagenomeAssembly", "part_of": [ @@ -975,129 +218,54 @@ "ctg_max": 68135, "gap_pct": 0.00188, "gc_std": 0.11726, - "gc_avg": 0.46001, - "num_input_reads": null, - "num_aligned_reads": null, - "insdc_assembly_identifiers": null + "gc_avg": 0.46001 } ], - "metagenome_sequencing_activity_set": [], - "metaproteomics_analysis_activity_set": [], - "metatranscriptome_activity_set": [], - "nom_analysis_activity_set": [], "omics_processing_set": [ { "id": "nmdc:omprc-11-bn8jcq58", "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", - "alternative_identifiers": [], "has_input": [ "nmdc:bsm-11-qq8s6x03" ], "add_date": "2015-05-28", - "chimera_check": null, "gold_sequencing_project_identifiers": [ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-bgp6z123" + "nmdc:dobj-11-qadypq22" ], - "insdc_experiment_identifiers": [], - "instrument_name": null, "mod_date": "2021-06-15", "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW 
T2_23-Sept-14", - "nucl_acid_amp": null, - "nucl_acid_ext": null, "omics_type": { - "has_raw_value": "Metagenome", - "was_generated_by": null, - "type": null, - "term": null + "has_raw_value": "Metagenome" }, "part_of": [ "nmdc:sty-11-aygzgv51" ], - "pcr_cond": null, - "pcr_primers": null, "principal_investigator": { - "has_raw_value": "James Stegen", - "was_generated_by": null, - "type": null, - "orcid": null, - "profile_image_url": null, - "email": null, - "name": null, - "websites": [] + "has_raw_value": "James Stegen" }, - "processing_institution": { - "_code": { - "text": "JGI", - "description": null, - "meaning": null, - "unit": null, - "is_a": null, - "mixins": [], - "extensions": {}, - "annotations": {}, - "alt_descriptions": {}, - "title": null, - "deprecated": null, - "todos": [], - "notes": [], - "comments": [], - "examples": [], - "in_subset": [], - "from_schema": null, - "imported_from": null, - "source": null, - "in_language": null, - "see_also": [], - "deprecated_element_has_exact_replacement": null, - "deprecated_element_has_possible_replacement": null, - "aliases": [], - "structured_aliases": {}, - "mappings": [], - "exact_mappings": [], - "close_mappings": [], - "related_mappings": [], - "narrow_mappings": [], - "broad_mappings": [], - "created_by": null, - "contributors": [], - "created_on": null, - "last_updated_on": null, - "modified_by": null, - "status": null, - "rank": null, - "categories": [], - "keywords": [] - } - }, - "samp_vol_we_dna_ext": null, - "seq_meth": null, - "seq_quality_check": null, - "target_gene": null, - "target_subfragment": null, + "processing_institution": "JGI", "type": "nmdc:OmicsProcessing" } ], - "reaction_activity_set": [], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-5sfwhy50", + "id": "nmdc:wfrqc-11-vap10912", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", "was_informed_by": "nmdc:omprc-11-bn8jcq58", - 
"used": null, "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-bgp6z123" + "nmdc:dobj-11-qadypq22" ], "has_output": [ - "nmdc:dobj-11-b20kzg40", - "nmdc:dobj-11-mhccey92" + "nmdc:dobj-11-r766y759", + "nmdc:dobj-11-k0bzwg32" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ @@ -1105,37 +273,34 @@ ], "version": "v1.0.8", "input_read_count": 32238374, - "input_base_count": null, "output_read_count": 30774080, - "output_base_count": null, "input_read_bases": 4867994474, "output_read_bases": 4608772924 } ], "read_based_taxonomy_analysis_activity_set": [ { - "id": "nmdc:wfrbt-11-wy6fa249", + "id": "nmdc:wfrbt-11-k6swv922", "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", "was_informed_by": "nmdc:omprc-11-bn8jcq58", - "used": null, "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", "has_input": [ - "nmdc:dobj-11-b20kzg40", - "nmdc:dobj-11-mhccey92" + "nmdc:dobj-11-r766y759", + "nmdc:dobj-11-k0bzwg32" ], "has_output": [ - "nmdc:dobj-11-vw7fz645", - "nmdc:dobj-11-bhnxqw16", - "nmdc:dobj-11-hycy9b80", - "nmdc:dobj-11-8prvd856", - "nmdc:dobj-11-acryw383", - "nmdc:dobj-11-f6j3wm49", - "nmdc:dobj-11-xbn3d347", - "nmdc:dobj-11-sf23g974", - "nmdc:dobj-11-cn18ys57" + "nmdc:dobj-11-101fqh28", + "nmdc:dobj-11-a1vwvh45", + "nmdc:dobj-11-28b66z83", + "nmdc:dobj-11-bexrzt65", + "nmdc:dobj-11-2ep63x87", + "nmdc:dobj-11-jza95a63", + "nmdc:dobj-11-a0babn50", + "nmdc:dobj-11-1y04j389", + "nmdc:dobj-11-0en1hj05" ], "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", "part_of": [ @@ -1143,12 +308,6 @@ ], "version": "v1.0.5" } - ], - "study_set": [], - "field_research_site_set": [], - "collecting_biosamples_from_site_set": [], - "date_created": null, - "etl_software_version": null, - "pooling_set": [] + ] } ] \ No newline at end of file 
From 22d0fac48e6c8c3a30b09759592ad36f350fa226 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Wed, 15 Nov 2023 11:11:03 -0800 Subject: [PATCH 63/91] fixed has_input for assembly and readbased --- nmdc_automation/re_iding/base.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index eff1de17..a6f9ae21 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -201,8 +201,8 @@ def update_metagenome_assembly_set(self, db_record: Dict, for assembly_rec in db_record[METAGENOME_ASSEMBLY_SET]: activity_type = "nmdc:MetagenomeAssembly" omics_processing_id = new_omics_processing.id - new_read_qc = new_db.read_qc_analysis_activity_set[0] - has_input = new_read_qc.has_output + has_input = [self._get_input_do_id(new_db, "Filtered Sequencing Reads")] + updated_has_output = [] new_activity_id = self.api_client.minter(activity_type) @@ -265,8 +265,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, "read_based_taxonomy_analysis_activity_set"]: activity_type = "nmdc:ReadBasedTaxonomyAnalysisActivity" omics_processing_id = new_omics_processing.id - new_read_qc = new_db.read_qc_analysis_activity_set[0] - has_input = new_read_qc.has_output + has_input = [self._get_input_do_id(new_db, "Filtered Sequencing Reads")] new_activity_id = self.api_client.minter(activity_type) logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") @@ -311,6 +310,14 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, new_db.read_based_taxonomy_analysis_activity_set.append(new_read_based) return new_db + + def _get_input_do_id(self, new_db, data_object_type: str): + """Returns the string representation of a data object id given data object type""" + + for rec in new_db.data_object_set: + print(type(rec.data_object_type)) + if str(rec.data_object_type) == 
data_object_type: + return str(rec.id) def _make_new_activity_set_object(self, omics_processing_id: str, new_activity_id: str, activity_set_rec: Dict, has_input: List, From 6d3d81f6390716fda7c357e5ce2f946812431a0e Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Wed, 15 Nov 2023 11:11:24 -0800 Subject: [PATCH 64/91] updated has_inputs --- .../data/dryrun_re_ided_record_dump.json | 118 +++++++++--------- 1 file changed, 58 insertions(+), 60 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index ad00210f..38b30ce3 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -2,176 +2,176 @@ { "data_object_set": [ { - "id": "nmdc:dobj-11-qadypq22", + "id": "nmdc:dobj-11-k18mgh67", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "file_size_bytes": 2861414297, "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-r766y759", + "id": "nmdc:dobj-11-40ms2n98", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2571324879, "md5_checksum": "7bf778baef033d36f118f8591256d6ef", "data_object_type": "Filtered Sequencing Reads", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-vap10912/nmdc_wfrqc-11-vap10912_filtered.fastq.gz", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-7agz5x85/nmdc_wfrqc-11-7agz5x85_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-k0bzwg32", + "id": "nmdc:dobj-11-pz49a380", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 290, "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", "data_object_type": "QC Statistics", - "url": 
"./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-vap10912/nmdc_wfrqc-11-vap10912_filterStats.txt", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-7agz5x85/nmdc_wfrqc-11-7agz5x85_filterStats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-1dqfzb38", + "id": "nmdc:dobj-11-s7hbx548", "name": "Final assembly contigs fasta", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 336, - "md5_checksum": "400d8dfa58189f35a3e754526470bf66", + "md5_checksum": "1501f29ec969470773a42937d7050ee4", "data_object_type": "Assembly Contigs", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_contigs.fna", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-wff05s57/nmdc_wfmgas-11-wff05s57_contigs.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-9e1jhw75", + "id": "nmdc:dobj-11-k4xtja04", "name": "Final assembly scaffolds fasta", "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 700, - "md5_checksum": "37c3fbca7955c4095feeada8325a2dc5", + "md5_checksum": "d9241051e6c263cf4cba013276fbce79", "data_object_type": "Assembly Scaffolds", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_scaffolds.fna", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-wff05s57/nmdc_wfmgas-11-wff05s57_scaffolds.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-7xk5gf96", + "id": "nmdc:dobj-11-mn83rq32", "name": "Assembled contigs coverage information", "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly Coverage Stats", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_covstats.txt", + "url": 
"./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-wff05s57/nmdc_wfmgas-11-wff05s57_covstats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-w351h143", + "id": "nmdc:dobj-11-141yjx07", "name": "An AGP format file that describes the assembly", "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly AGP", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_assembly.agp", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-wff05s57/nmdc_wfmgas-11-wff05s57_assembly.agp", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-2jxf0086", + "id": "nmdc:dobj-11-gw706z78", "name": "Sorted bam file of reads mapping back to the final assembly", "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly Coverage BAM", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_pairedMapped_sorted.bam", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-wff05s57/nmdc_wfmgas-11-wff05s57_pairedMapped_sorted.bam", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-101fqh28", + "id": "nmdc:dobj-11-eb9w0c69", "name": "GOTTCHA2 classification report file", "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 13174, "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", "data_object_type": "GOTTCHA2 Classification Report", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_report.tsv", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_gottcha2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-a1vwvh45", + "id": 
"nmdc:dobj-11-wb0qzz63", "name": "GOTTCHA2 report file", "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 1035818, "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", "data_object_type": "GOTTCHA2 Report Full", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_report_full.tsv", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_gottcha2_report_full.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-28b66z83", + "id": "nmdc:dobj-11-pnb7g686", "name": "GOTTCHA2 krona plot HTML file", "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 262669, "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", "data_object_type": "GOTTCHA2 Krona Plot", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_krona.html", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_gottcha2_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-bexrzt65", + "id": "nmdc:dobj-11-hfcn4m30", "name": "Centrifuge output read classification file", "description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2189843623, "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", "data_object_type": "Centrifuge Taxonomic Classification", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_classification.tsv", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_centrifuge_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-2ep63x87", + "id": "nmdc:dobj-11-e1y3vw77", "name": "Centrifuge output report file", "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 260134, 
"md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", "data_object_type": "Centrifuge Classification Report", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_report.tsv", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_centrifuge_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-jza95a63", + "id": "nmdc:dobj-11-6gt09871", "name": "Centrifug krona plot HTML file", "description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2343980, "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", "data_object_type": "Centrifuge Krona Plot", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_krona.html", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_centrifuge_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-a0babn50", + "id": "nmdc:dobj-11-1ytpbw47", "name": "Kraken2 output read classification file", "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 1785563917, "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", "data_object_type": "Kraken2 Taxonomic Classification", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_classification.tsv", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_kraken2_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-1y04j389", + "id": "nmdc:dobj-11-1j604f63", "name": "Kraken2 output report file", "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 699896, "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", "data_object_type": "Kraken2 Classification Report", - "url": 
"./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_report.tsv", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_kraken2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-0en1hj05", + "id": "nmdc:dobj-11-yjwsmg07", "name": "Kraken2 Krona plot HTML file", "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 4221977, "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", "data_object_type": "Kraken2 Krona Plot", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_krona.html", + "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-rvzhj731/nmdc_wfrbt-11-rvzhj731_kraken2_krona.html", "type": "nmdc:Data_Object" } ], "metagenome_assembly_set": [ { - "id": "nmdc:wfmgas-11-7btyf478", + "id": "nmdc:wfmgas-11-wff05s57", "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -179,15 +179,14 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/metaAssembly", "has_input": [ - "nmdc:dobj-11-r766y759", - "nmdc:dobj-11-k0bzwg32" + "nmdc:dobj-11-40ms2n98" ], "has_output": [ - "nmdc:dobj-11-1dqfzb38", - "nmdc:dobj-11-9e1jhw75", - "nmdc:dobj-11-7xk5gf96", - "nmdc:dobj-11-w351h143", - "nmdc:dobj-11-2jxf0086" + "nmdc:dobj-11-s7hbx548", + "nmdc:dobj-11-k4xtja04", + "nmdc:dobj-11-mn83rq32", + "nmdc:dobj-11-141yjx07", + "nmdc:dobj-11-gw706z78" ], "type": "nmdc:MetagenomeAssembly", "part_of": [ @@ -234,7 +233,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-qadypq22" + "nmdc:dobj-11-k18mgh67" ], "mod_date": "2021-06-15", "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", @@ -253,7 +252,7 @@ ], 
"read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-vap10912", + "id": "nmdc:wfrqc-11-7agz5x85", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -261,11 +260,11 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-qadypq22" + "nmdc:dobj-11-k18mgh67" ], "has_output": [ - "nmdc:dobj-11-r766y759", - "nmdc:dobj-11-k0bzwg32" + "nmdc:dobj-11-40ms2n98", + "nmdc:dobj-11-pz49a380" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ @@ -280,7 +279,7 @@ ], "read_based_taxonomy_analysis_activity_set": [ { - "id": "nmdc:wfrbt-11-k6swv922", + "id": "nmdc:wfrbt-11-rvzhj731", "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -288,19 +287,18 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", "has_input": [ - "nmdc:dobj-11-r766y759", - "nmdc:dobj-11-k0bzwg32" + "nmdc:dobj-11-40ms2n98" ], "has_output": [ - "nmdc:dobj-11-101fqh28", - "nmdc:dobj-11-a1vwvh45", - "nmdc:dobj-11-28b66z83", - "nmdc:dobj-11-bexrzt65", - "nmdc:dobj-11-2ep63x87", - "nmdc:dobj-11-jza95a63", - "nmdc:dobj-11-a0babn50", - "nmdc:dobj-11-1y04j389", - "nmdc:dobj-11-0en1hj05" + "nmdc:dobj-11-eb9w0c69", + "nmdc:dobj-11-wb0qzz63", + "nmdc:dobj-11-pnb7g686", + "nmdc:dobj-11-hfcn4m30", + "nmdc:dobj-11-e1y3vw77", + "nmdc:dobj-11-6gt09871", + "nmdc:dobj-11-1ytpbw47", + "nmdc:dobj-11-1j604f63", + "nmdc:dobj-11-yjwsmg07" ], "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", "part_of": [ From eee1b50fd4f61092dcf6c8c66f7dd900ac266f51 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 15 Nov 2023 11:24:40 -0800 Subject: [PATCH 65/91] Update new data object URLs --- nmdc_automation/re_iding/base.py | 29 +++-- .../nmdc_wfmgas-11-3epd0s35_scaffolds.fna | 2 + 
.../data/dryrun_re_ided_record_dump.json | 120 +++++++++--------- nmdc_automation/re_iding/tests/conftest.py | 13 ++ .../re_iding/tests/test_re_iding_base.py | 26 +++- 5 files changed, 114 insertions(+), 76 deletions(-) create mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index eff1de17..579bab3b 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -27,6 +27,7 @@ assembly_file_operations) NAPA_TEMPLATE = "../../../configs/re_iding_worklfows.yaml" +DATA_BASE_URL = "https://data.microbiomedata.org/data" # BASE_DIR = "/global/cfs/cdirs/m3408/results" logging.basicConfig( @@ -164,7 +165,7 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, old_do_rec["url"], new_readsqc_base_dir, new_activity_id, self.data_dir ) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") - new_do = self._make_new_data_object( + new_do = self.make_new_data_object( omics_processing_id, activity_type, new_activity_id, old_do_rec, data_object_type, ) @@ -226,7 +227,7 @@ def update_metagenome_assembly_set(self, db_record: Dict, #update md5 and file byte size in place to use _make_new_data_object function without functions old_do_rec["file_size_bytes"] = updated_file_size old_do_rec["md5_checksum"] = updated_md5 - new_do = self._make_new_data_object( + new_do = self.make_new_data_object( omics_processing_id, activity_type, new_activity_id, old_do_rec, data_object_type ) # add new data object to new database and update has_output @@ -286,7 +287,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, old_do_rec["url"], new_readbased_base_dir, new_activity_id, self.data_dir ) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") - new_do = self._make_new_data_object( + new_do = 
self.make_new_data_object( omics_processing_id, activity_type, new_activity_id, old_do_rec, data_object_type ) # add new data object to new database and update has_output @@ -345,11 +346,11 @@ def _make_new_activity_set_object(self, omics_processing_id: str, new_activity_i ) return activity - def _make_new_data_object(self, omics_processing_id: str, - activity_type: str, - new_activity_id: str, - data_object_rec: Dict, - data_object_type: str) -> NmdcDataObject: + def make_new_data_object(self, omics_processing_id: str, + activity_type: str, + new_activity_id: str, + data_object_record: Dict, + data_object_type: str) -> NmdcDataObject: """ Return a new data object with updated IDs. """ @@ -357,14 +358,14 @@ def _make_new_data_object(self, omics_processing_id: str, activity_type, data_object_type ) new_data_object_id = self.api_client.minter("nmdc:DataObject") - logger.info(f"nmdcDataObject\t{data_object_rec['id']}\t{new_data_object_id}") + logger.info(f"nmdcDataObject\t{data_object_record['id']}\t{new_data_object_id}") new_description = re.sub( - "[^ ]+$", f"{omics_processing_id}", data_object_rec["description"] + "[^ ]+$", f"{omics_processing_id}", data_object_record["description"] ) logger.info(f"new_description: {new_description}") - new_filename = self._make_new_filename(new_activity_id, data_object_rec) + new_filename = self._make_new_filename(new_activity_id, data_object_record) logger.info(f"new_filename: {new_filename}") - new_url = (f"{self.data_dir}/{omics_processing_id}/{new_activity_id}" + new_url = (f"{DATA_BASE_URL}/{omics_processing_id}/{new_activity_id}" f"/{new_filename}") data_object = NmdcDataObject( @@ -372,8 +373,8 @@ def _make_new_data_object(self, omics_processing_id: str, name=template["name"].replace("{id}", omics_processing_id), description=new_description, type="nmdc:Data_Object", - file_size_bytes=data_object_rec["file_size_bytes"], - md5_checksum=data_object_rec["md5_checksum"], + file_size_bytes=data_object_record["file_size_bytes"], + 
md5_checksum=data_object_record["md5_checksum"], url=new_url, data_object_type=data_object_type, ) diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna new file mode 100644 index 00000000..93ef5e29 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna @@ -0,0 +1,2 @@ +nmdc:wfmgas-11-3epd0s35>nmdc:wfmgas-11-3epd0s35snmdc:wfmgas-11-3epd0s35enmdc:wfmgas-11-3epd0s35qnmdc:wfmgas-11-3epd0s35unmdc:wfmgas-11-3epd0s35enmdc:wfmgas-11-3epd0s35nnmdc:wfmgas-11-3epd0s35cnmdc:wfmgas-11-3epd0s35enmdc:wfmgas-11-3epd0s35Inmdc:wfmgas-11-3epd0s35Dnmdc:wfmgas-11-3epd0s35-nmdc:wfmgas-11-3epd0s350nmdc:wfmgas-11-3epd0s350nmdc:wfmgas-11-3epd0s351nmdc:wfmgas-11-3epd0s35 nmdc:wfmgas-11-3epd0s35dnmdc:wfmgas-11-3epd0s35enmdc:wfmgas-11-3epd0s35snmdc:wfmgas-11-3epd0s35cnmdc:wfmgas-11-3epd0s35rnmdc:wfmgas-11-3epd0s35inmdc:wfmgas-11-3epd0s35pnmdc:wfmgas-11-3epd0s35tnmdc:wfmgas-11-3epd0s35inmdc:wfmgas-11-3epd0s35onmdc:wfmgas-11-3epd0s35nnmdc:wfmgas-11-3epd0s35 +nmdc:wfmgas-11-3epd0s35ACGT diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index ad00210f..95d8bf1c 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -2,176 +2,176 @@ { "data_object_set": [ { - "id": "nmdc:dobj-11-qadypq22", + "id": "nmdc:dobj-11-y15shx96", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "file_size_bytes": 2861414297, "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-r766y759", + "id": "nmdc:dobj-11-26y4jr47", "name": 
"Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2571324879, "md5_checksum": "7bf778baef033d36f118f8591256d6ef", "data_object_type": "Filtered Sequencing Reads", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-vap10912/nmdc_wfrqc-11-vap10912_filtered.fastq.gz", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-nbsnr811/nmdc_wfrqc-11-nbsnr811_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-k0bzwg32", + "id": "nmdc:dobj-11-gf5yhj50", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 290, "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", "data_object_type": "QC Statistics", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-vap10912/nmdc_wfrqc-11-vap10912_filterStats.txt", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-nbsnr811/nmdc_wfrqc-11-nbsnr811_filterStats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-1dqfzb38", + "id": "nmdc:dobj-11-wc9qnv48", "name": "Final assembly contigs fasta", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 336, - "md5_checksum": "400d8dfa58189f35a3e754526470bf66", + "md5_checksum": "569e24922046a04cb49eeae4ac973c9d", "data_object_type": "Assembly Contigs", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_contigs.fna", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_contigs.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-9e1jhw75", + "id": "nmdc:dobj-11-97rsby06", "name": "Final assembly scaffolds fasta", "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 700, - "md5_checksum": "37c3fbca7955c4095feeada8325a2dc5", + "md5_checksum": 
"4d0838d90aa7dec33f463c0d9a9148fe", "data_object_type": "Assembly Scaffolds", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_scaffolds.fna", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-7xk5gf96", + "id": "nmdc:dobj-11-2740a986", "name": "Assembled contigs coverage information", "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly Coverage Stats", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_covstats.txt", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_covstats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-w351h143", + "id": "nmdc:dobj-11-yad0hz45", "name": "An AGP format file that describes the assembly", "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly AGP", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_assembly.agp", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_assembly.agp", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-2jxf0086", + "id": "nmdc:dobj-11-0yrnp384", "name": "Sorted bam file of reads mapping back to the final assembly", "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly Coverage BAM", - "url": 
"./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-7btyf478/nmdc_wfmgas-11-7btyf478_pairedMapped_sorted.bam", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_pairedMapped_sorted.bam", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-101fqh28", + "id": "nmdc:dobj-11-jrh4wg24", "name": "GOTTCHA2 classification report file", "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 13174, "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", "data_object_type": "GOTTCHA2 Classification Report", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_report.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_gottcha2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-a1vwvh45", + "id": "nmdc:dobj-11-j9dwnq42", "name": "GOTTCHA2 report file", "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 1035818, "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", "data_object_type": "GOTTCHA2 Report Full", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_report_full.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_gottcha2_report_full.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-28b66z83", + "id": "nmdc:dobj-11-x0gyg366", "name": "GOTTCHA2 krona plot HTML file", "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 262669, "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", "data_object_type": "GOTTCHA2 Krona Plot", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_gottcha2_krona.html", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_gottcha2_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-bexrzt65", + "id": "nmdc:dobj-11-snpybq20", "name": "Centrifuge output read classification file", "description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2189843623, "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", "data_object_type": "Centrifuge Taxonomic Classification", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_classification.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_centrifuge_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-2ep63x87", + "id": "nmdc:dobj-11-gf3mxy83", "name": "Centrifuge output report file", "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 260134, "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", "data_object_type": "Centrifuge Classification Report", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_report.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_centrifuge_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-jza95a63", + "id": "nmdc:dobj-11-8tg86t47", "name": "Centrifug krona plot HTML file", "description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2343980, "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", "data_object_type": "Centrifuge Krona Plot", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_centrifuge_krona.html", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_centrifuge_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-a0babn50", + "id": "nmdc:dobj-11-2xpzrv62", "name": "Kraken2 output read classification file", "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 1785563917, "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", "data_object_type": "Kraken2 Taxonomic Classification", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_classification.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_kraken2_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-1y04j389", + "id": "nmdc:dobj-11-qspth853", "name": "Kraken2 output report file", "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 699896, "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", "data_object_type": "Kraken2 Classification Report", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_report.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_kraken2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-0en1hj05", + "id": "nmdc:dobj-11-9msdy424", "name": "Kraken2 Krona plot HTML file", "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 4221977, "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", "data_object_type": "Kraken2 Krona Plot", - "url": "./data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-k6swv922/nmdc_wfrbt-11-k6swv922_kraken2_krona.html", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_kraken2_krona.html", "type": "nmdc:Data_Object" } ], 
"metagenome_assembly_set": [ { - "id": "nmdc:wfmgas-11-7btyf478", + "id": "nmdc:wfmgas-11-3epd0s35", "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -179,15 +179,15 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/metaAssembly", "has_input": [ - "nmdc:dobj-11-r766y759", - "nmdc:dobj-11-k0bzwg32" + "nmdc:dobj-11-26y4jr47", + "nmdc:dobj-11-gf5yhj50" ], "has_output": [ - "nmdc:dobj-11-1dqfzb38", - "nmdc:dobj-11-9e1jhw75", - "nmdc:dobj-11-7xk5gf96", - "nmdc:dobj-11-w351h143", - "nmdc:dobj-11-2jxf0086" + "nmdc:dobj-11-wc9qnv48", + "nmdc:dobj-11-97rsby06", + "nmdc:dobj-11-2740a986", + "nmdc:dobj-11-yad0hz45", + "nmdc:dobj-11-0yrnp384" ], "type": "nmdc:MetagenomeAssembly", "part_of": [ @@ -234,7 +234,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-qadypq22" + "nmdc:dobj-11-y15shx96" ], "mod_date": "2021-06-15", "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", @@ -253,7 +253,7 @@ ], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-vap10912", + "id": "nmdc:wfrqc-11-nbsnr811", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -261,11 +261,11 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-qadypq22" + "nmdc:dobj-11-y15shx96" ], "has_output": [ - "nmdc:dobj-11-r766y759", - "nmdc:dobj-11-k0bzwg32" + "nmdc:dobj-11-26y4jr47", + "nmdc:dobj-11-gf5yhj50" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ @@ -280,7 +280,7 @@ ], "read_based_taxonomy_analysis_activity_set": [ { - "id": "nmdc:wfrbt-11-k6swv922", + "id": "nmdc:wfrbt-11-e30a2d23", "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": 
"2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -288,19 +288,19 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", "has_input": [ - "nmdc:dobj-11-r766y759", - "nmdc:dobj-11-k0bzwg32" + "nmdc:dobj-11-26y4jr47", + "nmdc:dobj-11-gf5yhj50" ], "has_output": [ - "nmdc:dobj-11-101fqh28", - "nmdc:dobj-11-a1vwvh45", - "nmdc:dobj-11-28b66z83", - "nmdc:dobj-11-bexrzt65", - "nmdc:dobj-11-2ep63x87", - "nmdc:dobj-11-jza95a63", - "nmdc:dobj-11-a0babn50", - "nmdc:dobj-11-1y04j389", - "nmdc:dobj-11-0en1hj05" + "nmdc:dobj-11-jrh4wg24", + "nmdc:dobj-11-j9dwnq42", + "nmdc:dobj-11-x0gyg366", + "nmdc:dobj-11-snpybq20", + "nmdc:dobj-11-gf3mxy83", + "nmdc:dobj-11-8tg86t47", + "nmdc:dobj-11-2xpzrv62", + "nmdc:dobj-11-qspth853", + "nmdc:dobj-11-9msdy424" ], "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", "part_of": [ diff --git a/nmdc_automation/re_iding/tests/conftest.py b/nmdc_automation/re_iding/tests/conftest.py index 341ac2bc..8141f20f 100644 --- a/nmdc_automation/re_iding/tests/conftest.py +++ b/nmdc_automation/re_iding/tests/conftest.py @@ -9,3 +9,16 @@ def db_record(): """Return a dict of a test Database instance""" with open(TEST_DATA_DIR / "db_record.json", "r") as f: return json.load(f) + +@pytest.fixture +def data_object_record(): + """Return a dict of a test DataObject instance""" + return { + "name": "Gp0115663_Filtered Reads", + "description": "Filtered Reads for Gp0115663", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0h9dt75/qa/nmdc_mga0h9dt75_filtered.fastq.gz", + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "id": "nmdc:7bf778baef033d36f118f8591256d6ef", + "file_size_bytes": 2571324879 + } diff --git a/nmdc_automation/re_iding/tests/test_re_iding_base.py b/nmdc_automation/re_iding/tests/test_re_iding_base.py index 2459fc37..53cca23f 100644 --- a/nmdc_automation/re_iding/tests/test_re_iding_base.py +++ 
b/nmdc_automation/re_iding/tests/test_re_iding_base.py @@ -1,12 +1,14 @@ # nmdc_automation/re_iding/tests/test_re_iding_base.py +from pathlib import Path import pytest_mock from nmdc_automation.api import NmdcRuntimeApi from nmdc_schema.nmdc import Database as NmdcDatabase +from nmdc_schema.nmdc import DataObject as NmdcDataObject from nmdc_automation.re_iding.base import ReIdTool - +TEST_DATAFILE_DIR = "./test_data/results" def test_update_omics_processing_has_output(db_record, mocker): """ @@ -16,7 +18,7 @@ def test_update_omics_processing_has_output(db_record, mocker): exp_do_id = "nmdc:dobj-1234-abcd12345" mock_api = mocker.Mock(spec=NmdcRuntimeApi) mock_api.minter.return_value = exp_do_id - reid_tool = ReIdTool(mock_api) + reid_tool = ReIdTool(mock_api, TEST_DATAFILE_DIR) new_db = NmdcDatabase() new_db = reid_tool.update_omics_processing_has_output(db_record, new_db) assert isinstance(new_db, NmdcDatabase) @@ -25,3 +27,23 @@ def test_update_omics_processing_has_output(db_record, mocker): assert new_db.omics_processing_set[0].has_output[0] == exp_do_id +def test_make_new_data_object(data_object_record, mocker): + """ + Test that we can make a new DataObject with a new ID and correct + URL and Path attributes. 
+ """ + exp_do_id = "nmdc:dobj-1234-abcd12345" + exp_url = 'https://data.microbiomedata.org/data/nmdc:omics_processing-1234-abcd12345/nmdc:activity-1234-abcd12345/nmdc_activity-1234-abcd12345_filtered.fastq.gz' + mock_api = mocker.Mock(spec=NmdcRuntimeApi) + mock_api.minter.return_value = exp_do_id + reid_tool = ReIdTool(mock_api, TEST_DATAFILE_DIR) + new_do = reid_tool.make_new_data_object( + omics_processing_id="nmdc:omics_processing-1234-abcd12345", + activity_type="nmdc:ReadQcAnalysisActivity", + new_activity_id="nmdc:activity-1234-abcd12345", + data_object_record=data_object_record, + data_object_type="Filtered Sequencing Reads", + ) + assert isinstance(new_do, NmdcDataObject) + assert new_do.id == exp_do_id + assert new_do.url == exp_url From 0b32b071cdc1aa69434c4c7b7fe42cde22782579 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 15 Nov 2023 11:27:47 -0800 Subject: [PATCH 66/91] updated processed dry run output --- .../data/dryrun_re_ided_record_dump.json | 118 +++++++++--------- 1 file changed, 58 insertions(+), 60 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index 95d8bf1c..4972881a 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -2,176 +2,176 @@ { "data_object_set": [ { - "id": "nmdc:dobj-11-y15shx96", + "id": "nmdc:dobj-11-y134xn31", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "file_size_bytes": 2861414297, "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-26y4jr47", + "id": "nmdc:dobj-11-dz3wvm61", "name": "Reads QC result fastq (clean data)", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2571324879, "md5_checksum": "7bf778baef033d36f118f8591256d6ef", "data_object_type": "Filtered Sequencing Reads", - "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-nbsnr811/nmdc_wfrqc-11-nbsnr811_filtered.fastq.gz", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-8nssbh35/nmdc_wfrqc-11-8nssbh35_filtered.fastq.gz", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-gf5yhj50", + "id": "nmdc:dobj-11-1pp19646", "name": "Reads QC summary statistics", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 290, "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-nbsnr811/nmdc_wfrqc-11-nbsnr811_filterStats.txt", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-8nssbh35/nmdc_wfrqc-11-8nssbh35_filterStats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-wc9qnv48", + "id": "nmdc:dobj-11-vfk56697", "name": "Final assembly contigs fasta", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 336, - "md5_checksum": "569e24922046a04cb49eeae4ac973c9d", + "md5_checksum": "d4c4941391714c4d337bdf3444b2f108", "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_contigs.fna", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_contigs.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-97rsby06", + "id": "nmdc:dobj-11-pdqkv709", "name": "Final assembly scaffolds fasta", "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 700, - "md5_checksum": "4d0838d90aa7dec33f463c0d9a9148fe", + "md5_checksum": "a57129f6b1ac8b75a42f4ab610b4c20a", "data_object_type": "Assembly Scaffolds", - "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_scaffolds.fna", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-2740a986", + "id": "nmdc:dobj-11-ry36ec34", "name": "Assembled contigs coverage information", "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly Coverage Stats", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_covstats.txt", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_covstats.txt", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-yad0hz45", + "id": "nmdc:dobj-11-v2dcgb32", "name": "An AGP format file that describes the assembly", "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_assembly.agp", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_assembly.agp", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-0yrnp384", + "id": "nmdc:dobj-11-7f8z6a09", "name": "Sorted bam file of reads mapping back to the final assembly", "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_pairedMapped_sorted.bam", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_pairedMapped_sorted.bam", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-jrh4wg24", + "id": "nmdc:dobj-11-7q03xt64", "name": "GOTTCHA2 classification report file", "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 13174, "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", "data_object_type": "GOTTCHA2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_gottcha2_report.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_gottcha2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-j9dwnq42", + "id": "nmdc:dobj-11-qd17za87", "name": "GOTTCHA2 report file", "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 1035818, "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", "data_object_type": "GOTTCHA2 Report Full", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_gottcha2_report_full.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_gottcha2_report_full.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-x0gyg366", + "id": "nmdc:dobj-11-f0pzg444", "name": "GOTTCHA2 krona plot HTML file", "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 262669, "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_gottcha2_krona.html", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_gottcha2_krona.html", "type": "nmdc:Data_Object" 
}, { - "id": "nmdc:dobj-11-snpybq20", + "id": "nmdc:dobj-11-kmysd345", "name": "Centrifuge output read classification file", "description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2189843623, "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_centrifuge_classification.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_centrifuge_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-gf3mxy83", + "id": "nmdc:dobj-11-mgdfaj98", "name": "Centrifuge output report file", "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 260134, "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_centrifuge_report.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_centrifuge_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-8tg86t47", + "id": "nmdc:dobj-11-g38md352", "name": "Centrifug krona plot HTML file", "description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2343980, "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_centrifuge_krona.html", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_centrifuge_krona.html", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-2xpzrv62", + "id": "nmdc:dobj-11-d67rkh18", "name": "Kraken2 
output read classification file", "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 1785563917, "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_kraken2_classification.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_kraken2_classification.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-qspth853", + "id": "nmdc:dobj-11-ptchwj55", "name": "Kraken2 output report file", "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 699896, "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_kraken2_report.tsv", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_kraken2_report.tsv", "type": "nmdc:Data_Object" }, { - "id": "nmdc:dobj-11-9msdy424", + "id": "nmdc:dobj-11-d0qt9513", "name": "Kraken2 Krona plot HTML file", "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 4221977, "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e30a2d23/nmdc_wfrbt-11-e30a2d23_kraken2_krona.html", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_kraken2_krona.html", "type": "nmdc:Data_Object" } ], "metagenome_assembly_set": [ { - "id": "nmdc:wfmgas-11-3epd0s35", + "id": "nmdc:wfmgas-11-ryns7t13", "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", 
"ended_at_time": "2021-10-11T04:56:04+00:00", @@ -179,15 +179,14 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/metaAssembly", "has_input": [ - "nmdc:dobj-11-26y4jr47", - "nmdc:dobj-11-gf5yhj50" + "nmdc:dobj-11-dz3wvm61" ], "has_output": [ - "nmdc:dobj-11-wc9qnv48", - "nmdc:dobj-11-97rsby06", - "nmdc:dobj-11-2740a986", - "nmdc:dobj-11-yad0hz45", - "nmdc:dobj-11-0yrnp384" + "nmdc:dobj-11-vfk56697", + "nmdc:dobj-11-pdqkv709", + "nmdc:dobj-11-ry36ec34", + "nmdc:dobj-11-v2dcgb32", + "nmdc:dobj-11-7f8z6a09" ], "type": "nmdc:MetagenomeAssembly", "part_of": [ @@ -234,7 +233,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-y15shx96" + "nmdc:dobj-11-y134xn31" ], "mod_date": "2021-06-15", "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", @@ -253,7 +252,7 @@ ], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-nbsnr811", + "id": "nmdc:wfrqc-11-8nssbh35", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -261,11 +260,11 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-y15shx96" + "nmdc:dobj-11-y134xn31" ], "has_output": [ - "nmdc:dobj-11-26y4jr47", - "nmdc:dobj-11-gf5yhj50" + "nmdc:dobj-11-dz3wvm61", + "nmdc:dobj-11-1pp19646" ], "type": "nmdc:ReadQcAnalysisActivity", "part_of": [ @@ -280,7 +279,7 @@ ], "read_based_taxonomy_analysis_activity_set": [ { - "id": "nmdc:wfrbt-11-e30a2d23", + "id": "nmdc:wfrbt-11-mxr19q91", "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", @@ -288,19 +287,18 @@ "execution_resource": "NERSC - Perlmutter", "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", "has_input": [ - 
"nmdc:dobj-11-26y4jr47", - "nmdc:dobj-11-gf5yhj50" + "nmdc:dobj-11-dz3wvm61" ], "has_output": [ - "nmdc:dobj-11-jrh4wg24", - "nmdc:dobj-11-j9dwnq42", - "nmdc:dobj-11-x0gyg366", - "nmdc:dobj-11-snpybq20", - "nmdc:dobj-11-gf3mxy83", - "nmdc:dobj-11-8tg86t47", - "nmdc:dobj-11-2xpzrv62", - "nmdc:dobj-11-qspth853", - "nmdc:dobj-11-9msdy424" + "nmdc:dobj-11-7q03xt64", + "nmdc:dobj-11-qd17za87", + "nmdc:dobj-11-f0pzg444", + "nmdc:dobj-11-kmysd345", + "nmdc:dobj-11-mgdfaj98", + "nmdc:dobj-11-g38md352", + "nmdc:dobj-11-d67rkh18", + "nmdc:dobj-11-ptchwj55", + "nmdc:dobj-11-d0qt9513" ], "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", "part_of": [ From 609d71c1afa9b3487b93fc85a396ef93a51e2a9f Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 15 Nov 2023 12:51:37 -0800 Subject: [PATCH 67/91] extracted workflow records for Gs0114675 sty-11-8ft6t785 --- ...ty-11-8fb6t785_associated_record_dump.json | 26756 ++++++++++++++++ 1 file changed, 26756 insertions(+) create mode 100644 nmdc_automation/re_iding/scripts/data/nmdc:sty-11-8fb6t785_associated_record_dump.json diff --git a/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-8fb6t785_associated_record_dump.json b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-8fb6t785_associated_record_dump.json new file mode 100644 index 00000000..bb91c6e2 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-8fb6t785_associated_record_dump.json @@ -0,0 +1,26756 @@ +[ + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 4551741658, + "type": "nmdc:DataObject", + "id": "jgi:560df5b90d878540fd6fe202", + "name": "9567.8.137569.TAGGCAT-AGAGTAG.fastq.gz" + }, + { + "name": "Gp0119849_Filtered Reads", + "description": "Filtered Reads for Gp0119849", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/qa/nmdc_mga0jfwq54_filtered.fastq.gz", + "md5_checksum": "f57b6f3aeda36484319fb9c345cfe600", + "id": 
"nmdc:f57b6f3aeda36484319fb9c345cfe600", + "file_size_bytes": 2421790995 + }, + { + "name": "Gp0119849_Filtered Stats", + "description": "Filtered Stats for Gp0119849", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/qa/nmdc_mga0jfwq54_filterStats.txt", + "md5_checksum": "a87566efafce97a9e99a3bb94e42b56c", + "id": "nmdc:a87566efafce97a9e99a3bb94e42b56c", + "file_size_bytes": 287 + }, + { + "name": "Gp0119849_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_gottcha2_report.tsv", + "md5_checksum": "d33b8adac7c87df1a1706daae4aac29b", + "id": "nmdc:d33b8adac7c87df1a1706daae4aac29b", + "file_size_bytes": 6758 + }, + { + "name": "Gp0119849_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_gottcha2_report_full.tsv", + "md5_checksum": "690ccd842d80dbd79294fd8efd7df668", + "id": "nmdc:690ccd842d80dbd79294fd8efd7df668", + "file_size_bytes": 289728 + }, + { + "name": "Gp0119849_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119849", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_gottcha2_krona.html", + "md5_checksum": "d585fce17e6c7badda857c559faf9723", + "id": "nmdc:d585fce17e6c7badda857c559faf9723", + "file_size_bytes": 247651 + }, + { + "name": "Gp0119849_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119849", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_centrifuge_classification.tsv", + "md5_checksum": "3dd3d00f873c18b2e59b839ead67c778", + "id": 
"nmdc:3dd3d00f873c18b2e59b839ead67c778", + "file_size_bytes": 4801736282 + }, + { + "name": "Gp0119849_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119849", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_centrifuge_report.tsv", + "md5_checksum": "383e7886c9b93ecbf98bdd815ebe9f79", + "id": "nmdc:383e7886c9b93ecbf98bdd815ebe9f79", + "file_size_bytes": 209076 + }, + { + "name": "Gp0119849_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119849", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_centrifuge_krona.html", + "md5_checksum": "c543c400f77bd8e08636116c3a4681d6", + "id": "nmdc:c543c400f77bd8e08636116c3a4681d6", + "file_size_bytes": 2126517 + }, + { + "name": "Gp0119849_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119849", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_kraken2_classification.tsv", + "md5_checksum": "240a6e819568135189afedbc52a9f9f3", + "id": "nmdc:240a6e819568135189afedbc52a9f9f3", + "file_size_bytes": 3182616049 + }, + { + "name": "Gp0119849_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119849", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_kraken2_report.tsv", + "md5_checksum": "29445394b66469bbfd6972fae3b7be2a", + "id": "nmdc:29445394b66469bbfd6972fae3b7be2a", + "file_size_bytes": 443336 + }, + { + "name": "Gp0119849_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119849", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jfwq54/ReadbasedAnalysis/nmdc_mga0jfwq54_kraken2_krona.html", + "md5_checksum": "b2b6fa1385acdad8f0827b029540a6c9", + "id": "nmdc:b2b6fa1385acdad8f0827b029540a6c9", + "file_size_bytes": 2949989 + }, + { + "name": "Gp0119849_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119849", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/assembly/nmdc_mga0jfwq54_contigs.fna", + "md5_checksum": "44b3fa54ba0cb93a37ed626b21354508", + "id": "nmdc:44b3fa54ba0cb93a37ed626b21354508", + "file_size_bytes": 15053260 + }, + { + "name": "Gp0119849_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119849", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/assembly/nmdc_mga0jfwq54_scaffolds.fna", + "md5_checksum": "e2a3484c735353d5e6d5b13c333bbeac", + "id": "nmdc:e2a3484c735353d5e6d5b13c333bbeac", + "file_size_bytes": 15014354 + }, + { + "name": "Gp0119849_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/assembly/nmdc_mga0jfwq54_covstats.txt", + "md5_checksum": "59faa6c9e3228b3d7c7d1b80dc2ec9e3", + "id": "nmdc:59faa6c9e3228b3d7c7d1b80dc2ec9e3", + "file_size_bytes": 996899 + }, + { + "name": "Gp0119849_Assembled AGP file", + "description": "Assembled AGP file for Gp0119849", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/assembly/nmdc_mga0jfwq54_assembly.agp", + "md5_checksum": "dcad1c15a762628715a1316e84badbbc", + "id": "nmdc:dcad1c15a762628715a1316e84badbbc", + "file_size_bytes": 914847 + }, + { + "name": "Gp0119849_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119849", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jfwq54/assembly/nmdc_mga0jfwq54_pairedMapped_sorted.bam", + "md5_checksum": "d712cc8ba2e860e2c7d6ed01305d875c", + "id": "nmdc:d712cc8ba2e860e2c7d6ed01305d875c", + "file_size_bytes": 3127591529 + }, + { + "name": "Gp0119849_Protein FAA", + "description": "Protein FAA for Gp0119849", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_proteins.faa", + "md5_checksum": "1ea616c9664276341a688252337ae0bf", + "id": "nmdc:1ea616c9664276341a688252337ae0bf", + "file_size_bytes": 7347943 + }, + { + "name": "Gp0119849_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119849", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_structural_annotation.gff", + "md5_checksum": "7314a7d70ba0f72dfafa856ef51ff77d", + "id": "nmdc:7314a7d70ba0f72dfafa856ef51ff77d", + "file_size_bytes": 2500 + }, + { + "name": "Gp0119849_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119849", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_functional_annotation.gff", + "md5_checksum": "4084daf80dc14c7a5d14fe387631ee68", + "id": "nmdc:4084daf80dc14c7a5d14fe387631ee68", + "file_size_bytes": 7323096 + }, + { + "name": "Gp0119849_KO TSV file", + "description": "KO TSV file for Gp0119849", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_ko.tsv", + "md5_checksum": "1d975322e1053cf6dc28c68557834f75", + "id": "nmdc:1d975322e1053cf6dc28c68557834f75", + "file_size_bytes": 1143197 + }, + { + "name": "Gp0119849_EC TSV file", + "description": "EC TSV file for Gp0119849", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_ec.tsv", + "md5_checksum": "8f196b9c700a17b1625eed8ce66ca523", + "id": "nmdc:8f196b9c700a17b1625eed8ce66ca523", + "file_size_bytes": 699563 + }, + { + "name": "Gp0119849_COG GFF file", + "description": "COG GFF file for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_cog.gff", + "md5_checksum": "07cb4835969e2165d1b929e0350616a0", + "id": "nmdc:07cb4835969e2165d1b929e0350616a0", + "file_size_bytes": 4891456 + }, + { + "name": "Gp0119849_PFAM GFF file", + "description": "PFAM GFF file for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_pfam.gff", + "md5_checksum": "e6ff10df7baae3b25249c3ed25ce1092", + "id": "nmdc:e6ff10df7baae3b25249c3ed25ce1092", + "file_size_bytes": 4535872 + }, + { + "name": "Gp0119849_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_tigrfam.gff", + "md5_checksum": "cd5e26eb9e375b127394ae9c1a404b72", + "id": "nmdc:cd5e26eb9e375b127394ae9c1a404b72", + "file_size_bytes": 975660 + }, + { + "name": "Gp0119849_SMART GFF file", + "description": "SMART GFF file for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_smart.gff", + "md5_checksum": "dcd69fb1d06c2e0e81aa6c3e8a47a4d6", + "id": "nmdc:dcd69fb1d06c2e0e81aa6c3e8a47a4d6", + "file_size_bytes": 1409447 + }, + { + "name": "Gp0119849_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_supfam.gff", + "md5_checksum": "919275eab1b2eff697b1f126a403d7a8", + "id": "nmdc:919275eab1b2eff697b1f126a403d7a8", + "file_size_bytes": 6360245 + }, + { + "name": "Gp0119849_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119849", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_cath_funfam.gff", + "md5_checksum": "c11e98db2d8c33154510378a029fca04", + "id": "nmdc:c11e98db2d8c33154510378a029fca04", + "file_size_bytes": 5812812 + }, + { + "name": "Gp0119849_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/annotation/nmdc_mga0jfwq54_ko_ec.gff", + "md5_checksum": "6421ef4cc738c134eadfd5011c14563f", + "id": "nmdc:6421ef4cc738c134eadfd5011c14563f", + "file_size_bytes": 3865731 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119849_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/MAGs/nmdc_mga0jfwq54_bins.tooShort.fa", + "md5_checksum": "38c2006dc930f7cb8db52f4e2eda3bf1", + "id": "nmdc:38c2006dc930f7cb8db52f4e2eda3bf1", + "file_size_bytes": 4445685 + }, + { + "name": "Gp0119849_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/MAGs/nmdc_mga0jfwq54_bins.unbinned.fa", + "md5_checksum": "fde7c45ed03675e4dd7b7fba9aa8a5a2", + "id": "nmdc:fde7c45ed03675e4dd7b7fba9aa8a5a2", + "file_size_bytes": 4973295 + }, + { + "name": "Gp0119849_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119849", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0jfwq54/MAGs/nmdc_mga0jfwq54_checkm_qa.out", + "md5_checksum": "52d0bc80f4af44b60382600f8d1df96a", + "id": "nmdc:52d0bc80f4af44b60382600f8d1df96a", + "file_size_bytes": 1272 + }, + { + "name": "Gp0119849_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119849", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/MAGs/nmdc_mga0jfwq54_hqmq_bin.zip", + "md5_checksum": "55b285e508b8a79e96355079a24d3b2f", + "id": "nmdc:55b285e508b8a79e96355079a24d3b2f", + "file_size_bytes": 1045591 + }, + { + "name": "Gp0119849_metabat2 bins", + "description": "metabat2 bins for Gp0119849", + "url": "https://data.microbiomedata.org/data/nmdc:mga0jfwq54/MAGs/nmdc_mga0jfwq54_metabat_bin.zip", + "md5_checksum": "7b2437d0d787888db28c6891ca5ad04a", + "id": "nmdc:7b2437d0d787888db28c6891ca5ad04a", + "file_size_bytes": 672086 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd1" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119849", + "url": "https://data.microbiomedata.org/data/1777_95818/assembly/mapping_stats.txt", + "file_size_bytes": 935504, + "type": "nmdc:DataObject", + "id": "nmdc:32f196423fcede7cca7fe7ad99d5ec36", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de9" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119849", + "url": "https://data.microbiomedata.org/data/1777_95818/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 3104217853, + "type": "nmdc:DataObject", + "id": "nmdc:52e139e6493a510831aff8a1e9ad3fbb", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df0" + }, + "description": "Assembled contigs fasta for gold:Gp0119849", + "url": "https://data.microbiomedata.org/data/1777_95818/assembly/assembly_contigs.fna", 
+ "file_size_bytes": 14991865, + "type": "nmdc:DataObject", + "id": "nmdc:70fba0e579271c70e65c7ef5909958ed", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df2" + }, + "description": "Assembled AGP file for gold:Gp0119849", + "url": "https://data.microbiomedata.org/data/1777_95818/assembly/assembly.agp", + "file_size_bytes": 787407, + "type": "nmdc:DataObject", + "id": "nmdc:d11d1582ad42347b08bc99a5e6950b4b", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df7" + }, + "description": "Assembled scaffold fasta for gold:Gp0119849", + "url": "https://data.microbiomedata.org/data/1777_95818/assembly/assembly_scaffolds.fna", + "file_size_bytes": 14955114, + "type": "nmdc:DataObject", + "id": "nmdc:418623ebdae7f1852c7161744622867b", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a13" + }, + "id": "nmdc:dd3d42f47dc71f713486c54cc493f068", + "name": "1777_95818.krona.html", + "description": "Gold:Gp0119849 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95818/ReadbasedAnalysis/centrifuge/1777_95818.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a40" + }, + "id": "nmdc:429ae15ed5a0df665324b81a36732e3d", + "name": "1777_95818.json", + "description": "Gold:Gp0119849 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95818/ReadbasedAnalysis/1777_95818.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16302" + }, + "id": "nmdc:fc807a636d9d1a1d6a3d83ab22c16ce7", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119849", + "file_size_bytes": 4325771, + 
"url": "https://data.microbiomedata.org/data/1777_95818/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16303" + }, + "id": "nmdc:a2675f339300cf7b35b46657e1269092", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119849", + "file_size_bytes": 8819371, + "url": "https://data.microbiomedata.org/data/1777_95818/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16304" + }, + "id": "nmdc:faa091f8c8acfdb85314e324aa262ef4", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119849", + "file_size_bytes": 1248, + "url": "https://data.microbiomedata.org/data/1777_95818/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16305" + }, + "id": "nmdc:656c175643d16b7a6d62a895a2e1a222", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119849", + "file_size_bytes": 1351, + "url": "https://data.microbiomedata.org/data/1777_95818/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16307" + }, + "id": "nmdc:8bc4f5599b837d4b989cb60308c4776c", + "name": "gold:Gp0119849.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119849", + "file_size_bytes": 890322, + "url": "https://data.microbiomedata.org/data/1777_95818/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16308" + }, + "id": "nmdc:ea90b01b195fbb48b67f4d7c9e0c37ba", + "name": "gold:Gp0119849.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119849", + "file_size_bytes": 227792, + "url": 
"https://data.microbiomedata.org/data/1777_95818/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16309" + }, + "id": "nmdc:fe002bbc1ae645ac4eecfeb5a65f1716", + "name": "gold:Gp0119849.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119849", + "file_size_bytes": 288954, + "url": "https://data.microbiomedata.org/data/1777_95818/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1630a" + }, + "id": "nmdc:a7ceb97599ff14ca0b93ca31f17117d7", + "name": "gold:Gp0119849.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119849", + "file_size_bytes": 353139, + "url": "https://data.microbiomedata.org/data/1777_95818/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c57" + }, + "description": "Protein FAA for gold:Gp0119849", + "url": "https://data.microbiomedata.org/1777_95818/img_annotation/Ga0482166_proteins.faa", + "md5_checksum": "c09c5f5c4f776e6250cf35003e939729", + "file_size_bytes": 3385, + "id": "nmdc:c09c5f5c4f776e6250cf35003e939729", + "name": "gold:Gp0119849_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c59" + }, + "description": "EC TSV File for gold:Gp0119849", + "url": "https://data.microbiomedata.org/1777_95818/img_annotation/Ga0482166_ec.tsv", + "md5_checksum": "b7abebe872fb705d2cdd5fcd36becf0e", + "file_size_bytes": 3385, + "id": "nmdc:b7abebe872fb705d2cdd5fcd36becf0e", + "name": "gold:Gp0119849_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c60" + }, + "description": "KO TSV File for gold:Gp0119849", + "url": 
"https://data.microbiomedata.org/1777_95818/img_annotation/Ga0482166_ko.tsv", + "md5_checksum": "5b2e3bf2abc084710300eb4668638ed3", + "file_size_bytes": 3385, + "id": "nmdc:5b2e3bf2abc084710300eb4668638ed3", + "name": "gold:Gp0119849_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c65" + }, + "description": "Structural annotation GFF file for gold:Gp0119849", + "url": "https://data.microbiomedata.org/1777_95818/img_annotation/Ga0482166_structural_annotation.gff", + "md5_checksum": "fd3023efdffe55105bc192f5b6cb4675", + "file_size_bytes": 3385, + "id": "nmdc:fd3023efdffe55105bc192f5b6cb4675", + "name": "gold:Gp0119849_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c6a" + }, + "description": "Functional annotation GFF file for gold:Gp0119849", + "url": "https://data.microbiomedata.org/1777_95818/img_annotation/Ga0482166_functional_annotation.gff", + "md5_checksum": "f574e3392aa40d786a566ae4bc0a5932", + "file_size_bytes": 3385, + "id": "nmdc:f574e3392aa40d786a566ae4bc0a5932", + "name": "gold:Gp0119849_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346e9" + }, + "has_input": [ + "nmdc:44b3fa54ba0cb93a37ed626b21354508", + "nmdc:d712cc8ba2e860e2c7d6ed01305d875c", + "nmdc:4084daf80dc14c7a5d14fe387631ee68" + ], + "too_short_contig_num": 9846, + "part_of": [ + "nmdc:mga0jfwq54" + ], + "binned_contig_num": 400, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:38c2006dc930f7cb8db52f4e2eda3bf1", + "nmdc:fde7c45ed03675e4dd7b7fba9aa8a5a2", + "nmdc:52d0bc80f4af44b60382600f8d1df96a", + "nmdc:55b285e508b8a79e96355079a24d3b2f", + 
"nmdc:7b2437d0d787888db28c6891ca5ad04a" + ], + "was_informed_by": "gold:Gp0119849", + "input_contig_num": 12279, + "id": "nmdc:22e21ef026913d9514e67ad8e0eec604", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0jfwq54", + "mags_list": [ + { + "number_of_contig": 92, + "completeness": 90.1, + "bin_name": "bins.1", + "gene_count": 1876, + "bin_quality": "MQ", + "gtdbtk_species": "Thermococcus_A sp000430485", + "gtdbtk_order": "Thermococcales", + "num_16s": 0, + "gtdbtk_family": "Thermococcaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.0, + "gtdbtk_class": "Thermococci", + "gtdbtk_phylum": "Euryarchaeota", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "Thermococcus_A", + "num_t_rna": 40 + }, + { + "number_of_contig": 7, + "completeness": 98.21, + "bin_name": "bins.2", + "gene_count": 2002, + "bin_quality": "HQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + "num_16s": 1, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 45 + }, + { + "number_of_contig": 111, + "completeness": 19.3, + "bin_name": "bins.3", + "gene_count": 976, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + }, + { + "number_of_contig": 190, + "completeness": 47.54, + "bin_name": "bins.4", + "gene_count": 1591, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 7.02, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 30 + } + ], + "unbinned_contig_num": 
2033, + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:32:58+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef989" + }, + "has_input": [ + "nmdc:44b3fa54ba0cb93a37ed626b21354508" + ], + "part_of": [ + "nmdc:mga0jfwq54" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:1ea616c9664276341a688252337ae0bf", + "nmdc:7314a7d70ba0f72dfafa856ef51ff77d", + "nmdc:4084daf80dc14c7a5d14fe387631ee68", + "nmdc:1d975322e1053cf6dc28c68557834f75", + "nmdc:8f196b9c700a17b1625eed8ce66ca523", + "nmdc:07cb4835969e2165d1b929e0350616a0", + "nmdc:e6ff10df7baae3b25249c3ed25ce1092", + "nmdc:cd5e26eb9e375b127394ae9c1a404b72", + "nmdc:dcd69fb1d06c2e0e81aa6c3e8a47a4d6", + "nmdc:919275eab1b2eff697b1f126a403d7a8", + "nmdc:c11e98db2d8c33154510378a029fca04", + "nmdc:6421ef4cc738c134eadfd5011c14563f" + ], + "was_informed_by": "gold:Gp0119849", + "id": "nmdc:22e21ef026913d9514e67ad8e0eec604", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0jfwq54", + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:32:58+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f78" + }, + "has_input": [ + "nmdc:f57b6f3aeda36484319fb9c345cfe600" + ], + "part_of": [ + "nmdc:mga0jfwq54" + ], + "ctg_logsum": 140623, + "scaf_logsum": 143628, + "gap_pct": 0.04117, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:44b3fa54ba0cb93a37ed626b21354508", + "nmdc:e2a3484c735353d5e6d5b13c333bbeac", + "nmdc:59faa6c9e3228b3d7c7d1b80dc2ec9e3", + "nmdc:dcad1c15a762628715a1316e84badbbc", + "nmdc:d712cc8ba2e860e2c7d6ed01305d875c" + ], + "asm_score": 20.675, + "was_informed_by": "gold:Gp0119849", + "ctg_powsum": 21957, + "scaf_max": 795957, + "id": 
"nmdc:22e21ef026913d9514e67ad8e0eec604", + "scaf_powsum": 22472, + "execution_resource": "NERSC-Cori", + "contigs": 12296, + "name": "Assembly Activity for nmdc:mga0jfwq54", + "ctg_max": 795957, + "gc_std": 0.08242, + "contig_bp": 14453095, + "gc_avg": 0.38733, + "started_at_time": "2021-10-11T02:23:27Z", + "scaf_bp": 14459048, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 11848, + "ended_at_time": "2021-10-11T03:32:58+00:00", + "ctg_l50": 3242, + "ctg_l90": 348, + "ctg_n50": 629, + "ctg_n90": 7308, + "scaf_l50": 3548, + "scaf_l90": 352, + "scaf_n50": 592, + "scaf_n90": 6894, + "scaf_l_gt50k": 1960635, + "scaf_n_gt50k": 9, + "scaf_pct_gt50k": 13.559917 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b65" + }, + "id": "nmdc:omprc-11-ahcjx717", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_10", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-rwahht82" + ], + "has_output": [ + "jgi:560df5b90d878540fd6fe202" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119849" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c844" + }, + "has_input": [ + "nmdc:b96730fd648a6dc1cdba6c42cba024ea" + ], + "part_of": [ + "nmdc:mga0jfwq54" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f57b6f3aeda36484319fb9c345cfe600", + 
"nmdc:a87566efafce97a9e99a3bb94e42b56c" + ], + "was_informed_by": "gold:Gp0119849", + "input_read_count": 46114800, + "output_read_bases": 6776632765, + "id": "nmdc:22e21ef026913d9514e67ad8e0eec604", + "execution_resource": "NERSC-Cori", + "input_read_bases": 6963334800, + "name": "Read QC Activity for nmdc:mga0jfwq54", + "output_read_count": 45858112, + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:32:58+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf28" + }, + "has_input": [ + "nmdc:f57b6f3aeda36484319fb9c345cfe600" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d33b8adac7c87df1a1706daae4aac29b", + "nmdc:690ccd842d80dbd79294fd8efd7df668", + "nmdc:d585fce17e6c7badda857c559faf9723", + "nmdc:3dd3d00f873c18b2e59b839ead67c778", + "nmdc:383e7886c9b93ecbf98bdd815ebe9f79", + "nmdc:c543c400f77bd8e08636116c3a4681d6", + "nmdc:240a6e819568135189afedbc52a9f9f3", + "nmdc:29445394b66469bbfd6972fae3b7be2a", + "nmdc:b2b6fa1385acdad8f0827b029540a6c9" + ], + "was_informed_by": "gold:Gp0119849", + "id": "nmdc:22e21ef026913d9514e67ad8e0eec604", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0jfwq54", + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:32:58+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 3239805638, + "type": "nmdc:DataObject", + "id": "jgi:560df5b50d878540fd6fe1fc", + "name": "9567.8.137569.TAGGCAT-GCGTAAG.fastq.gz" + }, + { + "name": "Gp0119851_Filtered Reads", + "description": "Filtered Reads for Gp0119851", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/qa/nmdc_mga0s38291_filtered.fastq.gz", + "md5_checksum": 
"9845f07f87c57efad76502c4bf165c9c", + "id": "nmdc:9845f07f87c57efad76502c4bf165c9c", + "file_size_bytes": 1774323012 + }, + { + "name": "Gp0119851_Filtered Stats", + "description": "Filtered Stats for Gp0119851", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/qa/nmdc_mga0s38291_filterStats.txt", + "md5_checksum": "33ed4578f6fe6b4ffdaadc5213be5d76", + "id": "nmdc:33ed4578f6fe6b4ffdaadc5213be5d76", + "file_size_bytes": 283 + }, + { + "name": "Gp0119851_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_gottcha2_report.tsv", + "md5_checksum": "97d87706efaf575734ac73fe4b563569", + "id": "nmdc:97d87706efaf575734ac73fe4b563569", + "file_size_bytes": 7469 + }, + { + "name": "Gp0119851_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_gottcha2_report_full.tsv", + "md5_checksum": "b9086162f9df32ddef118a1cf5d6883c", + "id": "nmdc:b9086162f9df32ddef118a1cf5d6883c", + "file_size_bytes": 247785 + }, + { + "name": "Gp0119851_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119851", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_gottcha2_krona.html", + "md5_checksum": "e5e83df27ee142c750a1e3348c66077e", + "id": "nmdc:e5e83df27ee142c750a1e3348c66077e", + "file_size_bytes": 249683 + }, + { + "name": "Gp0119851_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119851", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_centrifuge_classification.tsv", + "md5_checksum": 
"a2d40e3f2a20fdce858064c970f20ebd", + "id": "nmdc:a2d40e3f2a20fdce858064c970f20ebd", + "file_size_bytes": 3537363204 + }, + { + "name": "Gp0119851_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119851", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_centrifuge_report.tsv", + "md5_checksum": "ddf2a8463e05dfe298ea46e0be272e1f", + "id": "nmdc:ddf2a8463e05dfe298ea46e0be272e1f", + "file_size_bytes": 210449 + }, + { + "name": "Gp0119851_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119851", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_centrifuge_krona.html", + "md5_checksum": "044bcba2b7f3bf02b53b08cc9b6c684f", + "id": "nmdc:044bcba2b7f3bf02b53b08cc9b6c684f", + "file_size_bytes": 2122064 + }, + { + "name": "Gp0119851_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119851", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_kraken2_classification.tsv", + "md5_checksum": "04d10593cb25f887adbdc322f2036c00", + "id": "nmdc:04d10593cb25f887adbdc322f2036c00", + "file_size_bytes": 2160624721 + }, + { + "name": "Gp0119851_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119851", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_kraken2_report.tsv", + "md5_checksum": "5b768d473c386ed08dc31442ebe59551", + "id": "nmdc:5b768d473c386ed08dc31442ebe59551", + "file_size_bytes": 426572 + }, + { + "name": "Gp0119851_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119851", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0s38291/ReadbasedAnalysis/nmdc_mga0s38291_kraken2_krona.html", + "md5_checksum": "4d591416c2bd450570efa96fabdddd67", + "id": "nmdc:4d591416c2bd450570efa96fabdddd67", + "file_size_bytes": 2835952 + }, + { + "name": "Gp0119851_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119851", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/assembly/nmdc_mga0s38291_contigs.fna", + "md5_checksum": "ee1b52a95388196053f6dbd52f0e8726", + "id": "nmdc:ee1b52a95388196053f6dbd52f0e8726", + "file_size_bytes": 19108871 + }, + { + "name": "Gp0119851_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119851", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/assembly/nmdc_mga0s38291_scaffolds.fna", + "md5_checksum": "284a9b71a458c9ebd7f0fd4446478466", + "id": "nmdc:284a9b71a458c9ebd7f0fd4446478466", + "file_size_bytes": 19072477 + }, + { + "name": "Gp0119851_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/assembly/nmdc_mga0s38291_covstats.txt", + "md5_checksum": "1d431a3abae31dca611f406f5e300a7b", + "id": "nmdc:1d431a3abae31dca611f406f5e300a7b", + "file_size_bytes": 866775 + }, + { + "name": "Gp0119851_Assembled AGP file", + "description": "Assembled AGP file for Gp0119851", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/assembly/nmdc_mga0s38291_assembly.agp", + "md5_checksum": "e5e5c9c66b6cf8596b1308675bd2e9d0", + "id": "nmdc:e5e5c9c66b6cf8596b1308675bd2e9d0", + "file_size_bytes": 813300 + }, + { + "name": "Gp0119851_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119851", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0s38291/assembly/nmdc_mga0s38291_pairedMapped_sorted.bam", + "md5_checksum": "65eb750011587b5461a055fa33490858", + "id": "nmdc:65eb750011587b5461a055fa33490858", + "file_size_bytes": 2245379476 + }, + { + "name": "Gp0119851_Protein FAA", + "description": "Protein FAA for Gp0119851", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_proteins.faa", + "md5_checksum": "79ebad70d4aba8d32490b43f622698d7", + "id": "nmdc:79ebad70d4aba8d32490b43f622698d7", + "file_size_bytes": 8873605 + }, + { + "name": "Gp0119851_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119851", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_structural_annotation.gff", + "md5_checksum": "f0f70fb67d2cf53bfc6f6a9f87bab4e6", + "id": "nmdc:f0f70fb67d2cf53bfc6f6a9f87bab4e6", + "file_size_bytes": 2506 + }, + { + "name": "Gp0119851_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119851", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_functional_annotation.gff", + "md5_checksum": "958672e820d896e0b825b28f34248c27", + "id": "nmdc:958672e820d896e0b825b28f34248c27", + "file_size_bytes": 8199019 + }, + { + "name": "Gp0119851_KO TSV file", + "description": "KO TSV file for Gp0119851", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_ko.tsv", + "md5_checksum": "8f96aa904627a4a3ce0d4be2bf046e3f", + "id": "nmdc:8f96aa904627a4a3ce0d4be2bf046e3f", + "file_size_bytes": 1252251 + }, + { + "name": "Gp0119851_EC TSV file", + "description": "EC TSV file for Gp0119851", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_ec.tsv", + "md5_checksum": "aa734a26e5c54f3a4c1e33fc97e5b6db", + "id": "nmdc:aa734a26e5c54f3a4c1e33fc97e5b6db", + "file_size_bytes": 751229 + }, + { + "name": "Gp0119851_COG GFF file", + "description": "COG GFF file for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_cog.gff", + "md5_checksum": "5206211e5e526fb65a01536cad3f74d6", + "id": "nmdc:5206211e5e526fb65a01536cad3f74d6", + "file_size_bytes": 5612073 + }, + { + "name": "Gp0119851_PFAM GFF file", + "description": "PFAM GFF file for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_pfam.gff", + "md5_checksum": "78d20a23c762f1b11d2ed13537df9275", + "id": "nmdc:78d20a23c762f1b11d2ed13537df9275", + "file_size_bytes": 5513825 + }, + { + "name": "Gp0119851_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_tigrfam.gff", + "md5_checksum": "7646387d97df969001b70881f9086e1a", + "id": "nmdc:7646387d97df969001b70881f9086e1a", + "file_size_bytes": 1231546 + }, + { + "name": "Gp0119851_SMART GFF file", + "description": "SMART GFF file for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_smart.gff", + "md5_checksum": "526f19b02cda3069b7ea018a54a8f992", + "id": "nmdc:526f19b02cda3069b7ea018a54a8f992", + "file_size_bytes": 1764726 + }, + { + "name": "Gp0119851_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_supfam.gff", + "md5_checksum": "8a7a98e071ec7aa44c5866f1716f006b", + "id": "nmdc:8a7a98e071ec7aa44c5866f1716f006b", + "file_size_bytes": 7586135 + }, + { + "name": "Gp0119851_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119851", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_cath_funfam.gff", + "md5_checksum": "770e26d256122dab3f5d5fe026ea157b", + "id": "nmdc:770e26d256122dab3f5d5fe026ea157b", + "file_size_bytes": 7116054 + }, + { + "name": "Gp0119851_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/annotation/nmdc_mga0s38291_ko_ec.gff", + "md5_checksum": "6f4d643d913608acdf3c529f20499d01", + "id": "nmdc:6f4d643d913608acdf3c529f20499d01", + "file_size_bytes": 4207639 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119851_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/MAGs/nmdc_mga0s38291_bins.tooShort.fa", + "md5_checksum": "7f30870d63b49d6a9fea574776bf2b65", + "id": "nmdc:7f30870d63b49d6a9fea574776bf2b65", + "file_size_bytes": 3490098 + }, + { + "name": "Gp0119851_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/MAGs/nmdc_mga0s38291_bins.unbinned.fa", + "md5_checksum": "163e27acedcdcc757a3e022b2b724c3f", + "id": "nmdc:163e27acedcdcc757a3e022b2b724c3f", + "file_size_bytes": 4937366 + }, + { + "name": "Gp0119851_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119851", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0s38291/MAGs/nmdc_mga0s38291_checkm_qa.out", + "md5_checksum": "ef7c283447da07d6e0cfa6adfeb76ddf", + "id": "nmdc:ef7c283447da07d6e0cfa6adfeb76ddf", + "file_size_bytes": 1749 + }, + { + "name": "Gp0119851_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119851", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/MAGs/nmdc_mga0s38291_hqmq_bin.zip", + "md5_checksum": "2a5bcee97e2df31c7fd723c7dc0c5d4f", + "id": "nmdc:2a5bcee97e2df31c7fd723c7dc0c5d4f", + "file_size_bytes": 2499389 + }, + { + "name": "Gp0119851_metabat2 bins", + "description": "metabat2 bins for Gp0119851", + "url": "https://data.microbiomedata.org/data/nmdc:mga0s38291/MAGs/nmdc_mga0s38291_metabat_bin.zip", + "md5_checksum": "e2921cb90af8461763559ec2f093f2d5", + "id": "nmdc:e2921cb90af8461763559ec2f093f2d5", + "file_size_bytes": 738116 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd4" + }, + "description": "Assembled contigs fasta for gold:Gp0119851", + "url": "https://data.microbiomedata.org/data/1777_95820/assembly/assembly_contigs.fna", + "file_size_bytes": 19056176, + "type": "nmdc:DataObject", + "id": "nmdc:76a9fb6a1d29da495d246728ab7ace33", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ddc" + }, + "description": "Assembled AGP file for gold:Gp0119851", + "url": "https://data.microbiomedata.org/data/1777_95820/assembly/assembly.agp", + "file_size_bytes": 700100, + "type": "nmdc:DataObject", + "id": "nmdc:c3b9a0615d9bc0c85966fa3dc8d4fab2", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de8" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119851", + "url": "https://data.microbiomedata.org/data/1777_95820/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2229187280, + "type": 
"nmdc:DataObject", + "id": "nmdc:1ee208fddf3bafcd523ada731a98f8da", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dea" + }, + "description": "Assembled scaffold fasta for gold:Gp0119851", + "url": "https://data.microbiomedata.org/data/1777_95820/assembly/assembly_scaffolds.fna", + "file_size_bytes": 19023447, + "type": "nmdc:DataObject", + "id": "nmdc:1db790a1f1e6939628c7cd98e4ad0ffc", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df1" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119851", + "url": "https://data.microbiomedata.org/data/1777_95820/assembly/mapping_stats.txt", + "file_size_bytes": 814080, + "type": "nmdc:DataObject", + "id": "nmdc:aa9c33e49984da220d85bcfd9b0bad25", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a22" + }, + "id": "nmdc:2d92596d1ea5e8f5e7690bb19f3475b9", + "name": "1777_95820.krona.html", + "description": "Gold:Gp0119851 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95820/ReadbasedAnalysis/centrifuge/1777_95820.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a28" + }, + "id": "nmdc:89d6fc71202f4c4c59c1ffb343098397", + "name": "1777_95820.json", + "description": "Gold:Gp0119851 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95820/ReadbasedAnalysis/1777_95820.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16312" + }, + "id": "nmdc:f510e0fbc375a5356e185d266bf0aef5", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119851", + "file_size_bytes": 3395592, + 
"url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16314" + }, + "id": "nmdc:15ed580639fcd44ce28e488397ee9000", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119851", + "file_size_bytes": 11508796, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16315" + }, + "id": "nmdc:923f57f2570ea822b7165cbb0e9f1e68", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119851", + "file_size_bytes": 1351, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16316" + }, + "id": "nmdc:e69f847bf11cfc6eaf3aea845bbd7788", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119851", + "file_size_bytes": 2355, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16317" + }, + "id": "nmdc:ad73fdf7dc9edc09705b2c8f7f902daf", + "name": "gold:Gp0119851.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 340637, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16318" + }, + "id": "nmdc:e33f65ac5e0596a486265bac5b31b1d5", + "name": "gold:Gp0119851.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 1038750, + "url": 
"https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16319" + }, + "id": "nmdc:264f24902a32241a07dd5eb979a586f3", + "name": "gold:Gp0119851.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 205041, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1631a" + }, + "id": "nmdc:e7e8f08290446bfc880cf5e8b48117d7", + "name": "gold:Gp0119851.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 264370, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1631b" + }, + "id": "nmdc:7a66ba194a24321cec9dc9df23e26fe6", + "name": "gold:Gp0119851.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 243745, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1631c" + }, + "id": "nmdc:d93c5f19d3634c4ed98c90ea34c60a41", + "name": "gold:Gp0119851.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 250028, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1631d" + }, + "id": "nmdc:cb83a6d883dbb2be17a75dc11de74a81", + "name": "gold:Gp0119851.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + 
"file_size_bytes": 263586, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1631e" + }, + "id": "nmdc:c259479924cc76ccef03dac0b9ef9383", + "name": "gold:Gp0119851.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 262764, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1631f" + }, + "id": "nmdc:7eb8cfa4359f6e9611da37db1ab55c31", + "name": "gold:Gp0119851.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 429233, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16320" + }, + "id": "nmdc:534773c043735d76dd55c000935fa835", + "name": "gold:Gp0119851.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 427862, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16322" + }, + "id": "nmdc:810e3ff50cab52a11d491a29a68cb75d", + "name": "gold:Gp0119851.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119851", + "file_size_bytes": 353422, + "url": "https://data.microbiomedata.org/data/1777_95820/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c4a" + }, + "description": "EC TSV File for gold:Gp0119851", + "url": 
"https://data.microbiomedata.org/1777_95820/img_annotation/Ga0482164_ec.tsv", + "md5_checksum": "bdfa927dbe2d5bbf27f1a1cf0265a27f", + "file_size_bytes": 3385, + "id": "nmdc:bdfa927dbe2d5bbf27f1a1cf0265a27f", + "name": "gold:Gp0119851_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c4b" + }, + "description": "KO TSV File for gold:Gp0119851", + "url": "https://data.microbiomedata.org/1777_95820/img_annotation/Ga0482164_ko.tsv", + "md5_checksum": "0ab4c3c5fb624a9931ac977df5a4aa4f", + "file_size_bytes": 3385, + "id": "nmdc:0ab4c3c5fb624a9931ac977df5a4aa4f", + "name": "gold:Gp0119851_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c4c" + }, + "description": "Functional annotation GFF file for gold:Gp0119851", + "url": "https://data.microbiomedata.org/1777_95820/img_annotation/Ga0482164_functional_annotation.gff", + "md5_checksum": "4b6e2700378acc2a9ac22195a9b4cbfb", + "file_size_bytes": 3385, + "id": "nmdc:4b6e2700378acc2a9ac22195a9b4cbfb", + "name": "gold:Gp0119851_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c4d" + }, + "description": "Protein FAA for gold:Gp0119851", + "url": "https://data.microbiomedata.org/1777_95820/img_annotation/Ga0482164_proteins.faa", + "md5_checksum": "f8219a779a150e71c672dd2bfd695365", + "file_size_bytes": 3385, + "id": "nmdc:f8219a779a150e71c672dd2bfd695365", + "name": "gold:Gp0119851_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c4e" + }, + "description": "Structural annotation GFF file for gold:Gp0119851", + "url": "https://data.microbiomedata.org/1777_95820/img_annotation/Ga0482164_structural_annotation.gff", + "md5_checksum": 
"adfd5d1e9ec99ea5917d8b9efdbd9130", + "file_size_bytes": 3385, + "id": "nmdc:adfd5d1e9ec99ea5917d8b9efdbd9130", + "name": "gold:Gp0119851_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346f3" + }, + "has_input": [ + "nmdc:ee1b52a95388196053f6dbd52f0e8726", + "nmdc:65eb750011587b5461a055fa33490858", + "nmdc:958672e820d896e0b825b28f34248c27" + ], + "too_short_contig_num": 7885, + "part_of": [ + "nmdc:mga0s38291" + ], + "binned_contig_num": 739, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:7f30870d63b49d6a9fea574776bf2b65", + "nmdc:163e27acedcdcc757a3e022b2b724c3f", + "nmdc:ef7c283447da07d6e0cfa6adfeb76ddf", + "nmdc:2a5bcee97e2df31c7fd723c7dc0c5d4f", + "nmdc:e2921cb90af8461763559ec2f093f2d5" + ], + "was_informed_by": "gold:Gp0119851", + "input_contig_num": 10539, + "id": "nmdc:d33d65c1672bc3083d12f9ccece6e430", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0s38291", + "mags_list": [ + { + "number_of_contig": 66, + "completeness": 81.03, + "bin_name": "bins.1", + "gene_count": 2790, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium sp003070825", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 3, + "num_23s": 1, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 40 + }, + { + "number_of_contig": 141, + "completeness": 32.46, + "bin_name": "bins.2", + "gene_count": 1172, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 4.39, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + 
"gtdbtk_genus": "", + "num_t_rna": 26 + }, + { + "number_of_contig": 7, + "completeness": 100.0, + "bin_name": "bins.3", + "gene_count": 2091, + "bin_quality": "HQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + "num_16s": 1, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 47 + }, + { + "number_of_contig": 87, + "completeness": 87.13, + "bin_name": "bins.4", + "gene_count": 1856, + "bin_quality": "MQ", + "gtdbtk_species": "Thermococcus_A sp000430485", + "gtdbtk_order": "Thermococcales", + "num_16s": 0, + "gtdbtk_family": "Thermococcaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.0, + "gtdbtk_class": "Thermococci", + "gtdbtk_phylum": "Euryarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Thermococcus_A", + "num_t_rna": 29 + }, + { + "number_of_contig": 20, + "completeness": 0.0, + "bin_name": "bins.5", + "gene_count": 265, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 156, + "completeness": 49.92, + "bin_name": "bins.6", + "gene_count": 1325, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 4.39, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + }, + { + "number_of_contig": 262, + "completeness": 80.55, + "bin_name": "bins.7", + "gene_count": 2127, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": 
"Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.38, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 16 + } + ], + "unbinned_contig_num": 1915, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:16:24+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef98f" + }, + "has_input": [ + "nmdc:ee1b52a95388196053f6dbd52f0e8726" + ], + "part_of": [ + "nmdc:mga0s38291" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:79ebad70d4aba8d32490b43f622698d7", + "nmdc:f0f70fb67d2cf53bfc6f6a9f87bab4e6", + "nmdc:958672e820d896e0b825b28f34248c27", + "nmdc:8f96aa904627a4a3ce0d4be2bf046e3f", + "nmdc:aa734a26e5c54f3a4c1e33fc97e5b6db", + "nmdc:5206211e5e526fb65a01536cad3f74d6", + "nmdc:78d20a23c762f1b11d2ed13537df9275", + "nmdc:7646387d97df969001b70881f9086e1a", + "nmdc:526f19b02cda3069b7ea018a54a8f992", + "nmdc:8a7a98e071ec7aa44c5866f1716f006b", + "nmdc:770e26d256122dab3f5d5fe026ea157b", + "nmdc:6f4d643d913608acdf3c529f20499d01" + ], + "was_informed_by": "gold:Gp0119851", + "id": "nmdc:d33d65c1672bc3083d12f9ccece6e430", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0s38291", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:16:24+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f84" + }, + "has_input": [ + "nmdc:9845f07f87c57efad76502c4bf165c9c" + ], + "part_of": [ + "nmdc:mga0s38291" + ], + "ctg_logsum": 213192, + "scaf_logsum": 218451, + "gap_pct": 0.05222, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ee1b52a95388196053f6dbd52f0e8726", + "nmdc:284a9b71a458c9ebd7f0fd4446478466", + 
"nmdc:1d431a3abae31dca611f406f5e300a7b", + "nmdc:e5e5c9c66b6cf8596b1308675bd2e9d0", + "nmdc:65eb750011587b5461a055fa33490858" + ], + "asm_score": 23.393, + "was_informed_by": "gold:Gp0119851", + "ctg_powsum": 33504, + "scaf_max": 657878, + "id": "nmdc:d33d65c1672bc3083d12f9ccece6e430", + "scaf_powsum": 34471, + "execution_resource": "NERSC-Cori", + "contigs": 10563, + "name": "Assembly Activity for nmdc:mga0s38291", + "ctg_max": 657878, + "gc_std": 0.07175, + "contig_bp": 18495955, + "gc_avg": 0.37412, + "started_at_time": "2021-10-11T02:23:25Z", + "scaf_bp": 18505618, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 9806, + "ended_at_time": "2021-10-11T03:16:24+00:00", + "ctg_l50": 7909, + "ctg_l90": 455, + "ctg_n50": 380, + "ctg_n90": 4729, + "scaf_l50": 9041, + "scaf_l90": 501, + "scaf_n50": 347, + "scaf_n90": 4085, + "scaf_l_gt50k": 3892584, + "scaf_n_gt50k": 30, + "scaf_pct_gt50k": 21.034607 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b66" + }, + "id": "nmdc:omprc-11-q457kv14", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_12", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-17gfm275" + ], + "has_output": [ + "jgi:560df5b50d878540fd6fe1fc" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_12", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119851" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c850" + }, + "has_input": [ + 
"nmdc:2e02214b1be9b2dbe0639d55d424c39f" + ], + "part_of": [ + "nmdc:mga0s38291" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9845f07f87c57efad76502c4bf165c9c", + "nmdc:33ed4578f6fe6b4ffdaadc5213be5d76" + ], + "was_informed_by": "gold:Gp0119851", + "input_read_count": 31754562, + "output_read_bases": 4678886154, + "id": "nmdc:d33d65c1672bc3083d12f9ccece6e430", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4794938862, + "name": "Read QC Activity for nmdc:mga0s38291", + "output_read_count": 31524732, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:16:24+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf13" + }, + "has_input": [ + "nmdc:9845f07f87c57efad76502c4bf165c9c" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:97d87706efaf575734ac73fe4b563569", + "nmdc:b9086162f9df32ddef118a1cf5d6883c", + "nmdc:e5e83df27ee142c750a1e3348c66077e", + "nmdc:a2d40e3f2a20fdce858064c970f20ebd", + "nmdc:ddf2a8463e05dfe298ea46e0be272e1f", + "nmdc:044bcba2b7f3bf02b53b08cc9b6c684f", + "nmdc:04d10593cb25f887adbdc322f2036c00", + "nmdc:5b768d473c386ed08dc31442ebe59551", + "nmdc:4d591416c2bd450570efa96fabdddd67" + ], + "was_informed_by": "gold:Gp0119851", + "id": "nmdc:d33d65c1672bc3083d12f9ccece6e430", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0s38291", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:16:24+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 8824348882, + "type": "nmdc:DataObject", + "id": "jgi:560df36f0d878540fd6fe1c7", + "name": "9567.6.137555.TAAGGCG-GCGTAAG.fastq.gz" + }, + { + "name": "Gp0119853_Filtered Reads", + "description": "Filtered 
Reads for Gp0119853", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/qa/nmdc_mga0tyn058_filtered.fastq.gz", + "md5_checksum": "b481648f8154d2ca532a8a64fdb8a7ee", + "id": "nmdc:b481648f8154d2ca532a8a64fdb8a7ee", + "file_size_bytes": 5117450765 + }, + { + "name": "Gp0119853_Filtered Stats", + "description": "Filtered Stats for Gp0119853", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/qa/nmdc_mga0tyn058_filterStats.txt", + "md5_checksum": "096ddb456d9d09f68a93586823c30c9d", + "id": "nmdc:096ddb456d9d09f68a93586823c30c9d", + "file_size_bytes": 285 + }, + { + "name": "Gp0119853_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_gottcha2_report.tsv", + "md5_checksum": "cfa6be80cfa9eac012c009e0e0846a16", + "id": "nmdc:cfa6be80cfa9eac012c009e0e0846a16", + "file_size_bytes": 7340 + }, + { + "name": "Gp0119853_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_gottcha2_report_full.tsv", + "md5_checksum": "af1cdac68dc47d6c89dcc43ef25c3484", + "id": "nmdc:af1cdac68dc47d6c89dcc43ef25c3484", + "file_size_bytes": 303818 + }, + { + "name": "Gp0119853_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119853", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_gottcha2_krona.html", + "md5_checksum": "b9dedb38da1959178648f2470c558074", + "id": "nmdc:b9dedb38da1959178648f2470c558074", + "file_size_bytes": 249221 + }, + { + "name": "Gp0119853_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119853", + "data_object_type": "Centrifuge Taxonomic 
Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_centrifuge_classification.tsv", + "md5_checksum": "ea048abb879cc24521477f96c635fe55", + "id": "nmdc:ea048abb879cc24521477f96c635fe55", + "file_size_bytes": 7135991668 + }, + { + "name": "Gp0119853_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119853", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_centrifuge_report.tsv", + "md5_checksum": "579cfdd0130e89aa2b5bc49f5590c608", + "id": "nmdc:579cfdd0130e89aa2b5bc49f5590c608", + "file_size_bytes": 232347 + }, + { + "name": "Gp0119853_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119853", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_centrifuge_krona.html", + "md5_checksum": "45b93374f5d8323f24e19881e19e7bb4", + "id": "nmdc:45b93374f5d8323f24e19881e19e7bb4", + "file_size_bytes": 2217516 + }, + { + "name": "Gp0119853_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119853", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_kraken2_classification.tsv", + "md5_checksum": "4fbba103ba85fa9cbe211eef2b6ca1d9", + "id": "nmdc:4fbba103ba85fa9cbe211eef2b6ca1d9", + "file_size_bytes": 5224516085 + }, + { + "name": "Gp0119853_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119853", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_kraken2_report.tsv", + "md5_checksum": "99cb03af468bcf68164dd73a24966433", + "id": "nmdc:99cb03af468bcf68164dd73a24966433", + "file_size_bytes": 518710 + }, + { + 
"name": "Gp0119853_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119853", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/ReadbasedAnalysis/nmdc_mga0tyn058_kraken2_krona.html", + "md5_checksum": "235fc3f88e203a17409c9d5fe809a3ff", + "id": "nmdc:235fc3f88e203a17409c9d5fe809a3ff", + "file_size_bytes": 3328984 + }, + { + "name": "Gp0119853_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119853", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/assembly/nmdc_mga0tyn058_contigs.fna", + "md5_checksum": "05630cf0e878439dd116072fc3c86d36", + "id": "nmdc:05630cf0e878439dd116072fc3c86d36", + "file_size_bytes": 34137194 + }, + { + "name": "Gp0119853_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119853", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/assembly/nmdc_mga0tyn058_scaffolds.fna", + "md5_checksum": "7255fa2608b4b3eaf439419ecbf533da", + "id": "nmdc:7255fa2608b4b3eaf439419ecbf533da", + "file_size_bytes": 34087890 + }, + { + "name": "Gp0119853_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/assembly/nmdc_mga0tyn058_covstats.txt", + "md5_checksum": "e478fb39f3773cc9c6710f26134065c6", + "id": "nmdc:e478fb39f3773cc9c6710f26134065c6", + "file_size_bytes": 1428769 + }, + { + "name": "Gp0119853_Assembled AGP file", + "description": "Assembled AGP file for Gp0119853", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/assembly/nmdc_mga0tyn058_assembly.agp", + "md5_checksum": "9bc69e7441ad0c4c1cffd74ccb1e6bac", + "id": "nmdc:9bc69e7441ad0c4c1cffd74ccb1e6bac", + "file_size_bytes": 1280898 + }, + { + "name": "Gp0119853_Metagenome Alignment BAM file", + 
"description": "Metagenome Alignment BAM file for Gp0119853", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/assembly/nmdc_mga0tyn058_pairedMapped_sorted.bam", + "md5_checksum": "f65d9327e9d5bfe68dee46be951f10d4", + "id": "nmdc:f65d9327e9d5bfe68dee46be951f10d4", + "file_size_bytes": 6341550872 + }, + { + "name": "Gp0119853_Protein FAA", + "description": "Protein FAA for Gp0119853", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_proteins.faa", + "md5_checksum": "29335cf722c4b2d9ddc8d655b758764a", + "id": "nmdc:29335cf722c4b2d9ddc8d655b758764a", + "file_size_bytes": 15614854 + }, + { + "name": "Gp0119853_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119853", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_structural_annotation.gff", + "md5_checksum": "2e9580758c3e6f90323ba29d7789881f", + "id": "nmdc:2e9580758c3e6f90323ba29d7789881f", + "file_size_bytes": 2519 + }, + { + "name": "Gp0119853_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119853", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_functional_annotation.gff", + "md5_checksum": "9df7842de9bd10b7bfffdf122a81d20f", + "id": "nmdc:9df7842de9bd10b7bfffdf122a81d20f", + "file_size_bytes": 13790341 + }, + { + "name": "Gp0119853_KO TSV file", + "description": "KO TSV file for Gp0119853", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_ko.tsv", + "md5_checksum": "ec0380382be5ea506ebc6c98865d2ee2", + "id": "nmdc:ec0380382be5ea506ebc6c98865d2ee2", + "file_size_bytes": 2086252 + }, + { + "name": "Gp0119853_EC 
TSV file", + "description": "EC TSV file for Gp0119853", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_ec.tsv", + "md5_checksum": "6b2539c4784f5a71c8bf64c65c396f46", + "id": "nmdc:6b2539c4784f5a71c8bf64c65c396f46", + "file_size_bytes": 1252102 + }, + { + "name": "Gp0119853_COG GFF file", + "description": "COG GFF file for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_cog.gff", + "md5_checksum": "07108b90dca7403f0cda3c682a39b80c", + "id": "nmdc:07108b90dca7403f0cda3c682a39b80c", + "file_size_bytes": 9613358 + }, + { + "name": "Gp0119853_PFAM GFF file", + "description": "PFAM GFF file for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_pfam.gff", + "md5_checksum": "18e42b56c81bee7cce4539bf4a81e6cc", + "id": "nmdc:18e42b56c81bee7cce4539bf4a81e6cc", + "file_size_bytes": 9676174 + }, + { + "name": "Gp0119853_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_tigrfam.gff", + "md5_checksum": "0c31a19ae762380bb18635e545703f7c", + "id": "nmdc:0c31a19ae762380bb18635e545703f7c", + "file_size_bytes": 2237896 + }, + { + "name": "Gp0119853_SMART GFF file", + "description": "SMART GFF file for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_smart.gff", + "md5_checksum": "2c9fa8ba31dd1b3eb81a7a922eedad42", + "id": "nmdc:2c9fa8ba31dd1b3eb81a7a922eedad42", + "file_size_bytes": 3144177 + }, + { + "name": "Gp0119853_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_supfam.gff", + "md5_checksum": "47b5268d8202d3818c6ea0578c64b948", + "id": "nmdc:47b5268d8202d3818c6ea0578c64b948", + "file_size_bytes": 13079560 + 
}, + { + "name": "Gp0119853_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_cath_funfam.gff", + "md5_checksum": "a3cfb0010f431f00e1d36c7d1ba301e0", + "id": "nmdc:a3cfb0010f431f00e1d36c7d1ba301e0", + "file_size_bytes": 12524215 + }, + { + "name": "Gp0119853_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/annotation/nmdc_mga0tyn058_ko_ec.gff", + "md5_checksum": "5c1c9494c0265edc0063312177f8103e", + "id": "nmdc:5c1c9494c0265edc0063312177f8103e", + "file_size_bytes": 6837863 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119853_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/MAGs/nmdc_mga0tyn058_bins.tooShort.fa", + "md5_checksum": "2654935e5a91bc06434a4a647975d054", + "id": "nmdc:2654935e5a91bc06434a4a647975d054", + "file_size_bytes": 5900051 + }, + { + "name": "Gp0119853_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/MAGs/nmdc_mga0tyn058_bins.unbinned.fa", + "md5_checksum": "33ec8d70bf62eb2bbe7b32f5e15b2035", + "id": "nmdc:33ec8d70bf62eb2bbe7b32f5e15b2035", + "file_size_bytes": 11135287 + }, + { + "name": "Gp0119853_metabat2 bin checkm quality assessment result", + "description": 
"metabat2 bin checkm quality assessment result for Gp0119853", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/MAGs/nmdc_mga0tyn058_checkm_qa.out", + "md5_checksum": "bc04046ca73e9c419e2d6300b1889beb", + "id": "nmdc:bc04046ca73e9c419e2d6300b1889beb", + "file_size_bytes": 2592 + }, + { + "name": "Gp0119853_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119853", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/MAGs/nmdc_mga0tyn058_hqmq_bin.zip", + "md5_checksum": "47b20ddea56b55709e6f8d9e5fa99f4d", + "id": "nmdc:47b20ddea56b55709e6f8d9e5fa99f4d", + "file_size_bytes": 4198398 + }, + { + "name": "Gp0119853_metabat2 bins", + "description": "metabat2 bins for Gp0119853", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tyn058/MAGs/nmdc_mga0tyn058_metabat_bin.zip", + "md5_checksum": "8fcdb872f1a00c526d21c90eefe2fe69", + "id": "nmdc:8fcdb872f1a00c526d21c90eefe2fe69", + "file_size_bytes": 995744 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd8" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119853", + "url": "https://data.microbiomedata.org/data/1777_95822/assembly/mapping_stats.txt", + "file_size_bytes": 1341729, + "type": "nmdc:DataObject", + "id": "nmdc:85734ef192cd678ecc57025c65d1f74c", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd9" + }, + "description": "Assembled contigs fasta for gold:Gp0119853", + "url": "https://data.microbiomedata.org/data/1777_95822/assembly/assembly_contigs.fna", + "file_size_bytes": 34050154, + "type": "nmdc:DataObject", + "id": "nmdc:50175f6f32192a797c58d2ef636efa37", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dda" + }, + "description": "Assembled scaffold fasta for 
gold:Gp0119853", + "url": "https://data.microbiomedata.org/data/1777_95822/assembly/assembly_scaffolds.fna", + "file_size_bytes": 34002065, + "type": "nmdc:DataObject", + "id": "nmdc:96586f090aaad4a0740a3412ab6c66b4", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ddb" + }, + "description": "Assembled AGP file for gold:Gp0119853", + "url": "https://data.microbiomedata.org/data/1777_95822/assembly/assembly.agp", + "file_size_bytes": 1104208, + "type": "nmdc:DataObject", + "id": "nmdc:24891389c994f44e3a5a8016adbe7368", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ddd" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119853", + "url": "https://data.microbiomedata.org/data/1777_95822/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 6294810786, + "type": "nmdc:DataObject", + "id": "nmdc:17c418191263d4244eaf7e3d78256b06", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a34" + }, + "id": "nmdc:6701f4477f2c3c18817e0b6d63f2ed67", + "name": "1777_95822.json", + "description": "Gold:Gp0119853 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95822/ReadbasedAnalysis/1777_95822.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a3f" + }, + "id": "nmdc:40d3ddc30edb12569ada4c5289714394", + "name": "1777_95822.krona.html", + "description": "Gold:Gp0119853 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95822/ReadbasedAnalysis/centrifuge/1777_95822.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16325" + }, + "id": "nmdc:ea08554d8419ffc7966b7b821b235510", + "name": "bins.tooShort.fa", + 
"description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119853", + "file_size_bytes": 5750443, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16326" + }, + "id": "nmdc:918bb58a879321eccb1da420db50004c", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119853", + "file_size_bytes": 21720355, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16327" + }, + "id": "nmdc:d2d359d0be3f3deae9f55ade8a30fc9d", + "name": "gtdbtk.ar122.summary.tsv", + "description": "gtdbtk archaea assignment result summary table for gold:Gp0119853", + "file_size_bytes": 1489, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/gtdbtk_output/classify/gtdbtk.ar122.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16329" + }, + "id": "nmdc:afdbae1d37055040023a69ac0c7274a0", + "name": "gold:Gp0119853.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 918519, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1632a" + }, + "id": "nmdc:7a2f02b693904ec3b9894e005c67bd6f", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119853", + "file_size_bytes": 3220, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1632c" + }, + "id": "nmdc:4b1a2b79040dbf5567fbd9a0cc3394dc", + "name": "gold:Gp0119853.bins.3.fa", + "description": "metabat2 binned 
contig file for gold:Gp0119853", + "file_size_bytes": 266568, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1632d" + }, + "id": "nmdc:b1e6fea2a9cb5d3bdd1764df20ceff88", + "name": "gold:Gp0119853.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 264100, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1632e" + }, + "id": "nmdc:19576255302c4888d9dc91fc099cbafb", + "name": "gold:Gp0119853.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 268016, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1632f" + }, + "id": "nmdc:397f133628e30104bafaf6319f722d67", + "name": "gold:Gp0119853.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 946396, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16330" + }, + "id": "nmdc:9a79783ff6b0dc6407087b6eec6c8c64", + "name": "gold:Gp0119853.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 319984, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16331" + }, + "id": "nmdc:fefebeeb6f54aa2c30d5837d7cd9acdb", + "name": 
"gold:Gp0119853.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 245921, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16332" + }, + "id": "nmdc:e32d50c4a57435289820c57e4ab23ee9", + "name": "gold:Gp0119853.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 235589, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16333" + }, + "id": "nmdc:5d061c5380de6b52f42d84a3e695a0fe", + "name": "gold:Gp0119853.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 328311, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16334" + }, + "id": "nmdc:79e5312722f9720f67bbf3ff57bd3c83", + "name": "gold:Gp0119853.bins.16.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 246606, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16335" + }, + "id": "nmdc:1a7091bf77408f9790225ca1b3dfa3fd", + "name": "gold:Gp0119853.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 394894, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16336" + }, + "id": 
"nmdc:1b4f5e8a8a4b97766dbf186e49ca804f", + "name": "gold:Gp0119853.bins.15.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 232211, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16337" + }, + "id": "nmdc:371134f7bcfe47e3db35dba37ad8aeea", + "name": "gold:Gp0119853.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 433850, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16338" + }, + "id": "nmdc:43f6ac397dffcea7203de7715cea43e2", + "name": "gold:Gp0119853.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 648044, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16339" + }, + "id": "nmdc:d8e326e62d2b4399f5379e52c3c1cb96", + "name": "gold:Gp0119853.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 283222, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1633d" + }, + "id": "nmdc:48eb5d102a37e631df3bc85b32288da0", + "name": "gold:Gp0119853.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119853", + "file_size_bytes": 419316, + "url": "https://data.microbiomedata.org/data/1777_95822/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16c56" + }, + "description": "EC TSV File for gold:Gp0119853", + "url": "https://data.microbiomedata.org/1777_95822/img_annotation/Ga0482162_ec.tsv", + "md5_checksum": "2bd72614bf1a394de064eb6148074d22", + "file_size_bytes": 3385, + "id": "nmdc:2bd72614bf1a394de064eb6148074d22", + "name": "gold:Gp0119853_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c5c" + }, + "description": "Structural annotation GFF file for gold:Gp0119853", + "url": "https://data.microbiomedata.org/1777_95822/img_annotation/Ga0482162_structural_annotation.gff", + "md5_checksum": "14e390786d1f1eb60835aca96081ab23", + "file_size_bytes": 3385, + "id": "nmdc:14e390786d1f1eb60835aca96081ab23", + "name": "gold:Gp0119853_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c61" + }, + "description": "KO TSV File for gold:Gp0119853", + "url": "https://data.microbiomedata.org/1777_95822/img_annotation/Ga0482162_ko.tsv", + "md5_checksum": "af374a8de732aa3c85eba74b5d29fb66", + "file_size_bytes": 3385, + "id": "nmdc:af374a8de732aa3c85eba74b5d29fb66", + "name": "gold:Gp0119853_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c63" + }, + "description": "Functional annotation GFF file for gold:Gp0119853", + "url": "https://data.microbiomedata.org/1777_95822/img_annotation/Ga0482162_functional_annotation.gff", + "md5_checksum": "db7baf32a8a49b586b8ff70b67487863", + "file_size_bytes": 3385, + "id": "nmdc:db7baf32a8a49b586b8ff70b67487863", + "name": "gold:Gp0119853_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c70" + }, + "description": "Protein FAA for gold:Gp0119853", + 
"url": "https://data.microbiomedata.org/1777_95822/img_annotation/Ga0482162_proteins.faa", + "md5_checksum": "c0c580b59189351b4f4919701e4db0f2", + "file_size_bytes": 3385, + "id": "nmdc:c0c580b59189351b4f4919701e4db0f2", + "name": "gold:Gp0119853_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346fb" + }, + "has_input": [ + "nmdc:05630cf0e878439dd116072fc3c86d36", + "nmdc:f65d9327e9d5bfe68dee46be951f10d4", + "nmdc:9df7842de9bd10b7bfffdf122a81d20f" + ], + "too_short_contig_num": 11871, + "part_of": [ + "nmdc:mga0tyn058" + ], + "binned_contig_num": 1112, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:2654935e5a91bc06434a4a647975d054", + "nmdc:33ec8d70bf62eb2bbe7b32f5e15b2035", + "nmdc:bc04046ca73e9c419e2d6300b1889beb", + "nmdc:47b20ddea56b55709e6f8d9e5fa99f4d", + "nmdc:8fcdb872f1a00c526d21c90eefe2fe69" + ], + "was_informed_by": "gold:Gp0119853", + "input_contig_num": 17408, + "id": "nmdc:71bbad1a58cd51e6190b66f925cfb8f5", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0tyn058", + "mags_list": [ + { + "number_of_contig": 87, + "completeness": 39.47, + "bin_name": "bins.1", + "gene_count": 441, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 130, + "completeness": 0.0, + "bin_name": "bins.10", + "gene_count": 1170, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + 
"num_t_rna": 5 + }, + { + "number_of_contig": 85, + "completeness": 89.6, + "bin_name": "bins.11", + "gene_count": 1949, + "bin_quality": "MQ", + "gtdbtk_species": "Thermococcus_A sp000430485", + "gtdbtk_order": "Thermococcales", + "num_16s": 0, + "gtdbtk_family": "Thermococcaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.0, + "gtdbtk_class": "Thermococci", + "gtdbtk_phylum": "Euryarchaeota", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "Thermococcus_A", + "num_t_rna": 39 + }, + { + "number_of_contig": 18, + "completeness": 0.0, + "bin_name": "bins.12", + "gene_count": 224, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + "number_of_contig": 96, + "completeness": 72.41, + "bin_name": "bins.2", + "gene_count": 2737, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium sp003070825", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 3.45, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 4, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 42 + }, + { + "number_of_contig": 96, + "completeness": 71.73, + "bin_name": "bins.3", + "gene_count": 1154, + "bin_quality": "MQ", + "gtdbtk_species": "Methanohalophilus euhalobius", + "gtdbtk_order": "Methanosarcinales", + "num_16s": 0, + "gtdbtk_family": "Methanosarcinaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.0, + "gtdbtk_class": "Methanosarcinia", + "gtdbtk_phylum": "Halobacterota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Methanohalophilus", + "num_t_rna": 24 + }, + { + "number_of_contig": 75, + "completeness": 96.5, + "bin_name": "bins.4", + "gene_count": 2335, + "bin_quality": "HQ", + "gtdbtk_species": "Flexistipes 
sp002338145", + "gtdbtk_order": "Deferribacterales", + "num_16s": 2, + "gtdbtk_family": "Flexistipitaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 3.45, + "gtdbtk_class": "Deferribacteres", + "gtdbtk_phylum": "Deferribacterota", + "num_5s": 3, + "num_23s": 1, + "gtdbtk_genus": "Flexistipes", + "num_t_rna": 45 + }, + { + "number_of_contig": 38, + "completeness": 8.33, + "bin_name": "bins.5", + "gene_count": 358, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 63, + "completeness": 93.1, + "bin_name": "bins.6", + "gene_count": 2103, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Deferribacterales", + "num_16s": 1, + "gtdbtk_family": "Flexistipitaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.86, + "gtdbtk_class": "Deferribacteres", + "gtdbtk_phylum": "Deferribacterota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Flexistipes", + "num_t_rna": 33 + }, + { + "number_of_contig": 11, + "completeness": 100.0, + "bin_name": "bins.7", + "gene_count": 2097, + "bin_quality": "HQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + "num_16s": 1, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 47 + }, + { + "number_of_contig": 187, + "completeness": 88.26, + "bin_name": "bins.8", + "gene_count": 2112, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.65, + "gtdbtk_class": "Halanaerobiia", + 
"gtdbtk_phylum": "Firmicutes_F", + "num_5s": 3, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 16 + }, + { + "number_of_contig": 226, + "completeness": 46.28, + "bin_name": "bins.9", + "gene_count": 1495, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 3, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.53, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 31 + } + ], + "unbinned_contig_num": 4425, + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:24:31+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a5" + }, + "has_input": [ + "nmdc:05630cf0e878439dd116072fc3c86d36" + ], + "part_of": [ + "nmdc:mga0tyn058" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:29335cf722c4b2d9ddc8d655b758764a", + "nmdc:2e9580758c3e6f90323ba29d7789881f", + "nmdc:9df7842de9bd10b7bfffdf122a81d20f", + "nmdc:ec0380382be5ea506ebc6c98865d2ee2", + "nmdc:6b2539c4784f5a71c8bf64c65c396f46", + "nmdc:07108b90dca7403f0cda3c682a39b80c", + "nmdc:18e42b56c81bee7cce4539bf4a81e6cc", + "nmdc:0c31a19ae762380bb18635e545703f7c", + "nmdc:2c9fa8ba31dd1b3eb81a7a922eedad42", + "nmdc:47b5268d8202d3818c6ea0578c64b948", + "nmdc:a3cfb0010f431f00e1d36c7d1ba301e0", + "nmdc:5c1c9494c0265edc0063312177f8103e" + ], + "was_informed_by": "gold:Gp0119853", + "id": "nmdc:71bbad1a58cd51e6190b66f925cfb8f5", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0tyn058", + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:24:31+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f86" + }, + "has_input": [ + "nmdc:b481648f8154d2ca532a8a64fdb8a7ee" + ], + "part_of": [ + 
"nmdc:mga0tyn058" + ], + "ctg_logsum": 375488, + "scaf_logsum": 378290, + "gap_pct": 0.02258, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:05630cf0e878439dd116072fc3c86d36", + "nmdc:7255fa2608b4b3eaf439419ecbf533da", + "nmdc:e478fb39f3773cc9c6710f26134065c6", + "nmdc:9bc69e7441ad0c4c1cffd74ccb1e6bac", + "nmdc:f65d9327e9d5bfe68dee46be951f10d4" + ], + "asm_score": 23.401, + "was_informed_by": "gold:Gp0119853", + "ctg_powsum": 55953, + "scaf_max": 570983, + "id": "nmdc:71bbad1a58cd51e6190b66f925cfb8f5", + "scaf_powsum": 56821, + "execution_resource": "NERSC-Cori", + "contigs": 17417, + "name": "Assembly Activity for nmdc:mga0tyn058", + "ctg_max": 498917, + "gc_std": 0.06016, + "contig_bp": 33068444, + "gc_avg": 0.37211, + "started_at_time": "2021-10-11T02:23:27Z", + "scaf_bp": 33075914, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 17165, + "ended_at_time": "2021-10-11T05:24:31+00:00", + "ctg_l50": 7045, + "ctg_l90": 607, + "ctg_n50": 753, + "ctg_n90": 8308, + "scaf_l50": 7429, + "scaf_l90": 614, + "scaf_n50": 714, + "scaf_n90": 8096, + "scaf_l_gt50k": 5875019, + "scaf_n_gt50k": 60, + "scaf_pct_gt50k": 17.762226 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b67" + }, + "id": "nmdc:omprc-11-mspwf315", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_18", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-49xaw128" + ], + "has_output": [ + "jgi:560df36f0d878540fd6fe1c7" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_18", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly 
Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119853" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c851" + }, + "has_input": [ + "nmdc:b160631b887893c77168e1ec3b8a8ace" + ], + "part_of": [ + "nmdc:mga0tyn058" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b481648f8154d2ca532a8a64fdb8a7ee", + "nmdc:096ddb456d9d09f68a93586823c30c9d" + ], + "was_informed_by": "gold:Gp0119853", + "input_read_count": 82676692, + "output_read_bases": 11993576169, + "id": "nmdc:71bbad1a58cd51e6190b66f925cfb8f5", + "execution_resource": "NERSC-Cori", + "input_read_bases": 12484180492, + "name": "Read QC Activity for nmdc:mga0tyn058", + "output_read_count": 82106300, + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:24:31+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf2f" + }, + "has_input": [ + "nmdc:b481648f8154d2ca532a8a64fdb8a7ee" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:cfa6be80cfa9eac012c009e0e0846a16", + "nmdc:af1cdac68dc47d6c89dcc43ef25c3484", + "nmdc:b9dedb38da1959178648f2470c558074", + "nmdc:ea048abb879cc24521477f96c635fe55", + "nmdc:579cfdd0130e89aa2b5bc49f5590c608", + "nmdc:45b93374f5d8323f24e19881e19e7bb4", + "nmdc:4fbba103ba85fa9cbe211eef2b6ca1d9", + "nmdc:99cb03af468bcf68164dd73a24966433", + "nmdc:235fc3f88e203a17409c9d5fe809a3ff" + ], + "was_informed_by": "gold:Gp0119853", + "id": "nmdc:71bbad1a58cd51e6190b66f925cfb8f5", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0tyn058", + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:24:31+00:00" + } + ] + }, + { + "data_object_set": [ 
+ { + "description": "Raw sequencer read data", + "file_size_bytes": 6747454304, + "type": "nmdc:DataObject", + "id": "jgi:560df5ba0d878540fd6fe203", + "name": "9567.8.137569.AGGCAGA-AGAGTAG.fastq.gz" + }, + { + "name": "Gp0119858_Filtered Reads", + "description": "Filtered Reads for Gp0119858", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/qa/nmdc_mga0t1k874_filtered.fastq.gz", + "md5_checksum": "ca8cef098d6e23f2882bde625674bfa1", + "id": "nmdc:ca8cef098d6e23f2882bde625674bfa1", + "file_size_bytes": 3408911196 + }, + { + "name": "Gp0119858_Filtered Stats", + "description": "Filtered Stats for Gp0119858", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/qa/nmdc_mga0t1k874_filterStats.txt", + "md5_checksum": "4aa539150c9237638d437aca0039919e", + "id": "nmdc:4aa539150c9237638d437aca0039919e", + "file_size_bytes": 289 + }, + { + "name": "Gp0119858_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_gottcha2_report.tsv", + "md5_checksum": "2e6e862447b6813ae9085fd7cd0cc15e", + "id": "nmdc:2e6e862447b6813ae9085fd7cd0cc15e", + "file_size_bytes": 3615 + }, + { + "name": "Gp0119858_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_gottcha2_report_full.tsv", + "md5_checksum": "87178f480db11c48151e82776d63e0ca", + "id": "nmdc:87178f480db11c48151e82776d63e0ca", + "file_size_bytes": 88179 + }, + { + "name": "Gp0119858_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119858", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_gottcha2_krona.html", + "md5_checksum": 
"f03e13e646e4ff20272bbf9fef27e3e6", + "id": "nmdc:f03e13e646e4ff20272bbf9fef27e3e6", + "file_size_bytes": 238270 + }, + { + "name": "Gp0119858_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119858", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_centrifuge_classification.tsv", + "md5_checksum": "b6cb3aadb265b0bb6efd8e56de1f0f4d", + "id": "nmdc:b6cb3aadb265b0bb6efd8e56de1f0f4d", + "file_size_bytes": 5059679710 + }, + { + "name": "Gp0119858_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119858", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_centrifuge_report.tsv", + "md5_checksum": "cf76e68caa214f514a199d9cdf47bee9", + "id": "nmdc:cf76e68caa214f514a199d9cdf47bee9", + "file_size_bytes": 209653 + }, + { + "name": "Gp0119858_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119858", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_centrifuge_krona.html", + "md5_checksum": "32634814ea35dc9d9cd2b13af86d8dc3", + "id": "nmdc:32634814ea35dc9d9cd2b13af86d8dc3", + "file_size_bytes": 2099570 + }, + { + "name": "Gp0119858_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119858", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_kraken2_classification.tsv", + "md5_checksum": "1fc591039fa9014c83f98613402d3dfd", + "id": "nmdc:1fc591039fa9014c83f98613402d3dfd", + "file_size_bytes": 4383320787 + }, + { + "name": "Gp0119858_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119858", + "data_object_type": 
"Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_kraken2_report.tsv", + "md5_checksum": "77738e9cb7e7edc91a71d77e77f6ad78", + "id": "nmdc:77738e9cb7e7edc91a71d77e77f6ad78", + "file_size_bytes": 444059 + }, + { + "name": "Gp0119858_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119858", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/ReadbasedAnalysis/nmdc_mga0t1k874_kraken2_krona.html", + "md5_checksum": "9dd14e6b80bc74a94d11855f9b40a494", + "id": "nmdc:9dd14e6b80bc74a94d11855f9b40a494", + "file_size_bytes": 2920569 + }, + { + "name": "Gp0119858_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119858", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/assembly/nmdc_mga0t1k874_contigs.fna", + "md5_checksum": "2cb85bcf96b582fe1fe4213d7ed6226e", + "id": "nmdc:2cb85bcf96b582fe1fe4213d7ed6226e", + "file_size_bytes": 8187549 + }, + { + "name": "Gp0119858_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119858", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/assembly/nmdc_mga0t1k874_scaffolds.fna", + "md5_checksum": "ce00d58196ab832fb3306231639c7eae", + "id": "nmdc:ce00d58196ab832fb3306231639c7eae", + "file_size_bytes": 8173501 + }, + { + "name": "Gp0119858_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/assembly/nmdc_mga0t1k874_covstats.txt", + "md5_checksum": "f79dcfd8e7a3906019954975e1df34a2", + "id": "nmdc:f79dcfd8e7a3906019954975e1df34a2", + "file_size_bytes": 344607 + }, + { + "name": "Gp0119858_Assembled AGP file", + "description": "Assembled AGP file for Gp0119858", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0t1k874/assembly/nmdc_mga0t1k874_assembly.agp", + "md5_checksum": "0eb5598d0f7c9c4780804b57b3f43957", + "id": "nmdc:0eb5598d0f7c9c4780804b57b3f43957", + "file_size_bytes": 316576 + }, + { + "name": "Gp0119858_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119858", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/assembly/nmdc_mga0t1k874_pairedMapped_sorted.bam", + "md5_checksum": "ddfc52144c47fea5c94e690576c0fed9", + "id": "nmdc:ddfc52144c47fea5c94e690576c0fed9", + "file_size_bytes": 4545937329 + }, + { + "name": "Gp0119858_Protein FAA", + "description": "Protein FAA for Gp0119858", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_proteins.faa", + "md5_checksum": "f2a2de9ba03ec039dc221fb04b79b807", + "id": "nmdc:f2a2de9ba03ec039dc221fb04b79b807", + "file_size_bytes": 3761860 + }, + { + "name": "Gp0119858_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119858", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_structural_annotation.gff", + "md5_checksum": "d42a7f4f7a91533fa91b3167d0363baa", + "id": "nmdc:d42a7f4f7a91533fa91b3167d0363baa", + "file_size_bytes": 2480 + }, + { + "name": "Gp0119858_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119858", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_functional_annotation.gff", + "md5_checksum": "106043abf68dc4999936530d75db61c3", + "id": "nmdc:106043abf68dc4999936530d75db61c3", + "file_size_bytes": 3351132 + }, + { + "name": "Gp0119858_KO TSV file", + "description": "KO TSV file for Gp0119858", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_ko.tsv", + "md5_checksum": "11b981c284ca7a58b8c996442e17e894", + "id": "nmdc:11b981c284ca7a58b8c996442e17e894", + "file_size_bytes": 503435 + }, + { + "name": "Gp0119858_EC TSV file", + "description": "EC TSV file for Gp0119858", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_ec.tsv", + "md5_checksum": "da6c58680a8b6164ec9d85a3edfea49f", + "id": "nmdc:da6c58680a8b6164ec9d85a3edfea49f", + "file_size_bytes": 301864 + }, + { + "name": "Gp0119858_COG GFF file", + "description": "COG GFF file for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_cog.gff", + "md5_checksum": "5c8f14cc8b95522a8d4039feecf11e36", + "id": "nmdc:5c8f14cc8b95522a8d4039feecf11e36", + "file_size_bytes": 2340211 + }, + { + "name": "Gp0119858_PFAM GFF file", + "description": "PFAM GFF file for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_pfam.gff", + "md5_checksum": "c080f8fe5dd7e3153b69cbebdc6eb732", + "id": "nmdc:c080f8fe5dd7e3153b69cbebdc6eb732", + "file_size_bytes": 2186570 + }, + { + "name": "Gp0119858_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_tigrfam.gff", + "md5_checksum": "1fa103f252b881c9a0034991405a37ed", + "id": "nmdc:1fa103f252b881c9a0034991405a37ed", + "file_size_bytes": 469096 + }, + { + "name": "Gp0119858_SMART GFF file", + "description": "SMART GFF file for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_smart.gff", + "md5_checksum": "2537d013dc559ae41c95a3da8da05643", + "id": "nmdc:2537d013dc559ae41c95a3da8da05643", + "file_size_bytes": 730973 + }, + { + "name": "Gp0119858_SuperFam GFF file", + 
"description": "SuperFam GFF file for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_supfam.gff", + "md5_checksum": "4456bb3f27f48ded81c9f8ef97ccbc1d", + "id": "nmdc:4456bb3f27f48ded81c9f8ef97ccbc1d", + "file_size_bytes": 3242945 + }, + { + "name": "Gp0119858_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_cath_funfam.gff", + "md5_checksum": "06ead20673214901eb135ed769cb0730", + "id": "nmdc:06ead20673214901eb135ed769cb0730", + "file_size_bytes": 2935421 + }, + { + "name": "Gp0119858_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/annotation/nmdc_mga0t1k874_ko_ec.gff", + "md5_checksum": "7c19af05f3893ea5aba35e2870c05098", + "id": "nmdc:7c19af05f3893ea5aba35e2870c05098", + "file_size_bytes": 1745966 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119858_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/MAGs/nmdc_mga0t1k874_bins.tooShort.fa", + "md5_checksum": "d7fd2b2506ab787723b3d2a57469d1fa", + "id": "nmdc:d7fd2b2506ab787723b3d2a57469d1fa", + "file_size_bytes": 1592501 + }, + { + "name": "Gp0119858_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119858", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0t1k874/MAGs/nmdc_mga0t1k874_bins.unbinned.fa", + "md5_checksum": "53c369a0a22245d50789021f80405578", + "id": "nmdc:53c369a0a22245d50789021f80405578", + "file_size_bytes": 3411496 + }, + { + "name": "Gp0119858_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119858", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/MAGs/nmdc_mga0t1k874_checkm_qa.out", + "md5_checksum": "7e329552b9d48f3d4d29757beea4b888", + "id": "nmdc:7e329552b9d48f3d4d29757beea4b888", + "file_size_bytes": 1106 + }, + { + "name": "Gp0119858_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119858", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/MAGs/nmdc_mga0t1k874_hqmq_bin.zip", + "md5_checksum": "b9351b819a40741049ac1eb89cdf1231", + "id": "nmdc:b9351b819a40741049ac1eb89cdf1231", + "file_size_bytes": 782503 + }, + { + "name": "Gp0119858_metabat2 bins", + "description": "metabat2 bins for Gp0119858", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t1k874/MAGs/nmdc_mga0t1k874_metabat_bin.zip", + "md5_checksum": "4be9fbc49d724f5d7e4f755f3424b14d", + "id": "nmdc:4be9fbc49d724f5d7e4f755f3424b14d", + "file_size_bytes": 173295 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dfa" + }, + "description": "Assembled contigs fasta for gold:Gp0119858", + "url": "https://data.microbiomedata.org/data/1777_95827/assembly/assembly_contigs.fna", + "file_size_bytes": 8166199, + "type": "nmdc:DataObject", + "id": "nmdc:f5e854c22a065f2895f99a0119b1b41a", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dfb" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119858", + "url": 
"https://data.microbiomedata.org/data/1777_95827/assembly/mapping_stats.txt", + "file_size_bytes": 323257, + "type": "nmdc:DataObject", + "id": "nmdc:5dcccb54edf8d7592f220fcc3d52f87b", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dfd" + }, + "description": "Assembled scaffold fasta for gold:Gp0119858", + "url": "https://data.microbiomedata.org/data/1777_95827/assembly/assembly_scaffolds.fna", + "file_size_bytes": 8153001, + "type": "nmdc:DataObject", + "id": "nmdc:b84f7a35f3718f82659fc9221c543159", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dfe" + }, + "description": "Assembled AGP file for gold:Gp0119858", + "url": "https://data.microbiomedata.org/data/1777_95827/assembly/assembly.agp", + "file_size_bytes": 272176, + "type": "nmdc:DataObject", + "id": "nmdc:0cfcf6ef1b3fc81fd7b72aad3876604e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e08" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119858", + "url": "https://data.microbiomedata.org/data/1777_95827/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4511037289, + "type": "nmdc:DataObject", + "id": "nmdc:c382bbee82f229f20241388a1624ac38", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a63" + }, + "id": "nmdc:e27e560a7847cb493035de9f6cf07987", + "name": "1777_95827.krona.html", + "description": "Gold:Gp0119858 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95827/ReadbasedAnalysis/centrifuge/1777_95827.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a64" + }, + "id": "nmdc:221026713f2d3ae337f4137c3f25c1b2", + "name": 
"1777_95827.json", + "description": "Gold:Gp0119858 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95827/ReadbasedAnalysis/1777_95827.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16378" + }, + "id": "nmdc:daaadba181b54f715fc6aeb14ae5087d", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119858", + "file_size_bytes": 1551452, + "url": "https://data.microbiomedata.org/data/1777_95827/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1637a" + }, + "id": "nmdc:5b2f03d8f55942d428b4e6ce4dc4a170", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119858", + "file_size_bytes": 924, + "url": "https://data.microbiomedata.org/data/1777_95827/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1637c" + }, + "id": "nmdc:86f536a0912277f78d4d0c8d6e85c8e1", + "name": "gold:Gp0119858.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119858", + "file_size_bytes": 203555, + "url": "https://data.microbiomedata.org/data/1777_95827/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1637e" + }, + "id": "nmdc:56707edfc2bed95f039109d708c0402f", + "name": "gold:Gp0119858.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119858", + "file_size_bytes": 210546, + "url": "https://data.microbiomedata.org/data/1777_95827/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1637f" + }, + "id": "nmdc:5686e7d3d57ed01ae962560953600fb1", + "name": "bins.unbinned.fa", + "description": "unbinned 
fasta file from metabat2 for gold:Gp0119858", + "file_size_bytes": 6167751, + "url": "https://data.microbiomedata.org/data/1777_95827/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c78" + }, + "description": "Functional annotation GFF file for gold:Gp0119858", + "url": "https://data.microbiomedata.org/1777_95827/img_annotation/Ga0482157_functional_annotation.gff", + "md5_checksum": "8e0746f6759e85ee867f85191d83a1e9", + "file_size_bytes": 3385, + "id": "nmdc:8e0746f6759e85ee867f85191d83a1e9", + "name": "gold:Gp0119858_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c79" + }, + "description": "KO TSV File for gold:Gp0119858", + "url": "https://data.microbiomedata.org/1777_95827/img_annotation/Ga0482157_ko.tsv", + "md5_checksum": "503667333d0a31fa41da4885bea45c8c", + "file_size_bytes": 3385, + "id": "nmdc:503667333d0a31fa41da4885bea45c8c", + "name": "gold:Gp0119858_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c7a" + }, + "description": "Structural annotation GFF file for gold:Gp0119858", + "url": "https://data.microbiomedata.org/1777_95827/img_annotation/Ga0482157_structural_annotation.gff", + "md5_checksum": "43157d8de38b673ce4a4d70ee3a1198c", + "file_size_bytes": 3385, + "id": "nmdc:43157d8de38b673ce4a4d70ee3a1198c", + "name": "gold:Gp0119858_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c7b" + }, + "description": "Protein FAA for gold:Gp0119858", + "url": "https://data.microbiomedata.org/1777_95827/img_annotation/Ga0482157_proteins.faa", + "md5_checksum": "0b7a5694aefdea2070dd1b21a21a9abb", + "file_size_bytes": 3385, + "id": "nmdc:0b7a5694aefdea2070dd1b21a21a9abb", + "name": 
"gold:Gp0119858_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c83" + }, + "description": "EC TSV File for gold:Gp0119858", + "url": "https://data.microbiomedata.org/1777_95827/img_annotation/Ga0482157_ec.tsv", + "md5_checksum": "db4fc71545bef12273f70878ad83a7fe", + "file_size_bytes": 3385, + "id": "nmdc:db4fc71545bef12273f70878ad83a7fe", + "name": "gold:Gp0119858_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346f1" + }, + "has_input": [ + "nmdc:2cb85bcf96b582fe1fe4213d7ed6226e", + "nmdc:ddfc52144c47fea5c94e690576c0fed9", + "nmdc:106043abf68dc4999936530d75db61c3" + ], + "too_short_contig_num": 3168, + "part_of": [ + "nmdc:mga0t1k874" + ], + "binned_contig_num": 105, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:d7fd2b2506ab787723b3d2a57469d1fa", + "nmdc:53c369a0a22245d50789021f80405578", + "nmdc:7e329552b9d48f3d4d29757beea4b888", + "nmdc:b9351b819a40741049ac1eb89cdf1231", + "nmdc:4be9fbc49d724f5d7e4f755f3424b14d" + ], + "was_informed_by": "gold:Gp0119858", + "input_contig_num": 4270, + "id": "nmdc:d7ee6ea807bff67b75f4ab80609e213f", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0t1k874", + "mags_list": [ + { + "number_of_contig": 72, + "completeness": 95.65, + "bin_name": "bins.1", + "gene_count": 2647, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.38, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 4, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 52 + }, + { + "number_of_contig": 8, + 
"completeness": 0.0, + "bin_name": "bins.2", + "gene_count": 230, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + "number_of_contig": 25, + "completeness": 0.0, + "bin_name": "bins.3", + "gene_count": 388, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + } + ], + "unbinned_contig_num": 997, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:55:49+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef996" + }, + "has_input": [ + "nmdc:2cb85bcf96b582fe1fe4213d7ed6226e" + ], + "part_of": [ + "nmdc:mga0t1k874" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f2a2de9ba03ec039dc221fb04b79b807", + "nmdc:d42a7f4f7a91533fa91b3167d0363baa", + "nmdc:106043abf68dc4999936530d75db61c3", + "nmdc:11b981c284ca7a58b8c996442e17e894", + "nmdc:da6c58680a8b6164ec9d85a3edfea49f", + "nmdc:5c8f14cc8b95522a8d4039feecf11e36", + "nmdc:c080f8fe5dd7e3153b69cbebdc6eb732", + "nmdc:1fa103f252b881c9a0034991405a37ed", + "nmdc:2537d013dc559ae41c95a3da8da05643", + "nmdc:4456bb3f27f48ded81c9f8ef97ccbc1d", + "nmdc:06ead20673214901eb135ed769cb0730", + "nmdc:7c19af05f3893ea5aba35e2870c05098" + ], + "was_informed_by": "gold:Gp0119858", + "id": "nmdc:d7ee6ea807bff67b75f4ab80609e213f", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0t1k874", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": 
"2021-10-11T03:55:49+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f7e" + }, + "has_input": [ + "nmdc:ca8cef098d6e23f2882bde625674bfa1" + ], + "part_of": [ + "nmdc:mga0t1k874" + ], + "ctg_logsum": 89742, + "scaf_logsum": 91536, + "gap_pct": 0.03847, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2cb85bcf96b582fe1fe4213d7ed6226e", + "nmdc:ce00d58196ab832fb3306231639c7eae", + "nmdc:f79dcfd8e7a3906019954975e1df34a2", + "nmdc:0eb5598d0f7c9c4780804b57b3f43957", + "nmdc:ddfc52144c47fea5c94e690576c0fed9" + ], + "asm_score": 28.514, + "was_informed_by": "gold:Gp0119858", + "ctg_powsum": 13532, + "scaf_max": 194733, + "id": "nmdc:d7ee6ea807bff67b75f4ab80609e213f", + "scaf_powsum": 13988, + "execution_resource": "NERSC-Cori", + "contigs": 4270, + "name": "Assembly Activity for nmdc:mga0t1k874", + "ctg_max": 116426, + "gc_std": 0.07022, + "contig_bp": 7930742, + "gc_avg": 0.35213, + "started_at_time": "2021-10-11T02:23:25Z", + "scaf_bp": 7933794, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 4100, + "ended_at_time": "2021-10-11T03:55:49+00:00", + "ctg_l50": 11208, + "ctg_l90": 537, + "ctg_n50": 120, + "ctg_n90": 2039, + "scaf_l50": 12449, + "scaf_l90": 566, + "scaf_n50": 112, + "scaf_n90": 1899, + "scaf_l_gt50k": 1816899, + "scaf_n_gt50k": 23, + "scaf_pct_gt50k": 22.900759 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b68" + }, + "id": "nmdc:omprc-11-r3821s28", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_11_19", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-ewd36288" + ], + "has_output": [ + "jgi:560df5ba0d878540fd6fe203" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale 
carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_11_19", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119858" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c84e" + }, + "has_input": [ + "nmdc:2788686bc46f23883e5e5019835f7990" + ], + "part_of": [ + "nmdc:mga0t1k874" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ca8cef098d6e23f2882bde625674bfa1", + "nmdc:4aa539150c9237638d437aca0039919e" + ], + "was_informed_by": "gold:Gp0119858", + "input_read_count": 72363258, + "output_read_bases": 10588548946, + "id": "nmdc:d7ee6ea807bff67b75f4ab80609e213f", + "execution_resource": "NERSC-Cori", + "input_read_bases": 10926851958, + "name": "Read QC Activity for nmdc:mga0t1k874", + "output_read_count": 72017164, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:55:49+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf16" + }, + "has_input": [ + "nmdc:ca8cef098d6e23f2882bde625674bfa1" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:2e6e862447b6813ae9085fd7cd0cc15e", + "nmdc:87178f480db11c48151e82776d63e0ca", + "nmdc:f03e13e646e4ff20272bbf9fef27e3e6", + "nmdc:b6cb3aadb265b0bb6efd8e56de1f0f4d", + "nmdc:cf76e68caa214f514a199d9cdf47bee9", + "nmdc:32634814ea35dc9d9cd2b13af86d8dc3", + "nmdc:1fc591039fa9014c83f98613402d3dfd", + "nmdc:77738e9cb7e7edc91a71d77e77f6ad78", + "nmdc:9dd14e6b80bc74a94d11855f9b40a494" + ], + "was_informed_by": "gold:Gp0119858", + "id": "nmdc:d7ee6ea807bff67b75f4ab80609e213f", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis 
Activity for nmdc:mga0t1k874", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:55:49+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 7694337469, + "type": "nmdc:DataObject", + "id": "jgi:560df3730d878540fd6fe1cd", + "name": "9567.6.137555.TAAGGCG-CTCTCTA.fastq.gz" + }, + { + "name": "Gp0119850_Filtered Reads", + "description": "Filtered Reads for Gp0119850", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/qa/nmdc_mga0wjj555_filtered.fastq.gz", + "md5_checksum": "927f7cd7cfb5e77d745a13c380ad8554", + "id": "nmdc:927f7cd7cfb5e77d745a13c380ad8554", + "file_size_bytes": 4207745837 + }, + { + "name": "Gp0119850_Filtered Stats", + "description": "Filtered Stats for Gp0119850", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/qa/nmdc_mga0wjj555_filterStats.txt", + "md5_checksum": "e2a4d00eadac0c31535144af71d802b4", + "id": "nmdc:e2a4d00eadac0c31535144af71d802b4", + "file_size_bytes": 286 + }, + { + "name": "Gp0119850_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_gottcha2_report.tsv", + "md5_checksum": "51bedf2b27be026f81ba0c57d6ffa064", + "id": "nmdc:51bedf2b27be026f81ba0c57d6ffa064", + "file_size_bytes": 5024 + }, + { + "name": "Gp0119850_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_gottcha2_report_full.tsv", + "md5_checksum": "3ed5f5501460699e35c05a40a6f8fd43", + "id": "nmdc:3ed5f5501460699e35c05a40a6f8fd43", + "file_size_bytes": 137342 + }, + { + "name": "Gp0119850_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119850", + 
"data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_gottcha2_krona.html", + "md5_checksum": "c8c0673586e67c6005170d38f47dc9b4", + "id": "nmdc:c8c0673586e67c6005170d38f47dc9b4", + "file_size_bytes": 241839 + }, + { + "name": "Gp0119850_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119850", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_centrifuge_classification.tsv", + "md5_checksum": "ccf06daa05d1a692815fff27c69bc119", + "id": "nmdc:ccf06daa05d1a692815fff27c69bc119", + "file_size_bytes": 9286297540 + }, + { + "name": "Gp0119850_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119850", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_centrifuge_report.tsv", + "md5_checksum": "70f650b8d6d021fb6dd71588b0959c23", + "id": "nmdc:70f650b8d6d021fb6dd71588b0959c23", + "file_size_bytes": 209215 + }, + { + "name": "Gp0119850_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119850", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_centrifuge_krona.html", + "md5_checksum": "d1b39fc2d889ec3821be0b5929ac8cf1", + "id": "nmdc:d1b39fc2d889ec3821be0b5929ac8cf1", + "file_size_bytes": 2115088 + }, + { + "name": "Gp0119850_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119850", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_kraken2_classification.tsv", + "md5_checksum": "89b1975173c1b889b27bbfef18ecd4ed", + "id": 
"nmdc:89b1975173c1b889b27bbfef18ecd4ed", + "file_size_bytes": 4939404139 + }, + { + "name": "Gp0119850_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119850", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_kraken2_report.tsv", + "md5_checksum": "a6fb4c15e5684dc8fa75f3c7aaafcf47", + "id": "nmdc:a6fb4c15e5684dc8fa75f3c7aaafcf47", + "file_size_bytes": 413329 + }, + { + "name": "Gp0119850_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119850", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/ReadbasedAnalysis/nmdc_mga0wjj555_kraken2_krona.html", + "md5_checksum": "293c1b40f998968f921242bd4a0359e4", + "id": "nmdc:293c1b40f998968f921242bd4a0359e4", + "file_size_bytes": 2750033 + }, + { + "name": "Gp0119850_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119850", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/assembly/nmdc_mga0wjj555_contigs.fna", + "md5_checksum": "97dacc54aaa7258b55ecc55aa29bf686", + "id": "nmdc:97dacc54aaa7258b55ecc55aa29bf686", + "file_size_bytes": 11988968 + }, + { + "name": "Gp0119850_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119850", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/assembly/nmdc_mga0wjj555_scaffolds.fna", + "md5_checksum": "ed5aaaab658157602b542ea5558fecc6", + "id": "nmdc:ed5aaaab658157602b542ea5558fecc6", + "file_size_bytes": 11940411 + }, + { + "name": "Gp0119850_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/assembly/nmdc_mga0wjj555_covstats.txt", + "md5_checksum": "276bab3d85b7efe0dee9978e94aba7dd", + "id": 
"nmdc:276bab3d85b7efe0dee9978e94aba7dd", + "file_size_bytes": 948777 + }, + { + "name": "Gp0119850_Assembled AGP file", + "description": "Assembled AGP file for Gp0119850", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/assembly/nmdc_mga0wjj555_assembly.agp", + "md5_checksum": "cdb5d42847ef59ef565fea89a4ee7de6", + "id": "nmdc:cdb5d42847ef59ef565fea89a4ee7de6", + "file_size_bytes": 867104 + }, + { + "name": "Gp0119850_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119850", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/assembly/nmdc_mga0wjj555_pairedMapped_sorted.bam", + "md5_checksum": "53857e6c98fda98819f57c2bfb926b4e", + "id": "nmdc:53857e6c98fda98819f57c2bfb926b4e", + "file_size_bytes": 4997511598 + }, + { + "name": "Gp0119850_Protein FAA", + "description": "Protein FAA for Gp0119850", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_proteins.faa", + "md5_checksum": "3054d5060fb91a8d8c282be2582ca78e", + "id": "nmdc:3054d5060fb91a8d8c282be2582ca78e", + "file_size_bytes": 5964123 + }, + { + "name": "Gp0119850_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119850", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_structural_annotation.gff", + "md5_checksum": "0cfdd3ec305d7775a5e67ca8780ee941", + "id": "nmdc:0cfdd3ec305d7775a5e67ca8780ee941", + "file_size_bytes": 2504 + }, + { + "name": "Gp0119850_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119850", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_functional_annotation.gff", + "md5_checksum": 
"136b8f13132fd6ee08c56497b9a149bb", + "id": "nmdc:136b8f13132fd6ee08c56497b9a149bb", + "file_size_bytes": 5955056 + }, + { + "name": "Gp0119850_KO TSV file", + "description": "KO TSV file for Gp0119850", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_ko.tsv", + "md5_checksum": "7f7c8c6bcc4e56d3fe80bd798f912e39", + "id": "nmdc:7f7c8c6bcc4e56d3fe80bd798f912e39", + "file_size_bytes": 989057 + }, + { + "name": "Gp0119850_EC TSV file", + "description": "EC TSV file for Gp0119850", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_ec.tsv", + "md5_checksum": "eb1bcf94c4824f8a311d542708a2fea5", + "id": "nmdc:eb1bcf94c4824f8a311d542708a2fea5", + "file_size_bytes": 596621 + }, + { + "name": "Gp0119850_COG GFF file", + "description": "COG GFF file for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_cog.gff", + "md5_checksum": "fd34f0a50116264cbebd9f95feecd6a7", + "id": "nmdc:fd34f0a50116264cbebd9f95feecd6a7", + "file_size_bytes": 4142136 + }, + { + "name": "Gp0119850_PFAM GFF file", + "description": "PFAM GFF file for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_pfam.gff", + "md5_checksum": "331ad6a440c199dc322733a38a9a75e1", + "id": "nmdc:331ad6a440c199dc322733a38a9a75e1", + "file_size_bytes": 3751156 + }, + { + "name": "Gp0119850_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_tigrfam.gff", + "md5_checksum": "658d49561441147d1b22a9d6e1007d47", + "id": "nmdc:658d49561441147d1b22a9d6e1007d47", + "file_size_bytes": 798205 + }, + { + "name": "Gp0119850_SMART GFF file", + "description": "SMART GFF file for Gp0119850", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_smart.gff", + "md5_checksum": "6c638dd4c5368f164844667000943e44", + "id": "nmdc:6c638dd4c5368f164844667000943e44", + "file_size_bytes": 1120782 + }, + { + "name": "Gp0119850_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_supfam.gff", + "md5_checksum": "2d41e48366af1cc1b0f31abb85c7b620", + "id": "nmdc:2d41e48366af1cc1b0f31abb85c7b620", + "file_size_bytes": 5226073 + }, + { + "name": "Gp0119850_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_cath_funfam.gff", + "md5_checksum": "477db31098cdc2fa0064a92f8d4749bc", + "id": "nmdc:477db31098cdc2fa0064a92f8d4749bc", + "file_size_bytes": 4727188 + }, + { + "name": "Gp0119850_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/annotation/nmdc_mga0wjj555_ko_ec.gff", + "md5_checksum": "2e15cb998f4582ab1d12906db18adab2", + "id": "nmdc:2e15cb998f4582ab1d12906db18adab2", + "file_size_bytes": 3340610 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119850_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/MAGs/nmdc_mga0wjj555_bins.tooShort.fa", + "md5_checksum": 
"4bc01c7e8cba367adb7f0d1ea6d795ff", + "id": "nmdc:4bc01c7e8cba367adb7f0d1ea6d795ff", + "file_size_bytes": 4413079 + }, + { + "name": "Gp0119850_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/MAGs/nmdc_mga0wjj555_bins.unbinned.fa", + "md5_checksum": "439a3405b2afc5c635c8493b256cac07", + "id": "nmdc:439a3405b2afc5c635c8493b256cac07", + "file_size_bytes": 3502282 + }, + { + "name": "Gp0119850_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119850", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/MAGs/nmdc_mga0wjj555_checkm_qa.out", + "md5_checksum": "77cda87ff58e9ba219eb885240e5098f", + "id": "nmdc:77cda87ff58e9ba219eb885240e5098f", + "file_size_bytes": 1204 + }, + { + "name": "Gp0119850_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119850", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/MAGs/nmdc_mga0wjj555_hqmq_bin.zip", + "md5_checksum": "ad282c70035b72c35a5b795b5e8c2f12", + "id": "nmdc:ad282c70035b72c35a5b795b5e8c2f12", + "file_size_bytes": 1111979 + }, + { + "name": "Gp0119850_metabat2 bins", + "description": "metabat2 bins for Gp0119850", + "url": "https://data.microbiomedata.org/data/nmdc:mga0wjj555/MAGs/nmdc_mga0wjj555_metabat_bin.zip", + "md5_checksum": "9c71f0c0d9bd14711440e42aa36b2a13", + "id": "nmdc:9c71f0c0d9bd14711440e42aa36b2a13", + "file_size_bytes": 133802 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd6" + }, + "description": "Assembled scaffold fasta for gold:Gp0119850", + "url": "https://data.microbiomedata.org/data/1777_95819/assembly/assembly_scaffolds.fna", + "file_size_bytes": 11890316, + "type": "nmdc:DataObject", + "id": "nmdc:915b6263dd119a89989426c88806bae8", + "name": 
"assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14deb" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119850", + "url": "https://data.microbiomedata.org/data/1777_95819/assembly/mapping_stats.txt", + "file_size_bytes": 893552, + "type": "nmdc:DataObject", + "id": "nmdc:6ef62a611e518102e9c4a0d34c142174", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dec" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119850", + "url": "https://data.microbiomedata.org/data/1777_95819/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4932320467, + "type": "nmdc:DataObject", + "id": "nmdc:c943750c8e0a26111d6dadb6fe2ba8fb", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ded" + }, + "description": "Assembled contigs fasta for gold:Gp0119850", + "url": "https://data.microbiomedata.org/data/1777_95819/assembly/assembly_contigs.fna", + "file_size_bytes": 11933743, + "type": "nmdc:DataObject", + "id": "nmdc:04b7e946dd85306cc75eb3c59f26bf1d", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dee" + }, + "description": "Assembled AGP file for gold:Gp0119850", + "url": "https://data.microbiomedata.org/data/1777_95819/assembly/assembly.agp", + "file_size_bytes": 746314, + "type": "nmdc:DataObject", + "id": "nmdc:c4e380903201b83c8d7e7b293778599e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a1d" + }, + "id": "nmdc:52ea30f152735d648e11b98df0b37faa", + "name": "1777_95819.json", + "description": "Gold:Gp0119850 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95819/ReadbasedAnalysis/1777_95819.json", + "file_size_bytes": 3385, + 
"type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a59" + }, + "id": "nmdc:657ab08350af14602ad4245737079a12", + "name": "1777_95819.krona.html", + "description": "Gold:Gp0119850 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95819/ReadbasedAnalysis/centrifuge/1777_95819.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1630b" + }, + "id": "nmdc:1e6937d6ccca71001b1fe35eaf5c5eb7", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119850", + "file_size_bytes": 5599427, + "url": "https://data.microbiomedata.org/data/1777_95819/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1630c" + }, + "id": "nmdc:56e64ea9fe190a1e2d9018113e4178d9", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119850", + "file_size_bytes": 1404, + "url": "https://data.microbiomedata.org/data/1777_95819/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1630d" + }, + "id": "nmdc:02abb02e0bf1dfcfba28e6f053eb3bd9", + "name": "gold:Gp0119850.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119850", + "file_size_bytes": 400546, + "url": "https://data.microbiomedata.org/data/1777_95819/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1630e" + }, + "id": "nmdc:dd6ce28b08a24555658b4c9cbb4cb74a", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119850", + "file_size_bytes": 4306353, + "url": "https://data.microbiomedata.org/data/1777_95819/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + 
"_id": { + "$oid": "649b003f1ae706d7b5b1630f" + }, + "id": "nmdc:c262ddb1c0f9dd795bd0c7c06c9f45a0", + "name": "gold:Gp0119850.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119850", + "file_size_bytes": 673625, + "url": "https://data.microbiomedata.org/data/1777_95819/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16310" + }, + "id": "nmdc:f13b12575c064ded6e9b78db7eb0dfbb", + "name": "gold:Gp0119850.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119850", + "file_size_bytes": 392787, + "url": "https://data.microbiomedata.org/data/1777_95819/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16311" + }, + "id": "nmdc:ac938de783d4caa40fc64ebf706c88ac", + "name": "gold:Gp0119850.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119850", + "file_size_bytes": 227095, + "url": "https://data.microbiomedata.org/data/1777_95819/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16313" + }, + "id": "nmdc:ed1600a9f497086eb20611df2d6c176c", + "name": "gold:Gp0119850.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119850", + "file_size_bytes": 258182, + "url": "https://data.microbiomedata.org/data/1777_95819/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c45" + }, + "description": "Functional annotation GFF file for gold:Gp0119850", + "url": "https://data.microbiomedata.org/1777_95819/img_annotation/Ga0482165_functional_annotation.gff", + "md5_checksum": "e18017e4d7721b9aa8f6fe0604d61d28", + "file_size_bytes": 3385, + "id": "nmdc:e18017e4d7721b9aa8f6fe0604d61d28", + "name": 
"gold:Gp0119850_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c46" + }, + "description": "KO TSV File for gold:Gp0119850", + "url": "https://data.microbiomedata.org/1777_95819/img_annotation/Ga0482165_ko.tsv", + "md5_checksum": "72bc285bcb9a4e0bff1b99719cf8346b", + "file_size_bytes": 3385, + "id": "nmdc:72bc285bcb9a4e0bff1b99719cf8346b", + "name": "gold:Gp0119850_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c48" + }, + "description": "Protein FAA for gold:Gp0119850", + "url": "https://data.microbiomedata.org/1777_95819/img_annotation/Ga0482165_proteins.faa", + "md5_checksum": "c93e3ebaf3c21c50bb1071d0e07daa48", + "file_size_bytes": 3385, + "id": "nmdc:c93e3ebaf3c21c50bb1071d0e07daa48", + "name": "gold:Gp0119850_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c49" + }, + "description": "Structural annotation GFF file for gold:Gp0119850", + "url": "https://data.microbiomedata.org/1777_95819/img_annotation/Ga0482165_structural_annotation.gff", + "md5_checksum": "6e16b6f3d73dc771eba4c729600c9551", + "file_size_bytes": 3385, + "id": "nmdc:6e16b6f3d73dc771eba4c729600c9551", + "name": "gold:Gp0119850_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c5a" + }, + "description": "EC TSV File for gold:Gp0119850", + "url": "https://data.microbiomedata.org/1777_95819/img_annotation/Ga0482165_ec.tsv", + "md5_checksum": "dd84dbd2e1b611a1cb6f855f578538d3", + "file_size_bytes": 3385, + "id": "nmdc:dd84dbd2e1b611a1cb6f855f578538d3", + "name": "gold:Gp0119850_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + 
], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346ea" + }, + "has_input": [ + "nmdc:97dacc54aaa7258b55ecc55aa29bf686", + "nmdc:53857e6c98fda98819f57c2bfb926b4e", + "nmdc:136b8f13132fd6ee08c56497b9a149bb" + ], + "too_short_contig_num": 8921, + "part_of": [ + "nmdc:mga0wjj555" + ], + "binned_contig_num": 255, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:4bc01c7e8cba367adb7f0d1ea6d795ff", + "nmdc:439a3405b2afc5c635c8493b256cac07", + "nmdc:77cda87ff58e9ba219eb885240e5098f", + "nmdc:ad282c70035b72c35a5b795b5e8c2f12", + "nmdc:9c71f0c0d9bd14711440e42aa36b2a13" + ], + "was_informed_by": "gold:Gp0119850", + "input_contig_num": 11045, + "id": "nmdc:f8697f5eabfbe268fe8c46d629d74a3d", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0wjj555", + "mags_list": [ + { + "number_of_contig": 99, + "completeness": 17.78, + "bin_name": "bins.1", + "gene_count": 546, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 10, + "completeness": 100.0, + "bin_name": "bins.2", + "gene_count": 2110, + "bin_quality": "HQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + "num_16s": 1, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 47 + }, + { + "number_of_contig": 146, + "completeness": 90.77, + "bin_name": "bins.3", + "gene_count": 2144, + "bin_quality": "HQ", + "gtdbtk_species": "Thermococcus_A sp000430485", + "gtdbtk_order": "Thermococcales", + "num_16s": 1, + 
"gtdbtk_family": "Thermococcaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.5, + "gtdbtk_class": "Thermococci", + "gtdbtk_phylum": "Euryarchaeota", + "num_5s": 2, + "num_23s": 1, + "gtdbtk_genus": "Thermococcus_A", + "num_t_rna": 40 + } + ], + "unbinned_contig_num": 1869, + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:59:48+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef988" + }, + "has_input": [ + "nmdc:97dacc54aaa7258b55ecc55aa29bf686" + ], + "part_of": [ + "nmdc:mga0wjj555" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3054d5060fb91a8d8c282be2582ca78e", + "nmdc:0cfdd3ec305d7775a5e67ca8780ee941", + "nmdc:136b8f13132fd6ee08c56497b9a149bb", + "nmdc:7f7c8c6bcc4e56d3fe80bd798f912e39", + "nmdc:eb1bcf94c4824f8a311d542708a2fea5", + "nmdc:fd34f0a50116264cbebd9f95feecd6a7", + "nmdc:331ad6a440c199dc322733a38a9a75e1", + "nmdc:658d49561441147d1b22a9d6e1007d47", + "nmdc:6c638dd4c5368f164844667000943e44", + "nmdc:2d41e48366af1cc1b0f31abb85c7b620", + "nmdc:477db31098cdc2fa0064a92f8d4749bc", + "nmdc:2e15cb998f4582ab1d12906db18adab2" + ], + "was_informed_by": "gold:Gp0119850", + "id": "nmdc:f8697f5eabfbe268fe8c46d629d74a3d", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0wjj555", + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:59:48+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f7a" + }, + "has_input": [ + "nmdc:927f7cd7cfb5e77d745a13c380ad8554" + ], + "part_of": [ + "nmdc:mga0wjj555" + ], + "ctg_logsum": 100164, + "scaf_logsum": 107773, + "gap_pct": 0.0897, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:97dacc54aaa7258b55ecc55aa29bf686", + 
"nmdc:ed5aaaab658157602b542ea5558fecc6", + "nmdc:276bab3d85b7efe0dee9978e94aba7dd", + "nmdc:cdb5d42847ef59ef565fea89a4ee7de6", + "nmdc:53857e6c98fda98819f57c2bfb926b4e" + ], + "asm_score": 23.208, + "was_informed_by": "gold:Gp0119850", + "ctg_powsum": 16061, + "scaf_max": 795557, + "id": "nmdc:f8697f5eabfbe268fe8c46d629d74a3d", + "scaf_powsum": 17422, + "execution_resource": "NERSC-Cori", + "contigs": 11049, + "name": "Assembly Activity for nmdc:mga0wjj555", + "ctg_max": 662565, + "gc_std": 0.06847, + "contig_bp": 11474561, + "gc_avg": 0.3903, + "started_at_time": "2021-10-11T02:23:26Z", + "scaf_bp": 11484863, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 10019, + "ended_at_time": "2021-10-11T04:59:48+00:00", + "ctg_l50": 1820, + "ctg_l90": 360, + "ctg_n50": 886, + "ctg_n90": 7304, + "scaf_l50": 2271, + "scaf_l90": 374, + "scaf_n50": 743, + "scaf_n90": 6298, + "scaf_l_gt50k": 2014007, + "scaf_n_gt50k": 10, + "scaf_pct_gt50k": 17.536186 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b69" + }, + "id": "nmdc:omprc-11-m7p5j003", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_11", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-nc1c5h12" + ], + "has_output": [ + "jgi:560df3730d878540fd6fe1cd" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_11", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119850" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": 
"649b009d6bdd4fd20273c846" + }, + "has_input": [ + "nmdc:c3972431e1afd5f96eb623ae35e2b196" + ], + "part_of": [ + "nmdc:mga0wjj555" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:927f7cd7cfb5e77d745a13c380ad8554", + "nmdc:e2a4d00eadac0c31535144af71d802b4" + ], + "was_informed_by": "gold:Gp0119850", + "input_read_count": 70388194, + "output_read_bases": 10267249832, + "id": "nmdc:f8697f5eabfbe268fe8c46d629d74a3d", + "execution_resource": "NERSC-Cori", + "input_read_bases": 10628617294, + "name": "Read QC Activity for nmdc:mga0wjj555", + "output_read_count": 69969286, + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:59:48+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf0d" + }, + "has_input": [ + "nmdc:927f7cd7cfb5e77d745a13c380ad8554" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:51bedf2b27be026f81ba0c57d6ffa064", + "nmdc:3ed5f5501460699e35c05a40a6f8fd43", + "nmdc:c8c0673586e67c6005170d38f47dc9b4", + "nmdc:ccf06daa05d1a692815fff27c69bc119", + "nmdc:70f650b8d6d021fb6dd71588b0959c23", + "nmdc:d1b39fc2d889ec3821be0b5929ac8cf1", + "nmdc:89b1975173c1b889b27bbfef18ecd4ed", + "nmdc:a6fb4c15e5684dc8fa75f3c7aaafcf47", + "nmdc:293c1b40f998968f921242bd4a0359e4" + ], + "was_informed_by": "gold:Gp0119850", + "id": "nmdc:f8697f5eabfbe268fe8c46d629d74a3d", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0wjj555", + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:59:48+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 7683549010, + "type": "nmdc:DataObject", + "id": "jgi:560df5ac0d878540fd6fe1eb", + "name": "9567.7.137562.AGGCAGA-TATCCTC.fastq.gz" + }, + { + "name": 
"Gp0119855_Filtered Reads", + "description": "Filtered Reads for Gp0119855", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/qa/nmdc_mga0rhwf78_filtered.fastq.gz", + "md5_checksum": "c4c25a3b4cb43f6829a9e91ebf38d46d", + "id": "nmdc:c4c25a3b4cb43f6829a9e91ebf38d46d", + "file_size_bytes": 4329123012 + }, + { + "name": "Gp0119855_Filtered Stats", + "description": "Filtered Stats for Gp0119855", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/qa/nmdc_mga0rhwf78_filterStats.txt", + "md5_checksum": "edda4c2c71997011ec13230a36955fb7", + "id": "nmdc:edda4c2c71997011ec13230a36955fb7", + "file_size_bytes": 285 + }, + { + "name": "Gp0119855_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_gottcha2_report.tsv", + "md5_checksum": "3ddc464c63e36738e0aa69b472a06d85", + "id": "nmdc:3ddc464c63e36738e0aa69b472a06d85", + "file_size_bytes": 9163 + }, + { + "name": "Gp0119855_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_gottcha2_report_full.tsv", + "md5_checksum": "0b0a32234f0f24150414912dde84d517", + "id": "nmdc:0b0a32234f0f24150414912dde84d517", + "file_size_bytes": 391271 + }, + { + "name": "Gp0119855_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119855", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_gottcha2_krona.html", + "md5_checksum": "7048fd73e300305a69e150993b2b2199", + "id": "nmdc:7048fd73e300305a69e150993b2b2199", + "file_size_bytes": 255703 + }, + { + "name": "Gp0119855_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for 
Gp0119855", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_centrifuge_classification.tsv", + "md5_checksum": "24055da71c4fa3cccbd40334b18ad3f8", + "id": "nmdc:24055da71c4fa3cccbd40334b18ad3f8", + "file_size_bytes": 5443521434 + }, + { + "name": "Gp0119855_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119855", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_centrifuge_report.tsv", + "md5_checksum": "a3e2201278b2290c0bf5a3124c60b2f5", + "id": "nmdc:a3e2201278b2290c0bf5a3124c60b2f5", + "file_size_bytes": 231867 + }, + { + "name": "Gp0119855_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119855", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_centrifuge_krona.html", + "md5_checksum": "bebb860b29a80c50e8b8d77d09267d3e", + "id": "nmdc:bebb860b29a80c50e8b8d77d09267d3e", + "file_size_bytes": 2209175 + }, + { + "name": "Gp0119855_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119855", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_kraken2_classification.tsv", + "md5_checksum": "7640dd6bc8fd2ab4ac8db0f223c811db", + "id": "nmdc:7640dd6bc8fd2ab4ac8db0f223c811db", + "file_size_bytes": 4560184420 + }, + { + "name": "Gp0119855_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119855", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_kraken2_report.tsv", + "md5_checksum": "150bcf191dc41f8e53ebf05299d1889f", + "id": 
"nmdc:150bcf191dc41f8e53ebf05299d1889f", + "file_size_bytes": 513031 + }, + { + "name": "Gp0119855_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119855", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/ReadbasedAnalysis/nmdc_mga0rhwf78_kraken2_krona.html", + "md5_checksum": "56ebbe81ee6e491f2e97aa4793494038", + "id": "nmdc:56ebbe81ee6e491f2e97aa4793494038", + "file_size_bytes": 3293381 + }, + { + "name": "Gp0119855_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119855", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/assembly/nmdc_mga0rhwf78_contigs.fna", + "md5_checksum": "72d9b2da970226ddc4b29af3a80369f2", + "id": "nmdc:72d9b2da970226ddc4b29af3a80369f2", + "file_size_bytes": 30161250 + }, + { + "name": "Gp0119855_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119855", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/assembly/nmdc_mga0rhwf78_scaffolds.fna", + "md5_checksum": "86896d7fad67fa01144d51eb1c5f4194", + "id": "nmdc:86896d7fad67fa01144d51eb1c5f4194", + "file_size_bytes": 30107423 + }, + { + "name": "Gp0119855_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/assembly/nmdc_mga0rhwf78_covstats.txt", + "md5_checksum": "fef50041169407aebd79ac3db3a23247", + "id": "nmdc:fef50041169407aebd79ac3db3a23247", + "file_size_bytes": 1457822 + }, + { + "name": "Gp0119855_Assembled AGP file", + "description": "Assembled AGP file for Gp0119855", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/assembly/nmdc_mga0rhwf78_assembly.agp", + "md5_checksum": "120c0ab8dd7f764030bf7a6a3b8f2f2a", + "id": "nmdc:120c0ab8dd7f764030bf7a6a3b8f2f2a", + 
"file_size_bytes": 1326552 + }, + { + "name": "Gp0119855_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119855", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/assembly/nmdc_mga0rhwf78_pairedMapped_sorted.bam", + "md5_checksum": "1170daa2f3051db4fe87f70770b57ebc", + "id": "nmdc:1170daa2f3051db4fe87f70770b57ebc", + "file_size_bytes": 5367264111 + }, + { + "name": "Gp0119855_Protein FAA", + "description": "Protein FAA for Gp0119855", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_proteins.faa", + "md5_checksum": "eeccb048d24ed73e29c98a7f17435db2", + "id": "nmdc:eeccb048d24ed73e29c98a7f17435db2", + "file_size_bytes": 14022448 + }, + { + "name": "Gp0119855_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119855", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_structural_annotation.gff", + "md5_checksum": "7dd5a8ee8352b2992bdd4d82f4a6644a", + "id": "nmdc:7dd5a8ee8352b2992bdd4d82f4a6644a", + "file_size_bytes": 2515 + }, + { + "name": "Gp0119855_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119855", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_functional_annotation.gff", + "md5_checksum": "291e5bba1424916559850e836ac53750", + "id": "nmdc:291e5bba1424916559850e836ac53750", + "file_size_bytes": 12410261 + }, + { + "name": "Gp0119855_KO TSV file", + "description": "KO TSV file for Gp0119855", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_ko.tsv", + "md5_checksum": "f08e6505606ddadb256232c3509372d3", + "id": 
"nmdc:f08e6505606ddadb256232c3509372d3", + "file_size_bytes": 1912368 + }, + { + "name": "Gp0119855_EC TSV file", + "description": "EC TSV file for Gp0119855", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_ec.tsv", + "md5_checksum": "cf0bd3a64372722f017a2670ad7f4d64", + "id": "nmdc:cf0bd3a64372722f017a2670ad7f4d64", + "file_size_bytes": 1145426 + }, + { + "name": "Gp0119855_COG GFF file", + "description": "COG GFF file for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_cog.gff", + "md5_checksum": "fc16c105e79bbd7456ccb7ba6a86c6b8", + "id": "nmdc:fc16c105e79bbd7456ccb7ba6a86c6b8", + "file_size_bytes": 8732348 + }, + { + "name": "Gp0119855_PFAM GFF file", + "description": "PFAM GFF file for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_pfam.gff", + "md5_checksum": "5da5924c6a1dceea9174f1699f3234d9", + "id": "nmdc:5da5924c6a1dceea9174f1699f3234d9", + "file_size_bytes": 8691283 + }, + { + "name": "Gp0119855_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_tigrfam.gff", + "md5_checksum": "26b2f10cb1bb7991f3324291884d1fbf", + "id": "nmdc:26b2f10cb1bb7991f3324291884d1fbf", + "file_size_bytes": 1998502 + }, + { + "name": "Gp0119855_SMART GFF file", + "description": "SMART GFF file for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_smart.gff", + "md5_checksum": "069d272754b0f51539f2e730293bd051", + "id": "nmdc:069d272754b0f51539f2e730293bd051", + "file_size_bytes": 2770788 + }, + { + "name": "Gp0119855_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_supfam.gff", + "md5_checksum": 
"35243540dd1e2d89352f8cfd5ab68382", + "id": "nmdc:35243540dd1e2d89352f8cfd5ab68382", + "file_size_bytes": 11739490 + }, + { + "name": "Gp0119855_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_cath_funfam.gff", + "md5_checksum": "c966f9f11d9212288d708bde36577f73", + "id": "nmdc:c966f9f11d9212288d708bde36577f73", + "file_size_bytes": 11204315 + }, + { + "name": "Gp0119855_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/annotation/nmdc_mga0rhwf78_ko_ec.gff", + "md5_checksum": "181267dd543332fa2105b68296b3aadb", + "id": "nmdc:181267dd543332fa2105b68296b3aadb", + "file_size_bytes": 6270034 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119855_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/MAGs/nmdc_mga0rhwf78_bins.tooShort.fa", + "md5_checksum": "f8833ab43df655c1973b44c8dea6b4f2", + "id": "nmdc:f8833ab43df655c1973b44c8dea6b4f2", + "file_size_bytes": 6549209 + }, + { + "name": "Gp0119855_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/MAGs/nmdc_mga0rhwf78_bins.unbinned.fa", + "md5_checksum": "1a97539a761e951c70491d9e67d74a24", + "id": "nmdc:1a97539a761e951c70491d9e67d74a24", + 
"file_size_bytes": 9982448 + }, + { + "name": "Gp0119855_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119855", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/MAGs/nmdc_mga0rhwf78_checkm_qa.out", + "md5_checksum": "384c87eed849db5e3dfd46f7336d5f9b", + "id": "nmdc:384c87eed849db5e3dfd46f7336d5f9b", + "file_size_bytes": 1956 + }, + { + "name": "Gp0119855_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119855", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/MAGs/nmdc_mga0rhwf78_hqmq_bin.zip", + "md5_checksum": "1751bbe019e4710a6f42fe2e000ed576", + "id": "nmdc:1751bbe019e4710a6f42fe2e000ed576", + "file_size_bytes": 2626389 + }, + { + "name": "Gp0119855_metabat2 bins", + "description": "metabat2 bins for Gp0119855", + "url": "https://data.microbiomedata.org/data/nmdc:mga0rhwf78/MAGs/nmdc_mga0rhwf78_metabat_bin.zip", + "md5_checksum": "4126a0912ea6d0094209753e3a01bf4c", + "id": "nmdc:4126a0912ea6d0094209753e3a01bf4c", + "file_size_bytes": 1514744 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de4" + }, + "description": "Assembled contigs fasta for gold:Gp0119855", + "url": "https://data.microbiomedata.org/data/1777_95824/assembly/assembly_contigs.fna", + "file_size_bytes": 30071075, + "type": "nmdc:DataObject", + "id": "nmdc:fb5ab52924b554184ac31308e4126b44", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de5" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119855", + "url": "https://data.microbiomedata.org/data/1777_95824/assembly/mapping_stats.txt", + "file_size_bytes": 1367647, + "type": "nmdc:DataObject", + "id": "nmdc:5b9b8f8ea55b1c8cf9a143725a56bc62", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage 
Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de6" + }, + "description": "Assembled scaffold fasta for gold:Gp0119855", + "url": "https://data.microbiomedata.org/data/1777_95824/assembly/assembly_scaffolds.fna", + "file_size_bytes": 30018568, + "type": "nmdc:DataObject", + "id": "nmdc:76eddeda38dc154736510ca352ce7770", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de7" + }, + "description": "Assembled AGP file for gold:Gp0119855", + "url": "https://data.microbiomedata.org/data/1777_95824/assembly/assembly.agp", + "file_size_bytes": 1143442, + "type": "nmdc:DataObject", + "id": "nmdc:f6d9798694aafc92799757f523143382", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df5" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119855", + "url": "https://data.microbiomedata.org/data/1777_95824/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 5325085543, + "type": "nmdc:DataObject", + "id": "nmdc:2b2bed2fa832e8c66ec6c496101fa58b", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a77" + }, + "id": "nmdc:d662610281b72bedc42f9c8c0102493b", + "name": "1777_95824.krona.html", + "description": "Gold:Gp0119855 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95824/ReadbasedAnalysis/centrifuge/1777_95824.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a82" + }, + "id": "nmdc:16867e01f6307cde3298f40d4b5f5000", + "name": "1777_95824.json", + "description": "Gold:Gp0119855 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95824/ReadbasedAnalysis/1777_95824.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b1634c" + }, + "id": "nmdc:0dc2a823d9c967a7d7fdd7332e5a0920", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119855", + "file_size_bytes": 17028645, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1634d" + }, + "id": "nmdc:e176a953c50e23c485c9469b1f82bc08", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119855", + "file_size_bytes": 6384931, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1634e" + }, + "id": "nmdc:c80480946846739010ec328825363e1f", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119855", + "file_size_bytes": 3634, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1634f" + }, + "id": "nmdc:306b9bb89641274f696040599f193fc5", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119855", + "file_size_bytes": 1352, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16350" + }, + "id": "nmdc:a00d6fc9094ede64edf29f74f66ca0a6", + "name": "gold:Gp0119855.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 669397, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16351" + }, + 
"id": "nmdc:10379f6ddb2076e30dfebfbaa398e85d", + "name": "gold:Gp0119855.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 638415, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16352" + }, + "id": "nmdc:6cfc5731c5517b917a760cd492cc2b9d", + "name": "gold:Gp0119855.bins.18.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 331997, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.18.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16353" + }, + "id": "nmdc:6dc879bdc58ecf8d502c31eb1568c4fa", + "name": "gold:Gp0119855.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 299156, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16354" + }, + "id": "nmdc:4e49578765714e382a104b08f08e45aa", + "name": "gold:Gp0119855.bins.19.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 227707, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.19.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16355" + }, + "id": "nmdc:8ea3c3d92e3dd4c0101be580471e4df5", + "name": "gold:Gp0119855.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 206899, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16356" + }, + "id": "nmdc:184f4a756b1c02eb2eb5c30324b22ba8", + "name": "gold:Gp0119855.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 345412, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16357" + }, + "id": "nmdc:7a30228d3e15f98645ad71a3d9820c12", + "name": "gold:Gp0119855.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 271735, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16358" + }, + "id": "nmdc:f1f14e477fd46f7f4cc6cac361ed37de", + "name": "gold:Gp0119855.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 493308, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16359" + }, + "id": "nmdc:3ed850e21f5f5196bab4680153ade5d8", + "name": "gold:Gp0119855.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 410925, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1635a" + }, + "id": "nmdc:2a6ab89514eaffac5661cf694f383275", + "name": "gold:Gp0119855.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 375008, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome 
Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1635b" + }, + "id": "nmdc:fcc00bc801e6093ebff250a2daa95eba", + "name": "gold:Gp0119855.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 297288, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1635c" + }, + "id": "nmdc:3996111d96fb39b003982149b80ea21b", + "name": "gold:Gp0119855.bins.16.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 250862, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1635d" + }, + "id": "nmdc:3784a22638f3516e3f2ab5c233c683d5", + "name": "gold:Gp0119855.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 423592, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1635e" + }, + "id": "nmdc:3673776bf3dc6b2d8c77ca77cee61379", + "name": "gold:Gp0119855.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 256694, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1635f" + }, + "id": "nmdc:e69371ff7c292defbde05121fb914877", + "name": "gold:Gp0119855.bins.17.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 267895, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.17.fa", + "type": 
"nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16360" + }, + "id": "nmdc:c8cd29ca6f08c087243ff7d5a6734a89", + "name": "gold:Gp0119855.bins.15.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 240352, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16361" + }, + "id": "nmdc:0419ae3c5c2772ab8b90347f1aeedd24", + "name": "gold:Gp0119855.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 294252, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16362" + }, + "id": "nmdc:95b0bbdcba77ee385321289ab22e4a2b", + "name": "gold:Gp0119855.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119855", + "file_size_bytes": 223799, + "url": "https://data.microbiomedata.org/data/1777_95824/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c64" + }, + "description": "Protein FAA for gold:Gp0119855", + "url": "https://data.microbiomedata.org/1777_95824/img_annotation/Ga0482160_proteins.faa", + "md5_checksum": "bb8a58ce8c8850f95dc84aa49a7a96f7", + "file_size_bytes": 3385, + "id": "nmdc:bb8a58ce8c8850f95dc84aa49a7a96f7", + "name": "gold:Gp0119855_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c66" + }, + "description": "Functional annotation GFF file for gold:Gp0119855", + "url": "https://data.microbiomedata.org/1777_95824/img_annotation/Ga0482160_functional_annotation.gff", + "md5_checksum": 
"7a953d15a0f368c71cb365c2dd888a83", + "file_size_bytes": 3385, + "id": "nmdc:7a953d15a0f368c71cb365c2dd888a83", + "name": "gold:Gp0119855_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c67" + }, + "description": "Structural annotation GFF file for gold:Gp0119855", + "url": "https://data.microbiomedata.org/1777_95824/img_annotation/Ga0482160_structural_annotation.gff", + "md5_checksum": "3a05f63e1899f82d11d931adfe69d86a", + "file_size_bytes": 3385, + "id": "nmdc:3a05f63e1899f82d11d931adfe69d86a", + "name": "gold:Gp0119855_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c71" + }, + "description": "EC TSV File for gold:Gp0119855", + "url": "https://data.microbiomedata.org/1777_95824/img_annotation/Ga0482160_ec.tsv", + "md5_checksum": "465d9547f4c5d5aa4d341555f5769d9a", + "file_size_bytes": 3385, + "id": "nmdc:465d9547f4c5d5aa4d341555f5769d9a", + "name": "gold:Gp0119855_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c74" + }, + "description": "KO TSV File for gold:Gp0119855", + "url": "https://data.microbiomedata.org/1777_95824/img_annotation/Ga0482160_ko.tsv", + "md5_checksum": "34a4bbe14e8867d45e95e205738b6d3f", + "file_size_bytes": 3385, + "id": "nmdc:34a4bbe14e8867d45e95e205738b6d3f", + "name": "gold:Gp0119855_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346f4" + }, + "has_input": [ + "nmdc:72d9b2da970226ddc4b29af3a80369f2", + "nmdc:1170daa2f3051db4fe87f70770b57ebc", + "nmdc:291e5bba1424916559850e836ac53750" + ], + "too_short_contig_num": 13043, + "part_of": [ + "nmdc:mga0rhwf78" + ], + "binned_contig_num": 907, 
+ "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:f8833ab43df655c1973b44c8dea6b4f2", + "nmdc:1a97539a761e951c70491d9e67d74a24", + "nmdc:384c87eed849db5e3dfd46f7336d5f9b", + "nmdc:1751bbe019e4710a6f42fe2e000ed576", + "nmdc:4126a0912ea6d0094209753e3a01bf4c" + ], + "was_informed_by": "gold:Gp0119855", + "input_contig_num": 18035, + "id": "nmdc:3242b01ba1155fcbdf2f140ce4439b41", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0rhwf78", + "mags_list": [ + { + "number_of_contig": 79, + "completeness": 43.08, + "bin_name": "bins.1", + "gene_count": 684, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.65, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 29, + "completeness": 95.54, + "bin_name": "bins.2", + "gene_count": 1884, + "bin_quality": "HQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + "num_16s": 1, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 44 + }, + { + "number_of_contig": 79, + "completeness": 87.87, + "bin_name": "bins.3", + "gene_count": 1936, + "bin_quality": "MQ", + "gtdbtk_species": "Thermococcus_A sp000430485", + "gtdbtk_order": "Thermococcales", + "num_16s": 0, + "gtdbtk_family": "Thermococcaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.0, + "gtdbtk_class": "Thermococci", + "gtdbtk_phylum": "Euryarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Thermococcus_A", + "num_t_rna": 30 + }, + { + "number_of_contig": 70, + "completeness": 79.31, + "bin_name": "bins.4", + "gene_count": 
2872, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium sp003070825", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 3.45, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 4, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 53 + }, + { + "number_of_contig": 220, + "completeness": 89.71, + "bin_name": "bins.5", + "gene_count": 2432, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 4.42, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 19 + }, + { + "number_of_contig": 367, + "completeness": 97.05, + "bin_name": "bins.6", + "gene_count": 3950, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 2, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 88.58, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 56 + }, + { + "number_of_contig": 37, + "completeness": 0.0, + "bin_name": "bins.7", + "gene_count": 299, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 26, + "completeness": 0.0, + "bin_name": "bins.8", + "gene_count": 238, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + } + ], + 
"unbinned_contig_num": 4085, + "started_at_time": "2021-10-11T02:23:28Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:55:32+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef990" + }, + "has_input": [ + "nmdc:72d9b2da970226ddc4b29af3a80369f2" + ], + "part_of": [ + "nmdc:mga0rhwf78" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:eeccb048d24ed73e29c98a7f17435db2", + "nmdc:7dd5a8ee8352b2992bdd4d82f4a6644a", + "nmdc:291e5bba1424916559850e836ac53750", + "nmdc:f08e6505606ddadb256232c3509372d3", + "nmdc:cf0bd3a64372722f017a2670ad7f4d64", + "nmdc:fc16c105e79bbd7456ccb7ba6a86c6b8", + "nmdc:5da5924c6a1dceea9174f1699f3234d9", + "nmdc:26b2f10cb1bb7991f3324291884d1fbf", + "nmdc:069d272754b0f51539f2e730293bd051", + "nmdc:35243540dd1e2d89352f8cfd5ab68382", + "nmdc:c966f9f11d9212288d708bde36577f73", + "nmdc:181267dd543332fa2105b68296b3aadb" + ], + "was_informed_by": "gold:Gp0119855", + "id": "nmdc:3242b01ba1155fcbdf2f140ce4439b41", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0rhwf78", + "started_at_time": "2021-10-11T02:23:28Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:55:32+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f80" + }, + "has_input": [ + "nmdc:c4c25a3b4cb43f6829a9e91ebf38d46d" + ], + "part_of": [ + "nmdc:mga0rhwf78" + ], + "ctg_logsum": 308922, + "scaf_logsum": 311415, + "gap_pct": 0.02038, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:72d9b2da970226ddc4b29af3a80369f2", + "nmdc:86896d7fad67fa01144d51eb1c5f4194", + "nmdc:fef50041169407aebd79ac3db3a23247", + "nmdc:120c0ab8dd7f764030bf7a6a3b8f2f2a", + "nmdc:1170daa2f3051db4fe87f70770b57ebc" + ], + "asm_score": 21.612, + "was_informed_by": "gold:Gp0119855", + "ctg_powsum": 44670, + "scaf_max": 271279, + "id": 
"nmdc:3242b01ba1155fcbdf2f140ce4439b41", + "scaf_powsum": 45337, + "execution_resource": "NERSC-Cori", + "contigs": 18041, + "name": "Assembly Activity for nmdc:mga0rhwf78", + "ctg_max": 271279, + "gc_std": 0.06544, + "contig_bp": 29138452, + "gc_avg": 0.37753, + "started_at_time": "2021-10-11T02:23:28Z", + "scaf_bp": 29144392, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 17771, + "ended_at_time": "2021-10-11T04:55:32+00:00", + "ctg_l50": 5444, + "ctg_l90": 488, + "ctg_n50": 848, + "ctg_n90": 9551, + "scaf_l50": 5777, + "scaf_l90": 494, + "scaf_n50": 809, + "scaf_n90": 9322, + "scaf_l_gt50k": 4038176, + "scaf_n_gt50k": 43, + "scaf_pct_gt50k": 13.855757 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b6a" + }, + "id": "nmdc:omprc-11-pmgah004", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_22", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-rmxfpb74" + ], + "has_output": [ + "jgi:560df5ac0d878540fd6fe1eb" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_22", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119855" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c84d" + }, + "has_input": [ + "nmdc:a51c8d51e453f237c19e3e4a70157eac" + ], + "part_of": [ + "nmdc:mga0rhwf78" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c4c25a3b4cb43f6829a9e91ebf38d46d", + 
"nmdc:edda4c2c71997011ec13230a36955fb7" + ], + "was_informed_by": "gold:Gp0119855", + "input_read_count": 74893374, + "output_read_bases": 10916291909, + "id": "nmdc:3242b01ba1155fcbdf2f140ce4439b41", + "execution_resource": "NERSC-Cori", + "input_read_bases": 11308899474, + "name": "Read QC Activity for nmdc:mga0rhwf78", + "output_read_count": 74414458, + "started_at_time": "2021-10-11T02:23:28Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:55:32+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf0e" + }, + "has_input": [ + "nmdc:c4c25a3b4cb43f6829a9e91ebf38d46d" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3ddc464c63e36738e0aa69b472a06d85", + "nmdc:0b0a32234f0f24150414912dde84d517", + "nmdc:7048fd73e300305a69e150993b2b2199", + "nmdc:24055da71c4fa3cccbd40334b18ad3f8", + "nmdc:a3e2201278b2290c0bf5a3124c60b2f5", + "nmdc:bebb860b29a80c50e8b8d77d09267d3e", + "nmdc:7640dd6bc8fd2ab4ac8db0f223c811db", + "nmdc:150bcf191dc41f8e53ebf05299d1889f", + "nmdc:56ebbe81ee6e491f2e97aa4793494038" + ], + "was_informed_by": "gold:Gp0119855", + "id": "nmdc:3242b01ba1155fcbdf2f140ce4439b41", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0rhwf78", + "started_at_time": "2021-10-11T02:23:28Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:55:32+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 6299980526, + "type": "nmdc:DataObject", + "id": "jgi:560df5af0d878540fd6fe1f0", + "name": "9567.7.137562.GGACTCC-GCGTAAG.fastq.gz" + }, + { + "name": "Gp0119857_Filtered Reads", + "description": "Filtered Reads for Gp0119857", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/qa/nmdc_mga08dbn21_filtered.fastq.gz", + "md5_checksum": 
"6af614f4bf666a67a271811f292a4fd0", + "id": "nmdc:6af614f4bf666a67a271811f292a4fd0", + "file_size_bytes": 3425921251 + }, + { + "name": "Gp0119857_Filtered Stats", + "description": "Filtered Stats for Gp0119857", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/qa/nmdc_mga08dbn21_filterStats.txt", + "md5_checksum": "52d7e275243ea438d59dfdeb927c2b69", + "id": "nmdc:52d7e275243ea438d59dfdeb927c2b69", + "file_size_bytes": 283 + }, + { + "name": "Gp0119857_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_gottcha2_report.tsv", + "md5_checksum": "342a8c1e5ca14ad5608c5cb61e7b6b15", + "id": "nmdc:342a8c1e5ca14ad5608c5cb61e7b6b15", + "file_size_bytes": 7622 + }, + { + "name": "Gp0119857_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_gottcha2_report_full.tsv", + "md5_checksum": "7505a7554466ccb7f62a11720635b363", + "id": "nmdc:7505a7554466ccb7f62a11720635b363", + "file_size_bytes": 465802 + }, + { + "name": "Gp0119857_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119857", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_gottcha2_krona.html", + "md5_checksum": "83a5b54e9f7315cebba2a9a69460f300", + "id": "nmdc:83a5b54e9f7315cebba2a9a69460f300", + "file_size_bytes": 250618 + }, + { + "name": "Gp0119857_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119857", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_centrifuge_classification.tsv", + "md5_checksum": 
"51995b5bbe53c70d7d7ad1a75a815a04", + "id": "nmdc:51995b5bbe53c70d7d7ad1a75a815a04", + "file_size_bytes": 4976604747 + }, + { + "name": "Gp0119857_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119857", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_centrifuge_report.tsv", + "md5_checksum": "7738f32c03f01f0a929ec1c572574aac", + "id": "nmdc:7738f32c03f01f0a929ec1c572574aac", + "file_size_bytes": 232284 + }, + { + "name": "Gp0119857_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119857", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_centrifuge_krona.html", + "md5_checksum": "6d5107e3c88e4beafe7d54b927f95013", + "id": "nmdc:6d5107e3c88e4beafe7d54b927f95013", + "file_size_bytes": 2211387 + }, + { + "name": "Gp0119857_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119857", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_kraken2_classification.tsv", + "md5_checksum": "ec26120e4cf82af3ec7e9bb65f254553", + "id": "nmdc:ec26120e4cf82af3ec7e9bb65f254553", + "file_size_bytes": 3864484659 + }, + { + "name": "Gp0119857_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119857", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_kraken2_report.tsv", + "md5_checksum": "21a4a81d59984113dfa7bf4d4194252d", + "id": "nmdc:21a4a81d59984113dfa7bf4d4194252d", + "file_size_bytes": 525680 + }, + { + "name": "Gp0119857_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119857", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08dbn21/ReadbasedAnalysis/nmdc_mga08dbn21_kraken2_krona.html", + "md5_checksum": "3036f6c68b5d7b8cf7629baf471eda96", + "id": "nmdc:3036f6c68b5d7b8cf7629baf471eda96", + "file_size_bytes": 3374594 + }, + { + "name": "Gp0119857_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119857", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/assembly/nmdc_mga08dbn21_contigs.fna", + "md5_checksum": "7bf168977c3b03b331530c3832e2a76a", + "id": "nmdc:7bf168977c3b03b331530c3832e2a76a", + "file_size_bytes": 14786279 + }, + { + "name": "Gp0119857_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119857", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/assembly/nmdc_mga08dbn21_scaffolds.fna", + "md5_checksum": "1ff83a710f6d59d631e0aeaaf541ab34", + "id": "nmdc:1ff83a710f6d59d631e0aeaaf541ab34", + "file_size_bytes": 14764820 + }, + { + "name": "Gp0119857_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/assembly/nmdc_mga08dbn21_covstats.txt", + "md5_checksum": "17956479897475b8468a09021eb20377", + "id": "nmdc:17956479897475b8468a09021eb20377", + "file_size_bytes": 658656 + }, + { + "name": "Gp0119857_Assembled AGP file", + "description": "Assembled AGP file for Gp0119857", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/assembly/nmdc_mga08dbn21_assembly.agp", + "md5_checksum": "948a460cfcd924239888545687a33431", + "id": "nmdc:948a460cfcd924239888545687a33431", + "file_size_bytes": 596040 + }, + { + "name": "Gp0119857_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119857", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08dbn21/assembly/nmdc_mga08dbn21_pairedMapped_sorted.bam", + "md5_checksum": "5a04c4c3a150eeb88cd188db2acf750b", + "id": "nmdc:5a04c4c3a150eeb88cd188db2acf750b", + "file_size_bytes": 4381031238 + }, + { + "name": "Gp0119857_Protein FAA", + "description": "Protein FAA for Gp0119857", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_proteins.faa", + "md5_checksum": "0f1f5ef39d4ed088890cc3ebf0948a4e", + "id": "nmdc:0f1f5ef39d4ed088890cc3ebf0948a4e", + "file_size_bytes": 6837194 + }, + { + "name": "Gp0119857_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119857", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_structural_annotation.gff", + "md5_checksum": "86c3e08f61c9a644cc6c301bfb6065f8", + "id": "nmdc:86c3e08f61c9a644cc6c301bfb6065f8", + "file_size_bytes": 2513 + }, + { + "name": "Gp0119857_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119857", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_functional_annotation.gff", + "md5_checksum": "74da64b1bae62c587ef86a649a832d1d", + "id": "nmdc:74da64b1bae62c587ef86a649a832d1d", + "file_size_bytes": 6381052 + }, + { + "name": "Gp0119857_KO TSV file", + "description": "KO TSV file for Gp0119857", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_ko.tsv", + "md5_checksum": "a8c1c6fd2759b92797a42ff496bb42b2", + "id": "nmdc:a8c1c6fd2759b92797a42ff496bb42b2", + "file_size_bytes": 974120 + }, + { + "name": "Gp0119857_EC TSV file", + "description": "EC TSV file for Gp0119857", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_ec.tsv", + "md5_checksum": "8ddfaa10876d27e0db762e5a9952a9f4", + "id": "nmdc:8ddfaa10876d27e0db762e5a9952a9f4", + "file_size_bytes": 585423 + }, + { + "name": "Gp0119857_COG GFF file", + "description": "COG GFF file for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_cog.gff", + "md5_checksum": "23bd6b3f3e14a45aae85bce359297e50", + "id": "nmdc:23bd6b3f3e14a45aae85bce359297e50", + "file_size_bytes": 4320641 + }, + { + "name": "Gp0119857_PFAM GFF file", + "description": "PFAM GFF file for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_pfam.gff", + "md5_checksum": "438fa90c7bb640f5058690fbd6c1266b", + "id": "nmdc:438fa90c7bb640f5058690fbd6c1266b", + "file_size_bytes": 4284382 + }, + { + "name": "Gp0119857_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_tigrfam.gff", + "md5_checksum": "cbcf9c31fa4ceebc741a2198c3fa5ada", + "id": "nmdc:cbcf9c31fa4ceebc741a2198c3fa5ada", + "file_size_bytes": 975502 + }, + { + "name": "Gp0119857_SMART GFF file", + "description": "SMART GFF file for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_smart.gff", + "md5_checksum": "72a572abd1d907ddf7022015dfe970f0", + "id": "nmdc:72a572abd1d907ddf7022015dfe970f0", + "file_size_bytes": 1352639 + }, + { + "name": "Gp0119857_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_supfam.gff", + "md5_checksum": "8d98ad473b0b1967a8059fba18f6ed95", + "id": "nmdc:8d98ad473b0b1967a8059fba18f6ed95", + "file_size_bytes": 5834964 + }, + { + "name": "Gp0119857_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119857", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_cath_funfam.gff", + "md5_checksum": "98a36932a6eeaca28f47753e4109501a", + "id": "nmdc:98a36932a6eeaca28f47753e4109501a", + "file_size_bytes": 5492540 + }, + { + "name": "Gp0119857_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/annotation/nmdc_mga08dbn21_ko_ec.gff", + "md5_checksum": "9823209d53b34bd65e98c2d16d1c592f", + "id": "nmdc:9823209d53b34bd65e98c2d16d1c592f", + "file_size_bytes": 3301876 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119857_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/MAGs/nmdc_mga08dbn21_bins.tooShort.fa", + "md5_checksum": "f08a62aee3702f494f905234c8653bb6", + "id": "nmdc:f08a62aee3702f494f905234c8653bb6", + "file_size_bytes": 2745175 + }, + { + "name": "Gp0119857_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/MAGs/nmdc_mga08dbn21_bins.unbinned.fa", + "md5_checksum": "e5f0d48d62132e811d5d5d97caa9adcd", + "id": "nmdc:e5f0d48d62132e811d5d5d97caa9adcd", + "file_size_bytes": 4300637 + }, + { + "name": "Gp0119857_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119857", + "data_object_type": "CheckM Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga08dbn21/MAGs/nmdc_mga08dbn21_checkm_qa.out", + "md5_checksum": "bd035be5ec4e2706f5326a2cdc3947ad", + "id": "nmdc:bd035be5ec4e2706f5326a2cdc3947ad", + "file_size_bytes": 1557 + }, + { + "name": "Gp0119857_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119857", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/MAGs/nmdc_mga08dbn21_hqmq_bin.zip", + "md5_checksum": "ba37cbbba32e9696c976744ea91ae152", + "id": "nmdc:ba37cbbba32e9696c976744ea91ae152", + "file_size_bytes": 2261711 + }, + { + "name": "Gp0119857_metabat2 bins", + "description": "metabat2 bins for Gp0119857", + "url": "https://data.microbiomedata.org/data/nmdc:mga08dbn21/MAGs/nmdc_mga08dbn21_metabat_bin.zip", + "md5_checksum": "1e3396219406b7571a744cf3da65cb9b", + "id": "nmdc:1e3396219406b7571a744cf3da65cb9b", + "file_size_bytes": 85437 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df6" + }, + "description": "Assembled scaffold fasta for gold:Gp0119857", + "url": "https://data.microbiomedata.org/data/1777_95826/assembly/assembly_scaffolds.fna", + "file_size_bytes": 14724370, + "type": "nmdc:DataObject", + "id": "nmdc:0920916000ea342071e3d7937158e985", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df8" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119857", + "url": "https://data.microbiomedata.org/data/1777_95826/assembly/mapping_stats.txt", + "file_size_bytes": 617636, + "type": "nmdc:DataObject", + "id": "nmdc:f637eaa4b847fc3367109652cacb0956", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df9" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119857", + "url": "https://data.microbiomedata.org/data/1777_95826/assembly/pairedMapped_sorted.bam", + 
"file_size_bytes": 4349823544, + "type": "nmdc:DataObject", + "id": "nmdc:cb9e40c71aa2d55bf61403de39114e1c", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e01" + }, + "description": "Assembled contigs fasta for gold:Gp0119857", + "url": "https://data.microbiomedata.org/data/1777_95826/assembly/assembly_contigs.fna", + "file_size_bytes": 14745259, + "type": "nmdc:DataObject", + "id": "nmdc:33fa88501b23f0498c6af05c5924704e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e04" + }, + "description": "Assembled AGP file for gold:Gp0119857", + "url": "https://data.microbiomedata.org/data/1777_95826/assembly/assembly.agp", + "file_size_bytes": 512540, + "type": "nmdc:DataObject", + "id": "nmdc:0c84690930a696bec14b4e61dbb72011", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a50" + }, + "id": "nmdc:0674ff311d239d69ef08ed2b6a479a03", + "name": "1777_95826.krona.html", + "description": "Gold:Gp0119857 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95826/ReadbasedAnalysis/centrifuge/1777_95826.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a57" + }, + "id": "nmdc:97203a4b0dd4f6e84b415a49b727b779", + "name": "1777_95826.json", + "description": "Gold:Gp0119857 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95826/ReadbasedAnalysis/1777_95826.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16369" + }, + "id": "nmdc:60d0b9c0d6142ca2e0b427ff3524ec79", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119857", + "file_size_bytes": 2665030, + 
"url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1636a" + }, + "id": "nmdc:018e3f113491630c0bcd5244d57e7810", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119857", + "file_size_bytes": 2560, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1636b" + }, + "id": "nmdc:60ceae985ecd64923240a1f583dec8e0", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119857", + "file_size_bytes": 1351, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1636d" + }, + "id": "nmdc:e605f33a8e4b73b146ba73ed41eab3a0", + "name": "gold:Gp0119857.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 349604, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1636e" + }, + "id": "nmdc:acd2eebc2ed5b2abfe51c3899f551412", + "name": "gold:Gp0119857.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 215971, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1636f" + }, + "id": "nmdc:7ae15313514b8f2ed6e1c3c759646580", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119857", + "file_size_bytes": 6925856, + "url": 
"https://data.microbiomedata.org/data/1777_95826/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16370" + }, + "id": "nmdc:8762bed22a7c34d20fffc831201cb305", + "name": "gold:Gp0119857.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 218028, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16371" + }, + "id": "nmdc:461b4e418186da171ff99504c41ea83d", + "name": "gold:Gp0119857.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 1293718, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16372" + }, + "id": "nmdc:60934f4dfad7b5d50b8074e4c239a25f", + "name": "gold:Gp0119857.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 851371, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16373" + }, + "id": "nmdc:deb09f541bf6493d1af4899920129fd0", + "name": "gold:Gp0119857.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 244762, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16374" + }, + "id": "nmdc:bd2dcb9cd92679736b6b198e77b85b55", + "name": "gold:Gp0119857.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 210461, + "url": 
"https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16375" + }, + "id": "nmdc:a92b6b2a1adfebd4346b6a3360144a8c", + "name": "gold:Gp0119857.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 284558, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16376" + }, + "id": "nmdc:47d392db518d913270ca38bf17381ca1", + "name": "gold:Gp0119857.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 417999, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16377" + }, + "id": "nmdc:f991db92623d0daee9720ba37a7e202a", + "name": "gold:Gp0119857.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 303791, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16379" + }, + "id": "nmdc:f14c9c505ee32bd939e6cf58d46b573a", + "name": "gold:Gp0119857.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + "file_size_bytes": 232592, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1637b" + }, + "id": "nmdc:4ea2e6fbd2bdc1b582c0a4dd5c6a4b6e", + "name": "gold:Gp0119857.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119857", + 
"file_size_bytes": 467076, + "url": "https://data.microbiomedata.org/data/1777_95826/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c6f" + }, + "description": "EC TSV File for gold:Gp0119857", + "url": "https://data.microbiomedata.org/1777_95826/img_annotation/Ga0482158_ec.tsv", + "md5_checksum": "ecbfb703f2b4204f9f67de20840bd355", + "file_size_bytes": 3385, + "id": "nmdc:ecbfb703f2b4204f9f67de20840bd355", + "name": "gold:Gp0119857_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c72" + }, + "description": "KO TSV File for gold:Gp0119857", + "url": "https://data.microbiomedata.org/1777_95826/img_annotation/Ga0482158_ko.tsv", + "md5_checksum": "60524c5c69a1c8b4a2609f4a9cb3a490", + "file_size_bytes": 3385, + "id": "nmdc:60524c5c69a1c8b4a2609f4a9cb3a490", + "name": "gold:Gp0119857_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c73" + }, + "description": "Functional annotation GFF file for gold:Gp0119857", + "url": "https://data.microbiomedata.org/1777_95826/img_annotation/Ga0482158_functional_annotation.gff", + "md5_checksum": "4a4032318b50bdc3bae5d290a7f0735e", + "file_size_bytes": 3385, + "id": "nmdc:4a4032318b50bdc3bae5d290a7f0735e", + "name": "gold:Gp0119857_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c76" + }, + "description": "Protein FAA for gold:Gp0119857", + "url": "https://data.microbiomedata.org/1777_95826/img_annotation/Ga0482158_proteins.faa", + "md5_checksum": "5a77bb24847fcbf1c3fec6c40aa2ac14", + "file_size_bytes": 3385, + "id": "nmdc:5a77bb24847fcbf1c3fec6c40aa2ac14", + "name": "gold:Gp0119857_Protein FAA", + "data_object_type": 
"Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c77" + }, + "description": "Structural annotation GFF file for gold:Gp0119857", + "url": "https://data.microbiomedata.org/1777_95826/img_annotation/Ga0482158_structural_annotation.gff", + "md5_checksum": "89ef0619642dead830035ac9389ca802", + "file_size_bytes": 3385, + "id": "nmdc:89ef0619642dead830035ac9389ca802", + "name": "gold:Gp0119857_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346ec" + }, + "has_input": [ + "nmdc:7bf168977c3b03b331530c3832e2a76a", + "nmdc:5a04c4c3a150eeb88cd188db2acf750b", + "nmdc:74da64b1bae62c587ef86a649a832d1d" + ], + "too_short_contig_num": 6167, + "part_of": [ + "nmdc:mga08dbn21" + ], + "binned_contig_num": 393, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:f08a62aee3702f494f905234c8653bb6", + "nmdc:e5f0d48d62132e811d5d5d97caa9adcd", + "nmdc:bd035be5ec4e2706f5326a2cdc3947ad", + "nmdc:ba37cbbba32e9696c976744ea91ae152", + "nmdc:1e3396219406b7571a744cf3da65cb9b" + ], + "was_informed_by": "gold:Gp0119857", + "input_contig_num": 8204, + "id": "nmdc:1d176422b1bc213621a6f1d5460c249c", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga08dbn21", + "mags_list": [ + { + "number_of_contig": 197, + "completeness": 56.43, + "bin_name": "bins.1", + "gene_count": 1524, + "bin_quality": "MQ", + "gtdbtk_species": "Thermoanaerobacter pseudethanolicus", + "gtdbtk_order": "Thermoanaerobacterales", + "num_16s": 2, + "gtdbtk_family": "Thermoanaerobacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.78, + "gtdbtk_class": "Thermoanaerobacteria", + "gtdbtk_phylum": "Firmicutes_A", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Thermoanaerobacter", + "num_t_rna": 27 + 
}, + { + "number_of_contig": 14, + "completeness": 100.0, + "bin_name": "bins.2", + "gene_count": 2028, + "bin_quality": "HQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + "num_16s": 1, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 45 + }, + { + "number_of_contig": 56, + "completeness": 15.95, + "bin_name": "bins.3", + "gene_count": 335, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 37, + "completeness": 89.57, + "bin_name": "bins.4", + "gene_count": 2772, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.94, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 3, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 39 + }, + { + "number_of_contig": 89, + "completeness": 89.11, + "bin_name": "bins.5", + "gene_count": 1925, + "bin_quality": "MQ", + "gtdbtk_species": "Thermococcus_A sp000430485", + "gtdbtk_order": "Thermococcales", + "num_16s": 0, + "gtdbtk_family": "Thermococcaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.0, + "gtdbtk_class": "Thermococci", + "gtdbtk_phylum": "Euryarchaeota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Thermococcus_A", + "num_t_rna": 38 + } + ], + "unbinned_contig_num": 1644, + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:02:34+00:00" + } + ], + 
"metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef992" + }, + "has_input": [ + "nmdc:7bf168977c3b03b331530c3832e2a76a" + ], + "part_of": [ + "nmdc:mga08dbn21" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:0f1f5ef39d4ed088890cc3ebf0948a4e", + "nmdc:86c3e08f61c9a644cc6c301bfb6065f8", + "nmdc:74da64b1bae62c587ef86a649a832d1d", + "nmdc:a8c1c6fd2759b92797a42ff496bb42b2", + "nmdc:8ddfaa10876d27e0db762e5a9952a9f4", + "nmdc:23bd6b3f3e14a45aae85bce359297e50", + "nmdc:438fa90c7bb640f5058690fbd6c1266b", + "nmdc:cbcf9c31fa4ceebc741a2198c3fa5ada", + "nmdc:72a572abd1d907ddf7022015dfe970f0", + "nmdc:8d98ad473b0b1967a8059fba18f6ed95", + "nmdc:98a36932a6eeaca28f47753e4109501a", + "nmdc:9823209d53b34bd65e98c2d16d1c592f" + ], + "was_informed_by": "gold:Gp0119857", + "id": "nmdc:1d176422b1bc213621a6f1d5460c249c", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga08dbn21", + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:02:34+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f7c" + }, + "has_input": [ + "nmdc:6af614f4bf666a67a271811f292a4fd0" + ], + "part_of": [ + "nmdc:mga08dbn21" + ], + "ctg_logsum": 168480, + "scaf_logsum": 170487, + "gap_pct": 0.02103, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7bf168977c3b03b331530c3832e2a76a", + "nmdc:1ff83a710f6d59d631e0aeaaf541ab34", + "nmdc:17956479897475b8468a09021eb20377", + "nmdc:948a460cfcd924239888545687a33431", + "nmdc:5a04c4c3a150eeb88cd188db2acf750b" + ], + "asm_score": 28.433, + "was_informed_by": "gold:Gp0119857", + "ctg_powsum": 27788, + "scaf_max": 664050, + "id": "nmdc:1d176422b1bc213621a6f1d5460c249c", + "scaf_powsum": 28657, + "execution_resource": "NERSC-Cori", + "contigs": 8220, + "name": "Assembly Activity for nmdc:mga08dbn21", 
+ "ctg_max": 664050, + "gc_std": 0.11979, + "contig_bp": 14310341, + "gc_avg": 0.38695, + "started_at_time": "2021-10-11T02:23:27Z", + "scaf_bp": 14313351, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 8090, + "ended_at_time": "2021-10-11T04:02:34+00:00", + "ctg_l50": 8813, + "ctg_l90": 468, + "ctg_n50": 198, + "ctg_n90": 3643, + "scaf_l50": 9018, + "scaf_l90": 478, + "scaf_n50": 188, + "scaf_n90": 3538, + "scaf_l_gt50k": 4337836, + "scaf_n_gt50k": 32, + "scaf_pct_gt50k": 30.306223 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b6b" + }, + "id": "nmdc:omprc-11-d8hs4q36", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_11_5", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-j4d11578" + ], + "has_output": [ + "jgi:560df5af0d878540fd6fe1f0" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_11_5", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119857" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c847" + }, + "has_input": [ + "nmdc:855ae6700ece4571af8713bb622b12e3" + ], + "part_of": [ + "nmdc:mga08dbn21" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6af614f4bf666a67a271811f292a4fd0", + "nmdc:52d7e275243ea438d59dfdeb927c2b69" + ], + "was_informed_by": "gold:Gp0119857", + "input_read_count": 61774156, + "output_read_bases": 9063177747, + "id": "nmdc:1d176422b1bc213621a6f1d5460c249c", + 
"execution_resource": "NERSC-Cori", + "input_read_bases": 9327897556, + "name": "Read QC Activity for nmdc:mga08dbn21", + "output_read_count": 61389016, + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:02:34+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf21" + }, + "has_input": [ + "nmdc:6af614f4bf666a67a271811f292a4fd0" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:342a8c1e5ca14ad5608c5cb61e7b6b15", + "nmdc:7505a7554466ccb7f62a11720635b363", + "nmdc:83a5b54e9f7315cebba2a9a69460f300", + "nmdc:51995b5bbe53c70d7d7ad1a75a815a04", + "nmdc:7738f32c03f01f0a929ec1c572574aac", + "nmdc:6d5107e3c88e4beafe7d54b927f95013", + "nmdc:ec26120e4cf82af3ec7e9bb65f254553", + "nmdc:21a4a81d59984113dfa7bf4d4194252d", + "nmdc:3036f6c68b5d7b8cf7629baf471eda96" + ], + "was_informed_by": "gold:Gp0119857", + "id": "nmdc:1d176422b1bc213621a6f1d5460c249c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga08dbn21", + "started_at_time": "2021-10-11T02:23:27Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:02:34+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 7194685424, + "type": "nmdc:DataObject", + "id": "jgi:560df5b10d878540fd6fe1f4", + "name": "9567.7.137562.GGACTCC-CTCTCTA.fastq.gz" + }, + { + "name": "Gp0119860_Filtered Reads", + "description": "Filtered Reads for Gp0119860", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/qa/nmdc_mga072kj20_filtered.fastq.gz", + "md5_checksum": "72bca8012b3b44cabec9cd3cf8a8b1aa", + "id": "nmdc:72bca8012b3b44cabec9cd3cf8a8b1aa", + "file_size_bytes": 3963144922 + }, + { + "name": "Gp0119860_Filtered Stats", + "description": "Filtered Stats for Gp0119860", + 
"data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/qa/nmdc_mga072kj20_filterStats.txt", + "md5_checksum": "4259cedf8236bb79bd22c9981bd178d3", + "id": "nmdc:4259cedf8236bb79bd22c9981bd178d3", + "file_size_bytes": 285 + }, + { + "name": "Gp0119860_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_gottcha2_report.tsv", + "md5_checksum": "30d55843684f4668e0b0a25b7766e07d", + "id": "nmdc:30d55843684f4668e0b0a25b7766e07d", + "file_size_bytes": 2886 + }, + { + "name": "Gp0119860_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_gottcha2_report_full.tsv", + "md5_checksum": "7272381b06bd0cdf751e8f59e6a2c4f3", + "id": "nmdc:7272381b06bd0cdf751e8f59e6a2c4f3", + "file_size_bytes": 110954 + }, + { + "name": "Gp0119860_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119860", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_gottcha2_krona.html", + "md5_checksum": "f68887321b67024a212d42248d458500", + "id": "nmdc:f68887321b67024a212d42248d458500", + "file_size_bytes": 234894 + }, + { + "name": "Gp0119860_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119860", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_centrifuge_classification.tsv", + "md5_checksum": "541588f6e257f8857c3be04bf30c4764", + "id": "nmdc:541588f6e257f8857c3be04bf30c4764", + "file_size_bytes": 5000022590 + }, + { + "name": "Gp0119860_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119860", + 
"data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_centrifuge_report.tsv", + "md5_checksum": "12bf780ce6ac1a4b7719a22ce13fd94d", + "id": "nmdc:12bf780ce6ac1a4b7719a22ce13fd94d", + "file_size_bytes": 213750 + }, + { + "name": "Gp0119860_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119860", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_centrifuge_krona.html", + "md5_checksum": "c47a32bfb71522ab04146c82957963fc", + "id": "nmdc:c47a32bfb71522ab04146c82957963fc", + "file_size_bytes": 2117178 + }, + { + "name": "Gp0119860_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119860", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_kraken2_classification.tsv", + "md5_checksum": "be495745508eb65e3176301a7685e4e0", + "id": "nmdc:be495745508eb65e3176301a7685e4e0", + "file_size_bytes": 4368648453 + }, + { + "name": "Gp0119860_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119860", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_kraken2_report.tsv", + "md5_checksum": "e49a05db8b7c3371fb1031361fc2582b", + "id": "nmdc:e49a05db8b7c3371fb1031361fc2582b", + "file_size_bytes": 442233 + }, + { + "name": "Gp0119860_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119860", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/ReadbasedAnalysis/nmdc_mga072kj20_kraken2_krona.html", + "md5_checksum": "a7148eb3e520bf6b53e2750d721368a8", + "id": "nmdc:a7148eb3e520bf6b53e2750d721368a8", + "file_size_bytes": 
2905428 + }, + { + "name": "Gp0119860_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119860", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/assembly/nmdc_mga072kj20_contigs.fna", + "md5_checksum": "4a7606943c840d2932d95785e1ac98b6", + "id": "nmdc:4a7606943c840d2932d95785e1ac98b6", + "file_size_bytes": 9375203 + }, + { + "name": "Gp0119860_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119860", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/assembly/nmdc_mga072kj20_scaffolds.fna", + "md5_checksum": "f8e831027ac09d74844afdbea2d16616", + "id": "nmdc:f8e831027ac09d74844afdbea2d16616", + "file_size_bytes": 9370001 + }, + { + "name": "Gp0119860_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/assembly/nmdc_mga072kj20_covstats.txt", + "md5_checksum": "5c3431c6d6b7c1dbd796d7de934927f8", + "id": "nmdc:5c3431c6d6b7c1dbd796d7de934927f8", + "file_size_bytes": 231260 + }, + { + "name": "Gp0119860_Assembled AGP file", + "description": "Assembled AGP file for Gp0119860", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/assembly/nmdc_mga072kj20_assembly.agp", + "md5_checksum": "5368525b5a4829523cd3656804eb4df6", + "id": "nmdc:5368525b5a4829523cd3656804eb4df6", + "file_size_bytes": 197378 + }, + { + "name": "Gp0119860_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119860", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/assembly/nmdc_mga072kj20_pairedMapped_sorted.bam", + "md5_checksum": "625b5b8c24e30e6712ff45bf3caed868", + "id": "nmdc:625b5b8c24e30e6712ff45bf3caed868", + "file_size_bytes": 4965806176 + }, + { + "name": "Gp0119860_Protein 
FAA", + "description": "Protein FAA for Gp0119860", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_proteins.faa", + "md5_checksum": "9c9518338544e4733e6c0797a83a4987", + "id": "nmdc:9c9518338544e4733e6c0797a83a4987", + "file_size_bytes": 4012415 + }, + { + "name": "Gp0119860_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119860", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_structural_annotation.gff", + "md5_checksum": "fea164c4df1c3d9b6b825ab25be80e9c", + "id": "nmdc:fea164c4df1c3d9b6b825ab25be80e9c", + "file_size_bytes": 2479 + }, + { + "name": "Gp0119860_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119860", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_functional_annotation.gff", + "md5_checksum": "16adb9f2174f148d3fd05185d4dea71e", + "id": "nmdc:16adb9f2174f148d3fd05185d4dea71e", + "file_size_bytes": 3257964 + }, + { + "name": "Gp0119860_KO TSV file", + "description": "KO TSV file for Gp0119860", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_ko.tsv", + "md5_checksum": "8f01685588833485696ff22cf9ffb172", + "id": "nmdc:8f01685588833485696ff22cf9ffb172", + "file_size_bytes": 458148 + }, + { + "name": "Gp0119860_EC TSV file", + "description": "EC TSV file for Gp0119860", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_ec.tsv", + "md5_checksum": "f1014b4332d7049c61b1b1b93f39771f", + "id": "nmdc:f1014b4332d7049c61b1b1b93f39771f", + "file_size_bytes": 267914 + }, + { + "name": "Gp0119860_COG GFF file", + 
"description": "COG GFF file for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_cog.gff", + "md5_checksum": "ac539889f559ad8392c48a420ca305bb", + "id": "nmdc:ac539889f559ad8392c48a420ca305bb", + "file_size_bytes": 2294567 + }, + { + "name": "Gp0119860_PFAM GFF file", + "description": "PFAM GFF file for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_pfam.gff", + "md5_checksum": "04ef3b2fb029fed22149b1bbe630ec12", + "id": "nmdc:04ef3b2fb029fed22149b1bbe630ec12", + "file_size_bytes": 2446101 + }, + { + "name": "Gp0119860_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_tigrfam.gff", + "md5_checksum": "d5c065c0d417c6e16b613f9d6a5df1f7", + "id": "nmdc:d5c065c0d417c6e16b613f9d6a5df1f7", + "file_size_bytes": 597127 + }, + { + "name": "Gp0119860_SMART GFF file", + "description": "SMART GFF file for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_smart.gff", + "md5_checksum": "0e55c05f3c402081071cbbc8af5c62e3", + "id": "nmdc:0e55c05f3c402081071cbbc8af5c62e3", + "file_size_bytes": 813347 + }, + { + "name": "Gp0119860_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_supfam.gff", + "md5_checksum": "2fe815eaf3a2d03ceaa7e37bf6fd3c54", + "id": "nmdc:2fe815eaf3a2d03ceaa7e37bf6fd3c54", + "file_size_bytes": 3360034 + }, + { + "name": "Gp0119860_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_cath_funfam.gff", + "md5_checksum": "16d787b8a1f72d093f93f68eeced5209", + "id": "nmdc:16d787b8a1f72d093f93f68eeced5209", + "file_size_bytes": 3239060 + }, + { + "name": "Gp0119860_KO_EC GFF file", 
+ "description": "KO_EC GFF file for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/annotation/nmdc_mga072kj20_ko_ec.gff", + "md5_checksum": "cc618677e541b73b195c304718ce4669", + "id": "nmdc:cc618677e541b73b195c304718ce4669", + "file_size_bytes": 1617336 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119860_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/MAGs/nmdc_mga072kj20_bins.tooShort.fa", + "md5_checksum": "5836dbcde1807ef21aa88be550c32671", + "id": "nmdc:5836dbcde1807ef21aa88be550c32671", + "file_size_bytes": 756051 + }, + { + "name": "Gp0119860_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/MAGs/nmdc_mga072kj20_bins.unbinned.fa", + "md5_checksum": "c4cd3018e7a5626fdbac8c51620f1cbf", + "id": "nmdc:c4cd3018e7a5626fdbac8c51620f1cbf", + "file_size_bytes": 4075752 + }, + { + "name": "Gp0119860_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119860", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/MAGs/nmdc_mga072kj20_checkm_qa.out", + "md5_checksum": "045a9e8261603605c684b0f17831fb5e", + "id": "nmdc:045a9e8261603605c684b0f17831fb5e", + "file_size_bytes": 1106 + }, + { + "name": "Gp0119860_high-quality and medium-quality 
bins", + "description": "high-quality and medium-quality bins for Gp0119860", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/MAGs/nmdc_mga072kj20_hqmq_bin.zip", + "md5_checksum": "76745fd157e3c127b281d36da839685c", + "id": "nmdc:76745fd157e3c127b281d36da839685c", + "file_size_bytes": 1281316 + }, + { + "name": "Gp0119860_metabat2 bins", + "description": "metabat2 bins for Gp0119860", + "url": "https://data.microbiomedata.org/data/nmdc:mga072kj20/MAGs/nmdc_mga072kj20_metabat_bin.zip", + "md5_checksum": "2a4751b981eecc7cf15dd923bc920a15", + "id": "nmdc:2a4751b981eecc7cf15dd923bc920a15", + "file_size_bytes": 87460 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e07" + }, + "description": "Assembled AGP file for gold:Gp0119860", + "url": "https://data.microbiomedata.org/data/1777_95829/assembly/assembly.agp", + "file_size_bytes": 169688, + "type": "nmdc:DataObject", + "id": "nmdc:56655b3db50d0723a92b42b255915d7d", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e09" + }, + "description": "Assembled scaffold fasta for gold:Gp0119860", + "url": "https://data.microbiomedata.org/data/1777_95829/assembly/assembly_scaffolds.fna", + "file_size_bytes": 9356686, + "type": "nmdc:DataObject", + "id": "nmdc:334b666ac13e8cea54579594889a62e2", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e0b" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119860", + "url": "https://data.microbiomedata.org/data/1777_95829/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4930027649, + "type": "nmdc:DataObject", + "id": "nmdc:6db44a020ca36b6256d5a2b206f8eb00", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e0c" + }, + "description": "Assembled contigs fasta for gold:Gp0119860", + 
"url": "https://data.microbiomedata.org/data/1777_95829/assembly/assembly_contigs.fna", + "file_size_bytes": 9361628, + "type": "nmdc:DataObject", + "id": "nmdc:fdc028609bab950eb4289953fbe03aa5", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e13" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119860", + "url": "https://data.microbiomedata.org/data/1777_95829/assembly/mapping_stats.txt", + "file_size_bytes": 217685, + "type": "nmdc:DataObject", + "id": "nmdc:50f1941e82e1d729dfdea7396db90ec0", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a76" + }, + "id": "nmdc:b9c05acc1029c5ed761a660a145e45c2", + "name": "1777_95829.krona.html", + "description": "Gold:Gp0119860 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95829/ReadbasedAnalysis/centrifuge/1777_95829.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a7f" + }, + "id": "nmdc:40369877f0186d25041a34076246e420", + "name": "1777_95829.json", + "description": "Gold:Gp0119860 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95829/ReadbasedAnalysis/1777_95829.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16383" + }, + "id": "nmdc:dba3367577240848012fe5d66fda1d70", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119860", + "file_size_bytes": 734410, + "url": "https://data.microbiomedata.org/data/1777_95829/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16385" + }, + "id": "nmdc:e366b6fbc0ac06b16bf3fddf7bd7e57f", + "name": "gold:Gp0119860.bins.2.fa", + "description": "metabat2 
binned contig file for gold:Gp0119860", + "file_size_bytes": 1258730, + "url": "https://data.microbiomedata.org/data/1777_95829/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16386" + }, + "id": "nmdc:db89859ff61788cf06e776a504ec5817", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119860", + "file_size_bytes": 6955983, + "url": "https://data.microbiomedata.org/data/1777_95829/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16389" + }, + "id": "nmdc:33be0e2d5cd3467ffb373d04b87559b3", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119860", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1777_95829/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1638a" + }, + "id": "nmdc:823ea558b3b245e69f36c017260c517a", + "name": "gold:Gp0119860.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119860", + "file_size_bytes": 391963, + "url": "https://data.microbiomedata.org/data/1777_95829/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c7f" + }, + "description": "KO TSV File for gold:Gp0119860", + "url": "https://data.microbiomedata.org/1777_95829/img_annotation/Ga0482155_ko.tsv", + "md5_checksum": "7137d225d4e84d63175b89e1f74b7728", + "file_size_bytes": 3385, + "id": "nmdc:7137d225d4e84d63175b89e1f74b7728", + "name": "gold:Gp0119860_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c80" + }, + "description": "Functional annotation GFF file for gold:Gp0119860", + "url": 
"https://data.microbiomedata.org/1777_95829/img_annotation/Ga0482155_functional_annotation.gff", + "md5_checksum": "2fa5357b6e025470478224cc8c4d8443", + "file_size_bytes": 3385, + "id": "nmdc:2fa5357b6e025470478224cc8c4d8443", + "name": "gold:Gp0119860_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c81" + }, + "description": "EC TSV File for gold:Gp0119860", + "url": "https://data.microbiomedata.org/1777_95829/img_annotation/Ga0482155_ec.tsv", + "md5_checksum": "cf2282967219cfc4462f91840aae4126", + "file_size_bytes": 3385, + "id": "nmdc:cf2282967219cfc4462f91840aae4126", + "name": "gold:Gp0119860_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c84" + }, + "description": "Protein FAA for gold:Gp0119860", + "url": "https://data.microbiomedata.org/1777_95829/img_annotation/Ga0482155_proteins.faa", + "md5_checksum": "613152955d79f4344828738fb898d679", + "file_size_bytes": 3385, + "id": "nmdc:613152955d79f4344828738fb898d679", + "name": "gold:Gp0119860_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c85" + }, + "description": "Structural annotation GFF file for gold:Gp0119860", + "url": "https://data.microbiomedata.org/1777_95829/img_annotation/Ga0482155_structural_annotation.gff", + "md5_checksum": "959ef4ff2d105052b094138aa09b5c94", + "file_size_bytes": 3385, + "id": "nmdc:959ef4ff2d105052b094138aa09b5c94", + "name": "gold:Gp0119860_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346f6" + }, + "has_input": [ + "nmdc:4a7606943c840d2932d95785e1ac98b6", + "nmdc:625b5b8c24e30e6712ff45bf3caed868", + 
"nmdc:16adb9f2174f148d3fd05185d4dea71e" + ], + "too_short_contig_num": 1665, + "part_of": [ + "nmdc:mga072kj20" + ], + "binned_contig_num": 239, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:5836dbcde1807ef21aa88be550c32671", + "nmdc:c4cd3018e7a5626fdbac8c51620f1cbf", + "nmdc:045a9e8261603605c684b0f17831fb5e", + "nmdc:76745fd157e3c127b281d36da839685c", + "nmdc:2a4751b981eecc7cf15dd923bc920a15" + ], + "was_informed_by": "gold:Gp0119860", + "input_contig_num": 2715, + "id": "nmdc:3e61df1bd053b23abbda1322dd07af71", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga072kj20", + "mags_list": [ + { + "number_of_contig": 21, + "completeness": 0.0, + "bin_name": "bins.1", + "gene_count": 300, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 105, + "completeness": 94.83, + "bin_name": "bins.2", + "gene_count": 2426, + "bin_quality": "HQ", + "gtdbtk_species": "Frackibacter sp900114655", + "gtdbtk_order": "Halobacteroidales", + "num_16s": 1, + "gtdbtk_family": "Acetohalobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 2, + "num_23s": 1, + "gtdbtk_genus": "Frackibacter", + "num_t_rna": 52 + }, + { + "number_of_contig": 113, + "completeness": 88.7, + "bin_name": "bins.3", + "gene_count": 1963, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.94, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 2, + "num_23s": 0, 
+ "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 28 + } + ], + "unbinned_contig_num": 811, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:31:01+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef99f" + }, + "has_input": [ + "nmdc:4a7606943c840d2932d95785e1ac98b6" + ], + "part_of": [ + "nmdc:mga072kj20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9c9518338544e4733e6c0797a83a4987", + "nmdc:fea164c4df1c3d9b6b825ab25be80e9c", + "nmdc:16adb9f2174f148d3fd05185d4dea71e", + "nmdc:8f01685588833485696ff22cf9ffb172", + "nmdc:f1014b4332d7049c61b1b1b93f39771f", + "nmdc:ac539889f559ad8392c48a420ca305bb", + "nmdc:04ef3b2fb029fed22149b1bbe630ec12", + "nmdc:d5c065c0d417c6e16b613f9d6a5df1f7", + "nmdc:0e55c05f3c402081071cbbc8af5c62e3", + "nmdc:2fe815eaf3a2d03ceaa7e37bf6fd3c54", + "nmdc:16d787b8a1f72d093f93f68eeced5209", + "nmdc:cc618677e541b73b195c304718ce4669" + ], + "was_informed_by": "gold:Gp0119860", + "id": "nmdc:3e61df1bd053b23abbda1322dd07af71", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga072kj20", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:31:01+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f88" + }, + "has_input": [ + "nmdc:72bca8012b3b44cabec9cd3cf8a8b1aa" + ], + "part_of": [ + "nmdc:mga072kj20" + ], + "ctg_logsum": 117522, + "scaf_logsum": 118360, + "gap_pct": 0.04416, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:4a7606943c840d2932d95785e1ac98b6", + "nmdc:f8e831027ac09d74844afdbea2d16616", + "nmdc:5c3431c6d6b7c1dbd796d7de934927f8", + "nmdc:5368525b5a4829523cd3656804eb4df6", + "nmdc:625b5b8c24e30e6712ff45bf3caed868" + ], + "asm_score": 24.711, + "was_informed_by": 
"gold:Gp0119860", + "ctg_powsum": 17327, + "scaf_max": 87430, + "id": "nmdc:3e61df1bd053b23abbda1322dd07af71", + "scaf_powsum": 17612, + "execution_resource": "NERSC-Cori", + "contigs": 2716, + "name": "Assembly Activity for nmdc:mga072kj20", + "ctg_max": 87430, + "gc_std": 0.04766, + "contig_bp": 9143969, + "gc_avg": 0.32837, + "started_at_time": "2021-10-11T02:23:25Z", + "scaf_bp": 9148009, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 2663, + "ended_at_time": "2021-10-11T04:31:01+00:00", + "ctg_l50": 16264, + "ctg_l90": 1449, + "ctg_n50": 143, + "ctg_n90": 870, + "scaf_l50": 18042, + "scaf_l90": 1480, + "scaf_n50": 134, + "scaf_n90": 824, + "scaf_l_gt50k": 1281258, + "scaf_n_gt50k": 20, + "scaf_pct_gt50k": 14.005867 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b6c" + }, + "id": "nmdc:omprc-11-bpf8k271", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_12_17", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-9c105189" + ], + "has_output": [ + "jgi:560df5b10d878540fd6fe1f4" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_12_17", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119860" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c852" + }, + "has_input": [ + "nmdc:ac149f7f9a73158498845d9bc1bf2771" + ], + "part_of": [ + "nmdc:mga072kj20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:72bca8012b3b44cabec9cd3cf8a8b1aa", + "nmdc:4259cedf8236bb79bd22c9981bd178d3" + ], + "was_informed_by": "gold:Gp0119860", + "input_read_count": 71431368, + "output_read_bases": 10371997760, + "id": "nmdc:3e61df1bd053b23abbda1322dd07af71", + "execution_resource": "NERSC-Cori", + "input_read_bases": 10786136568, + "name": "Read QC Activity for nmdc:mga072kj20", + "output_read_count": 70952846, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:31:01+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf11" + }, + "has_input": [ + "nmdc:72bca8012b3b44cabec9cd3cf8a8b1aa" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:30d55843684f4668e0b0a25b7766e07d", + "nmdc:7272381b06bd0cdf751e8f59e6a2c4f3", + "nmdc:f68887321b67024a212d42248d458500", + "nmdc:541588f6e257f8857c3be04bf30c4764", + "nmdc:12bf780ce6ac1a4b7719a22ce13fd94d", + "nmdc:c47a32bfb71522ab04146c82957963fc", + "nmdc:be495745508eb65e3176301a7685e4e0", + "nmdc:e49a05db8b7c3371fb1031361fc2582b", + "nmdc:a7148eb3e520bf6b53e2750d721368a8" + ], + "was_informed_by": "gold:Gp0119860", + "id": "nmdc:3e61df1bd053b23abbda1322dd07af71", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga072kj20", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:31:01+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 6882960053, + "type": "nmdc:DataObject", + "id": "jgi:560df3740d878540fd6fe1d0", + "name": "9567.6.137555.CGTACTA-TATCCTC.fastq.gz" + }, + { + "name": "Gp0119852_Filtered Reads", + "description": "Filtered Reads for Gp0119852", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/qa/nmdc_mga081c477_filtered.fastq.gz", 
+ "md5_checksum": "ae51b88d02bdf8eacd5961858c411176", + "id": "nmdc:ae51b88d02bdf8eacd5961858c411176", + "file_size_bytes": 3441438302 + }, + { + "name": "Gp0119852_Filtered Stats", + "description": "Filtered Stats for Gp0119852", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/qa/nmdc_mga081c477_filterStats.txt", + "md5_checksum": "94385668ec580d98db6f78f591ef9141", + "id": "nmdc:94385668ec580d98db6f78f591ef9141", + "file_size_bytes": 285 + }, + { + "name": "Gp0119852_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_gottcha2_report.tsv", + "md5_checksum": "20a38327a96fff90ddb96f32f53658b3", + "id": "nmdc:20a38327a96fff90ddb96f32f53658b3", + "file_size_bytes": 4907 + }, + { + "name": "Gp0119852_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_gottcha2_report_full.tsv", + "md5_checksum": "e80871c194c85d0210c94481a3f874fd", + "id": "nmdc:e80871c194c85d0210c94481a3f874fd", + "file_size_bytes": 102340 + }, + { + "name": "Gp0119852_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119852", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_gottcha2_krona.html", + "md5_checksum": "dc8364af5aa36e2fc309a208346cd5cd", + "id": "nmdc:dc8364af5aa36e2fc309a208346cd5cd", + "file_size_bytes": 241138 + }, + { + "name": "Gp0119852_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119852", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_centrifuge_classification.tsv", + "md5_checksum": 
"b40cccb0bc80e6e119b0e89d1c7cf4fb", + "id": "nmdc:b40cccb0bc80e6e119b0e89d1c7cf4fb", + "file_size_bytes": 6882530918 + }, + { + "name": "Gp0119852_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119852", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_centrifuge_report.tsv", + "md5_checksum": "0be4ba0cb2326b14fd2308bc467d1596", + "id": "nmdc:0be4ba0cb2326b14fd2308bc467d1596", + "file_size_bytes": 216649 + }, + { + "name": "Gp0119852_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119852", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_centrifuge_krona.html", + "md5_checksum": "e9d8ed8061a83f545b6ebff8e7ab1fad", + "id": "nmdc:e9d8ed8061a83f545b6ebff8e7ab1fad", + "file_size_bytes": 2145475 + }, + { + "name": "Gp0119852_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119852", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_kraken2_classification.tsv", + "md5_checksum": "8fe98fea4f226e43945f4995d763a5d5", + "id": "nmdc:8fe98fea4f226e43945f4995d763a5d5", + "file_size_bytes": 4308751340 + }, + { + "name": "Gp0119852_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119852", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_kraken2_report.tsv", + "md5_checksum": "ba91c04e83f013145e15d8c3f01dccad", + "id": "nmdc:ba91c04e83f013145e15d8c3f01dccad", + "file_size_bytes": 444119 + }, + { + "name": "Gp0119852_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119852", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga081c477/ReadbasedAnalysis/nmdc_mga081c477_kraken2_krona.html", + "md5_checksum": "dc0fb7a4abc36d6a569551aabed37c34", + "id": "nmdc:dc0fb7a4abc36d6a569551aabed37c34", + "file_size_bytes": 2912821 + }, + { + "name": "Gp0119852_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119852", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/assembly/nmdc_mga081c477_contigs.fna", + "md5_checksum": "b919fff43d3280141610fb99fd5d577c", + "id": "nmdc:b919fff43d3280141610fb99fd5d577c", + "file_size_bytes": 9849522 + }, + { + "name": "Gp0119852_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119852", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/assembly/nmdc_mga081c477_scaffolds.fna", + "md5_checksum": "0780e51675a17d7df84c4177e99cd6d1", + "id": "nmdc:0780e51675a17d7df84c4177e99cd6d1", + "file_size_bytes": 9817634 + }, + { + "name": "Gp0119852_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/assembly/nmdc_mga081c477_covstats.txt", + "md5_checksum": "4149e119c8484f6b020224cf2aa51d4e", + "id": "nmdc:4149e119c8484f6b020224cf2aa51d4e", + "file_size_bytes": 815280 + }, + { + "name": "Gp0119852_Assembled AGP file", + "description": "Assembled AGP file for Gp0119852", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/assembly/nmdc_mga081c477_assembly.agp", + "md5_checksum": "e4874e4aee515d116494c29489901a5a", + "id": "nmdc:e4874e4aee515d116494c29489901a5a", + "file_size_bytes": 689032 + }, + { + "name": "Gp0119852_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119852", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga081c477/assembly/nmdc_mga081c477_pairedMapped_sorted.bam", + "md5_checksum": "5cf78b3447accc51b7cb41e767e46365", + "id": "nmdc:5cf78b3447accc51b7cb41e767e46365", + "file_size_bytes": 4185264360 + }, + { + "name": "Gp0119852_Protein FAA", + "description": "Protein FAA for Gp0119852", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_proteins.faa", + "md5_checksum": "498953d33517eb0e27fd781175012af3", + "id": "nmdc:498953d33517eb0e27fd781175012af3", + "file_size_bytes": 4917796 + }, + { + "name": "Gp0119852_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119852", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_structural_annotation.gff", + "md5_checksum": "e7e8bc388bf6dbafd8db3baa2aa5d13a", + "id": "nmdc:e7e8bc388bf6dbafd8db3baa2aa5d13a", + "file_size_bytes": 2493 + }, + { + "name": "Gp0119852_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119852", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_functional_annotation.gff", + "md5_checksum": "ebc73853151574540513397b6938c065", + "id": "nmdc:ebc73853151574540513397b6938c065", + "file_size_bytes": 4843386 + }, + { + "name": "Gp0119852_KO TSV file", + "description": "KO TSV file for Gp0119852", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_ko.tsv", + "md5_checksum": "aa429e7a30c60327756ecf7cd1c80156", + "id": "nmdc:aa429e7a30c60327756ecf7cd1c80156", + "file_size_bytes": 818259 + }, + { + "name": "Gp0119852_EC TSV file", + "description": "EC TSV file for Gp0119852", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_ec.tsv", + "md5_checksum": "2b6040d7c5bfe9a94279f278e83509f2", + "id": "nmdc:2b6040d7c5bfe9a94279f278e83509f2", + "file_size_bytes": 499919 + }, + { + "name": "Gp0119852_COG GFF file", + "description": "COG GFF file for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_cog.gff", + "md5_checksum": "88b98920c96b264610ab29d79f38b211", + "id": "nmdc:88b98920c96b264610ab29d79f38b211", + "file_size_bytes": 3485289 + }, + { + "name": "Gp0119852_PFAM GFF file", + "description": "PFAM GFF file for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_pfam.gff", + "md5_checksum": "d6b7a5ba51d1e7d4306772d36d7ffc3b", + "id": "nmdc:d6b7a5ba51d1e7d4306772d36d7ffc3b", + "file_size_bytes": 3060874 + }, + { + "name": "Gp0119852_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_tigrfam.gff", + "md5_checksum": "c249aba87cb3839ab92032b304804ff5", + "id": "nmdc:c249aba87cb3839ab92032b304804ff5", + "file_size_bytes": 603193 + }, + { + "name": "Gp0119852_SMART GFF file", + "description": "SMART GFF file for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_smart.gff", + "md5_checksum": "8c120dd12278da775a16cb0af24567f4", + "id": "nmdc:8c120dd12278da775a16cb0af24567f4", + "file_size_bytes": 969032 + }, + { + "name": "Gp0119852_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_supfam.gff", + "md5_checksum": "c0d1600b0a63066027d425c5f539959b", + "id": "nmdc:c0d1600b0a63066027d425c5f539959b", + "file_size_bytes": 4383804 + }, + { + "name": "Gp0119852_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119852", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_cath_funfam.gff", + "md5_checksum": "37fcc1a29fc9a708f69d3a7723ef58cd", + "id": "nmdc:37fcc1a29fc9a708f69d3a7723ef58cd", + "file_size_bytes": 3975516 + }, + { + "name": "Gp0119852_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119852", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/annotation/nmdc_mga081c477_ko_ec.gff", + "md5_checksum": "b5717b7a28bc536d85cfd7028a6cc545", + "id": "nmdc:b5717b7a28bc536d85cfd7028a6cc545", + "file_size_bytes": 2700029 + }, + { + "name": "Gp0119852_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119852", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/MAGs/nmdc_mga081c477_checkm_qa.out", + "md5_checksum": "ef2f9ec82c267f811bb9560742ae73ba", + "id": "nmdc:ef2f9ec82c267f811bb9560742ae73ba", + "file_size_bytes": 930 + }, + { + "name": "Gp0119852_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119852", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga081c477/MAGs/nmdc_mga081c477_hqmq_bin.zip", + "md5_checksum": "fa4fa4c09c1f4f9171f74f708ddb120e", + "id": "nmdc:fa4fa4c09c1f4f9171f74f708ddb120e", + "file_size_bytes": 1303210 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd2" + }, + "description": "Assembled contigs fasta for gold:Gp0119852", + "url": "https://data.microbiomedata.org/data/1777_95821/assembly/assembly_contigs.fna", + "file_size_bytes": 9802797, + "type": "nmdc:DataObject", + "id": "nmdc:9f3d6c465517c7bca53391ab998dad82", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd3" + }, + "description": "Assembled scaffold fasta for gold:Gp0119852", + "url": 
"https://data.microbiomedata.org/data/1777_95821/assembly/assembly_scaffolds.fna", + "file_size_bytes": 9772484, + "type": "nmdc:DataObject", + "id": "nmdc:5ab47e88ba7953638b9486d062342726", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd5" + }, + "description": "Assembled AGP file for gold:Gp0119852", + "url": "https://data.microbiomedata.org/data/1777_95821/assembly/assembly.agp", + "file_size_bytes": 592332, + "type": "nmdc:DataObject", + "id": "nmdc:1f9eab40d99463a3c6fcdb61140d327a", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dd7" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119852", + "url": "https://data.microbiomedata.org/data/1777_95821/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4123125208, + "type": "nmdc:DataObject", + "id": "nmdc:7301392759f8433306616d8fda38f509", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14ddf" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119852", + "url": "https://data.microbiomedata.org/data/1777_95821/assembly/mapping_stats.txt", + "file_size_bytes": 768555, + "type": "nmdc:DataObject", + "id": "nmdc:bf1b73bd5f16ce969115642bf7162da7", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a2c" + }, + "id": "nmdc:51d84067b24629db20151d89cebdebb3", + "name": "1777_95821.krona.html", + "description": "Gold:Gp0119852 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95821/ReadbasedAnalysis/centrifuge/1777_95821.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a2f" + }, + "id": "nmdc:d71e60d9e520bfd6e1c362572f2eed10", + "name": 
"1777_95821.json", + "description": "Gold:Gp0119852 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95821/ReadbasedAnalysis/1777_95821.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16321" + }, + "id": "nmdc:9cf748d1aba26e412ef54e0718dce9c2", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119852", + "file_size_bytes": 5490613, + "url": "https://data.microbiomedata.org/data/1777_95821/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16323" + }, + "id": "nmdc:5088ab7c292c38b5f033ae4dbe59d668", + "name": "gold:Gp0119852.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119852", + "file_size_bytes": 244583, + "url": "https://data.microbiomedata.org/data/1777_95821/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16324" + }, + "id": "nmdc:3ecf713be53d493601972684ad9dc69a", + "name": "gold:Gp0119852.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119852", + "file_size_bytes": 248350, + "url": "https://data.microbiomedata.org/data/1777_95821/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16328" + }, + "id": "nmdc:3300a199c5f8cccfbca8d8d77b604c51", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119852", + "file_size_bytes": 888, + "url": "https://data.microbiomedata.org/data/1777_95821/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1632b" + }, + "id": "nmdc:b430b7ce9a99be1e85ef2a96e97a9213", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs 
fasta file by metaBat2 for gold:Gp0119852", + "file_size_bytes": 3745841, + "url": "https://data.microbiomedata.org/data/1777_95821/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c4f" + }, + "description": "EC TSV File for gold:Gp0119852", + "url": "https://data.microbiomedata.org/1777_95821/img_annotation/Ga0482163_ec.tsv", + "md5_checksum": "de071cd0e3010778b9f491e846b95179", + "file_size_bytes": 3385, + "id": "nmdc:de071cd0e3010778b9f491e846b95179", + "name": "gold:Gp0119852_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c50" + }, + "description": "KO TSV File for gold:Gp0119852", + "url": "https://data.microbiomedata.org/1777_95821/img_annotation/Ga0482163_ko.tsv", + "md5_checksum": "793aa923c27dd8cc762625f96ff52f80", + "file_size_bytes": 3385, + "id": "nmdc:793aa923c27dd8cc762625f96ff52f80", + "name": "gold:Gp0119852_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c51" + }, + "description": "Protein FAA for gold:Gp0119852", + "url": "https://data.microbiomedata.org/1777_95821/img_annotation/Ga0482163_proteins.faa", + "md5_checksum": "18c7f9fe5340509f00326c70c1815e88", + "file_size_bytes": 3385, + "id": "nmdc:18c7f9fe5340509f00326c70c1815e88", + "name": "gold:Gp0119852_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c53" + }, + "description": "Functional annotation GFF file for gold:Gp0119852", + "url": "https://data.microbiomedata.org/1777_95821/img_annotation/Ga0482163_functional_annotation.gff", + "md5_checksum": "678d467ad6c4ad6d7ba2f259585c4acd", + "file_size_bytes": 3385, + "id": "nmdc:678d467ad6c4ad6d7ba2f259585c4acd", + "name": "gold:Gp0119852_Functional annotation GFF file", + "data_object_type": 
"Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c54" + }, + "description": "Structural annotation GFF file for gold:Gp0119852", + "url": "https://data.microbiomedata.org/1777_95821/img_annotation/Ga0482163_structural_annotation.gff", + "md5_checksum": "4b43a24ab20530a72e06a8721b2c09db", + "file_size_bytes": 3385, + "id": "nmdc:4b43a24ab20530a72e06a8721b2c09db", + "name": "gold:Gp0119852_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346ee" + }, + "has_input": [ + "nmdc:b919fff43d3280141610fb99fd5d577c", + "nmdc:5cf78b3447accc51b7cb41e767e46365", + "nmdc:ebc73853151574540513397b6938c065" + ], + "too_short_contig_num": 8204, + "part_of": [ + "nmdc:mga081c477" + ], + "binned_contig_num": 319, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ef2f9ec82c267f811bb9560742ae73ba", + "nmdc:fa4fa4c09c1f4f9171f74f708ddb120e" + ], + "was_informed_by": "gold:Gp0119852", + "input_contig_num": 9345, + "id": "nmdc:8eb796d18151cac86f952cadf6db39e3", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga081c477", + "mags_list": [ + { + "number_of_contig": 220, + "completeness": 81.03, + "bin_name": "bins.1", + "gene_count": 2644, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium sp003070825", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 3, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 44 + }, + { + "number_of_contig": 99, + "completeness": 98.21, + "bin_name": "bins.2", + "gene_count": 1881, + "bin_quality": "MQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + 
"num_16s": 0, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 43 + } + ], + "unbinned_contig_num": 822, + "started_at_time": "2021-12-01T21:31:04Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T21:04:22+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef993" + }, + "has_input": [ + "nmdc:b919fff43d3280141610fb99fd5d577c" + ], + "part_of": [ + "nmdc:mga081c477" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:498953d33517eb0e27fd781175012af3", + "nmdc:e7e8bc388bf6dbafd8db3baa2aa5d13a", + "nmdc:ebc73853151574540513397b6938c065", + "nmdc:aa429e7a30c60327756ecf7cd1c80156", + "nmdc:2b6040d7c5bfe9a94279f278e83509f2", + "nmdc:88b98920c96b264610ab29d79f38b211", + "nmdc:d6b7a5ba51d1e7d4306772d36d7ffc3b", + "nmdc:c249aba87cb3839ab92032b304804ff5", + "nmdc:8c120dd12278da775a16cb0af24567f4", + "nmdc:c0d1600b0a63066027d425c5f539959b", + "nmdc:37fcc1a29fc9a708f69d3a7723ef58cd", + "nmdc:b5717b7a28bc536d85cfd7028a6cc545" + ], + "was_informed_by": "gold:Gp0119852", + "id": "nmdc:8eb796d18151cac86f952cadf6db39e3", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga081c477", + "started_at_time": "2021-12-01T21:31:04Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T21:04:22+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f81" + }, + "has_input": [ + "nmdc:ae51b88d02bdf8eacd5961858c411176" + ], + "part_of": [ + "nmdc:mga081c477" + ], + "ctg_logsum": 78582, + "scaf_logsum": 80980, + "gap_pct": 0.03492, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b919fff43d3280141610fb99fd5d577c", + 
"nmdc:0780e51675a17d7df84c4177e99cd6d1", + "nmdc:4149e119c8484f6b020224cf2aa51d4e", + "nmdc:e4874e4aee515d116494c29489901a5a", + "nmdc:5cf78b3447accc51b7cb41e767e46365" + ], + "asm_score": 21.676, + "was_informed_by": "gold:Gp0119852", + "ctg_powsum": 11126, + "scaf_max": 116950, + "id": "nmdc:8eb796d18151cac86f952cadf6db39e3", + "scaf_powsum": 11598, + "execution_resource": "NERSC-Cori", + "contigs": 9350, + "name": "Assembly Activity for nmdc:mga081c477", + "ctg_max": 116950, + "gc_std": 0.06897, + "gc_avg": 0.39583, + "contig_bp": 9419415, + "started_at_time": "2021-12-01T21:31:04Z", + "scaf_bp": 9422705, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 9030, + "ended_at_time": "2021-12-02T21:04:22+00:00", + "ctg_l50": 3027, + "ctg_l90": 339, + "ctg_n50": 392, + "ctg_n90": 6235, + "scaf_l50": 3519, + "scaf_l90": 342, + "scaf_n50": 345, + "scaf_n90": 5919, + "scaf_l_gt50k": 725282, + "scaf_n_gt50k": 10, + "scaf_pct_gt50k": 7.697174 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b6d" + }, + "id": "nmdc:omprc-11-qs59yk26", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_16", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-jxbxms84" + ], + "has_output": [ + "jgi:560df3740d878540fd6fe1d0" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_16", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119852" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": 
"649b009d6bdd4fd20273c84b" + }, + "has_input": [ + "nmdc:2dde8314c7bd455c16f3c57a043ea82f" + ], + "part_of": [ + "nmdc:mga081c477" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ae51b88d02bdf8eacd5961858c411176", + "nmdc:94385668ec580d98db6f78f591ef9141" + ], + "was_informed_by": "gold:Gp0119852", + "input_read_count": 65488676, + "output_read_bases": 9447474424, + "id": "nmdc:8eb796d18151cac86f952cadf6db39e3", + "execution_resource": "NERSC-Cori", + "input_read_bases": 9888790076, + "name": "Read QC Activity for nmdc:mga081c477", + "output_read_count": 65001172, + "started_at_time": "2021-12-01T21:31:04Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T21:04:22+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf34" + }, + "has_input": [ + "nmdc:ae51b88d02bdf8eacd5961858c411176" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:20a38327a96fff90ddb96f32f53658b3", + "nmdc:e80871c194c85d0210c94481a3f874fd", + "nmdc:dc8364af5aa36e2fc309a208346cd5cd", + "nmdc:b40cccb0bc80e6e119b0e89d1c7cf4fb", + "nmdc:0be4ba0cb2326b14fd2308bc467d1596", + "nmdc:e9d8ed8061a83f545b6ebff8e7ab1fad", + "nmdc:8fe98fea4f226e43945f4995d763a5d5", + "nmdc:ba91c04e83f013145e15d8c3f01dccad", + "nmdc:dc0fb7a4abc36d6a569551aabed37c34" + ], + "was_informed_by": "gold:Gp0119852", + "id": "nmdc:8eb796d18151cac86f952cadf6db39e3", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga081c477", + "started_at_time": "2021-12-01T21:31:04Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T21:04:22+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 8913389036, + "type": "nmdc:DataObject", + "id": "jgi:563bf9500d8785441a9214bc", + "name": "9704.2.141521.AGGCAGA-GCGTAAG.fastq.gz" + }, + { + "name": 
"Gp0119854_Filtered Reads", + "description": "Filtered Reads for Gp0119854", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/qa/nmdc_mga0bk7j89_filtered.fastq.gz", + "md5_checksum": "5ee58d9ea16f71276d8b5af71d0833ab", + "id": "nmdc:5ee58d9ea16f71276d8b5af71d0833ab", + "file_size_bytes": 4823293677 + }, + { + "name": "Gp0119854_Filtered Stats", + "description": "Filtered Stats for Gp0119854", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/qa/nmdc_mga0bk7j89_filterStats.txt", + "md5_checksum": "1dfc67d4f45c493b036283e8f52e5fb5", + "id": "nmdc:1dfc67d4f45c493b036283e8f52e5fb5", + "file_size_bytes": 288 + }, + { + "name": "Gp0119854_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_gottcha2_report.tsv", + "md5_checksum": "6b9b41e06b8ff71610843a49abb73ffd", + "id": "nmdc:6b9b41e06b8ff71610843a49abb73ffd", + "file_size_bytes": 7649 + }, + { + "name": "Gp0119854_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_gottcha2_report_full.tsv", + "md5_checksum": "78af48b43e374313d7776f0deaf55c86", + "id": "nmdc:78af48b43e374313d7776f0deaf55c86", + "file_size_bytes": 259350 + }, + { + "name": "Gp0119854_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119854", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_gottcha2_krona.html", + "md5_checksum": "951c2814cbb3324be8c91980da47b83a", + "id": "nmdc:951c2814cbb3324be8c91980da47b83a", + "file_size_bytes": 250358 + }, + { + "name": "Gp0119854_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for 
Gp0119854", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_centrifuge_classification.tsv", + "md5_checksum": "d0610bc3a80bd193d2a248e871a39b03", + "id": "nmdc:d0610bc3a80bd193d2a248e871a39b03", + "file_size_bytes": 6683337904 + }, + { + "name": "Gp0119854_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119854", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_centrifuge_report.tsv", + "md5_checksum": "4ff2b914c0cfb5703f19034ded2c568d", + "id": "nmdc:4ff2b914c0cfb5703f19034ded2c568d", + "file_size_bytes": 231605 + }, + { + "name": "Gp0119854_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119854", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_centrifuge_krona.html", + "md5_checksum": "c49e3e30010264b4d4755ab2344c7c71", + "id": "nmdc:c49e3e30010264b4d4755ab2344c7c71", + "file_size_bytes": 2210828 + }, + { + "name": "Gp0119854_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119854", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_kraken2_classification.tsv", + "md5_checksum": "9adf095611968698fc1e98d985538101", + "id": "nmdc:9adf095611968698fc1e98d985538101", + "file_size_bytes": 5392177049 + }, + { + "name": "Gp0119854_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119854", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_kraken2_report.tsv", + "md5_checksum": "9f8bbc4f92da1d2fe2f4c950e19c59dc", + "id": 
"nmdc:9f8bbc4f92da1d2fe2f4c950e19c59dc", + "file_size_bytes": 506459 + }, + { + "name": "Gp0119854_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119854", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/ReadbasedAnalysis/nmdc_mga0bk7j89_kraken2_krona.html", + "md5_checksum": "e094739ab309ea1a127dce97939b6d68", + "id": "nmdc:e094739ab309ea1a127dce97939b6d68", + "file_size_bytes": 3256437 + }, + { + "name": "Gp0119854_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119854", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/assembly/nmdc_mga0bk7j89_contigs.fna", + "md5_checksum": "9ce3d663d53cc47f54df5e83459e0f3e", + "id": "nmdc:9ce3d663d53cc47f54df5e83459e0f3e", + "file_size_bytes": 28861345 + }, + { + "name": "Gp0119854_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119854", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/assembly/nmdc_mga0bk7j89_scaffolds.fna", + "md5_checksum": "d3756f1d709b33a29509560c0a68327a", + "id": "nmdc:d3756f1d709b33a29509560c0a68327a", + "file_size_bytes": 28806277 + }, + { + "name": "Gp0119854_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/assembly/nmdc_mga0bk7j89_covstats.txt", + "md5_checksum": "b9da00b620edc38e80a5010b1957fa18", + "id": "nmdc:b9da00b620edc38e80a5010b1957fa18", + "file_size_bytes": 1451528 + }, + { + "name": "Gp0119854_Assembled AGP file", + "description": "Assembled AGP file for Gp0119854", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/assembly/nmdc_mga0bk7j89_assembly.agp", + "md5_checksum": "cd032090443757cf4b4d295a2fafe394", + "id": "nmdc:cd032090443757cf4b4d295a2fafe394", + 
"file_size_bytes": 1309505 + }, + { + "name": "Gp0119854_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119854", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/assembly/nmdc_mga0bk7j89_pairedMapped_sorted.bam", + "md5_checksum": "537fbc3db2e752c5ee4e33243cb152a4", + "id": "nmdc:537fbc3db2e752c5ee4e33243cb152a4", + "file_size_bytes": 5961196490 + }, + { + "name": "Gp0119854_Protein FAA", + "description": "Protein FAA for Gp0119854", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_proteins.faa", + "md5_checksum": "20e8f2ad489accd55bfd12f9eccbd1fa", + "id": "nmdc:20e8f2ad489accd55bfd12f9eccbd1fa", + "file_size_bytes": 13470280 + }, + { + "name": "Gp0119854_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119854", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_structural_annotation.gff", + "md5_checksum": "db512876b78ff3b32ce5db6e6998b772", + "id": "nmdc:db512876b78ff3b32ce5db6e6998b772", + "file_size_bytes": 2518 + }, + { + "name": "Gp0119854_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119854", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_functional_annotation.gff", + "md5_checksum": "d3eaf63075eaadeda97a0e68d2ea1bad", + "id": "nmdc:d3eaf63075eaadeda97a0e68d2ea1bad", + "file_size_bytes": 12062353 + }, + { + "name": "Gp0119854_KO TSV file", + "description": "KO TSV file for Gp0119854", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_ko.tsv", + "md5_checksum": "e880dc9557c1e1c84efe3ee74f3ce93e", + "id": 
"nmdc:e880dc9557c1e1c84efe3ee74f3ce93e", + "file_size_bytes": 1872866 + }, + { + "name": "Gp0119854_EC TSV file", + "description": "EC TSV file for Gp0119854", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_ec.tsv", + "md5_checksum": "a28b8d266586f100793dfd72c527c160", + "id": "nmdc:a28b8d266586f100793dfd72c527c160", + "file_size_bytes": 1137751 + }, + { + "name": "Gp0119854_COG GFF file", + "description": "COG GFF file for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_cog.gff", + "md5_checksum": "b3d17567e523f9b8ba6a6fe4b67416ac", + "id": "nmdc:b3d17567e523f9b8ba6a6fe4b67416ac", + "file_size_bytes": 8502523 + }, + { + "name": "Gp0119854_PFAM GFF file", + "description": "PFAM GFF file for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_pfam.gff", + "md5_checksum": "08b898a741f2b15a87fc9d2df2e3b082", + "id": "nmdc:08b898a741f2b15a87fc9d2df2e3b082", + "file_size_bytes": 8336310 + }, + { + "name": "Gp0119854_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_tigrfam.gff", + "md5_checksum": "e2790e24b26ca09baf7b4b4c84223e25", + "id": "nmdc:e2790e24b26ca09baf7b4b4c84223e25", + "file_size_bytes": 1880339 + }, + { + "name": "Gp0119854_SMART GFF file", + "description": "SMART GFF file for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_smart.gff", + "md5_checksum": "e68bfdf0862734c6cfe10d3be8f64d31", + "id": "nmdc:e68bfdf0862734c6cfe10d3be8f64d31", + "file_size_bytes": 2668297 + }, + { + "name": "Gp0119854_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_supfam.gff", + "md5_checksum": 
"bd900a8b9c2bb973be13a1cb2434d5b2", + "id": "nmdc:bd900a8b9c2bb973be13a1cb2434d5b2", + "file_size_bytes": 11326814 + }, + { + "name": "Gp0119854_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_cath_funfam.gff", + "md5_checksum": "ac4b50ec40b0db1f7f4b4bcdc3be0cab", + "id": "nmdc:ac4b50ec40b0db1f7f4b4bcdc3be0cab", + "file_size_bytes": 10784902 + }, + { + "name": "Gp0119854_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/annotation/nmdc_mga0bk7j89_ko_ec.gff", + "md5_checksum": "b3f1c6388b68010c5ce30d5e12b810b3", + "id": "nmdc:b3f1c6388b68010c5ce30d5e12b810b3", + "file_size_bytes": 6145421 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119854_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/MAGs/nmdc_mga0bk7j89_bins.tooShort.fa", + "md5_checksum": "e6301f8e5167067b3608a7fab2aca541", + "id": "nmdc:e6301f8e5167067b3608a7fab2aca541", + "file_size_bytes": 6499695 + }, + { + "name": "Gp0119854_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/MAGs/nmdc_mga0bk7j89_bins.unbinned.fa", + "md5_checksum": "1d47a5ebbf8f1140cdbb9c7f2fbcca00", + "id": "nmdc:1d47a5ebbf8f1140cdbb9c7f2fbcca00", + 
"file_size_bytes": 8689690 + }, + { + "name": "Gp0119854_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119854", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/MAGs/nmdc_mga0bk7j89_checkm_qa.out", + "md5_checksum": "d93a8c2aa058b6d251a5691c2e7eb503", + "id": "nmdc:d93a8c2aa058b6d251a5691c2e7eb503", + "file_size_bytes": 1771 + }, + { + "name": "Gp0119854_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119854", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/MAGs/nmdc_mga0bk7j89_hqmq_bin.zip", + "md5_checksum": "35e7e5fbe485f1ca6a9635a7dc86d54f", + "id": "nmdc:35e7e5fbe485f1ca6a9635a7dc86d54f", + "file_size_bytes": 2654194 + }, + { + "name": "Gp0119854_metabat2 bins", + "description": "metabat2 bins for Gp0119854", + "url": "https://data.microbiomedata.org/data/nmdc:mga0bk7j89/MAGs/nmdc_mga0bk7j89_metabat_bin.zip", + "md5_checksum": "34db417986f230f457ee567f339f5a78", + "id": "nmdc:34db417986f230f457ee567f339f5a78", + "file_size_bytes": 1499579 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dde" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119854", + "url": "https://data.microbiomedata.org/data/1777_95823/assembly/mapping_stats.txt", + "file_size_bytes": 1362643, + "type": "nmdc:DataObject", + "id": "nmdc:3c9a784b1d657beee36b6e5a5a0ab0c2", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de0" + }, + "description": "Assembled contigs fasta for gold:Gp0119854", + "url": "https://data.microbiomedata.org/data/1777_95823/assembly/assembly_contigs.fna", + "file_size_bytes": 28772460, + "type": "nmdc:DataObject", + "id": "nmdc:772095f1e8e7033ff8f5e5a42308347c", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly 
Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de1" + }, + "description": "Assembled scaffold fasta for gold:Gp0119854", + "url": "https://data.microbiomedata.org/data/1777_95823/assembly/assembly_scaffolds.fna", + "file_size_bytes": 28718922, + "type": "nmdc:DataObject", + "id": "nmdc:b97bf49ad48dca19059ed8954191a585", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de2" + }, + "description": "Assembled AGP file for gold:Gp0119854", + "url": "https://data.microbiomedata.org/data/1777_95823/assembly/assembly.agp", + "file_size_bytes": 1128575, + "type": "nmdc:DataObject", + "id": "nmdc:fcf4fbb5bf7f350721806edf25971a51", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14de3" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119854", + "url": "https://data.microbiomedata.org/data/1777_95823/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 5915051748, + "type": "nmdc:DataObject", + "id": "nmdc:3b5ea7a90c69ccb52e27acbbed6d9f16", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a37" + }, + "id": "nmdc:8c2a14babfadfdb9cb5f041d17ed88d2", + "name": "1777_95823.krona.html", + "description": "Gold:Gp0119854 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95823/ReadbasedAnalysis/centrifuge/1777_95823.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a41" + }, + "id": "nmdc:c8beaf1d0d1ea7a2771a9e706a674c76", + "name": "1777_95823.json", + "description": "Gold:Gp0119854 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95823/ReadbasedAnalysis/1777_95823.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b1633a" + }, + "id": "nmdc:d9543af33e9608ac30d8f0b653fe9697", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119854", + "file_size_bytes": 16933832, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1633b" + }, + "id": "nmdc:3c2843b8572ccb4fa2dd0e14a3a8e660", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119854", + "file_size_bytes": 6331156, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1633c" + }, + "id": "nmdc:32a8e336ede2df4395be831caaf3cd87", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119854", + "file_size_bytes": 2686, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1633e" + }, + "id": "nmdc:cf1f7aa4a913f0755d8b1d4eb2fc5862", + "name": "gold:Gp0119854.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 660656, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1633f" + }, + "id": "nmdc:448cdedf074646e806929249f4d007a7", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119854", + "file_size_bytes": 1237, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16340" + }, + 
"id": "nmdc:b690d8350ca2146ac00548a67607ab09", + "name": "gold:Gp0119854.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 300295, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16341" + }, + "id": "nmdc:9d43e8f6551cd69c09519c8e0f460ef3", + "name": "gold:Gp0119854.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 463206, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16342" + }, + "id": "nmdc:68095a94c245fa592cf865a33f559e24", + "name": "gold:Gp0119854.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 297808, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16343" + }, + "id": "nmdc:ec3029c56b83c791fc9fc50648cc3ab4", + "name": "gold:Gp0119854.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 350763, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16344" + }, + "id": "nmdc:e85cfa7cf31385d2a6af7bbbff82ad2e", + "name": "gold:Gp0119854.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 575954, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16345" + }, + "id": "nmdc:452dfbb9d55ef55365821d2062b76c4f", + "name": "gold:Gp0119854.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 360071, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16346" + }, + "id": "nmdc:82a0546a29f8da00b6428bde59bfa17e", + "name": "gold:Gp0119854.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 207202, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16347" + }, + "id": "nmdc:1964b218faacafa814232b788d369aa9", + "name": "gold:Gp0119854.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 694051, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16348" + }, + "id": "nmdc:e78eb6580de24f3bb360a9a9b734667e", + "name": "gold:Gp0119854.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 321030, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16349" + }, + "id": "nmdc:223a31502c352d4ef3166f05cc71366d", + "name": "gold:Gp0119854.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 346303, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome 
Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1634a" + }, + "id": "nmdc:a506ab97b7c76a47eabb936e036eab64", + "name": "gold:Gp0119854.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 203668, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1634b" + }, + "id": "nmdc:faaf6e45f1d591b9739656100906763b", + "name": "gold:Gp0119854.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0119854", + "file_size_bytes": 595759, + "url": "https://data.microbiomedata.org/data/1777_95823/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c5d" + }, + "description": "Protein FAA for gold:Gp0119854", + "url": "https://data.microbiomedata.org/1777_95823/img_annotation/Ga0482161_proteins.faa", + "md5_checksum": "dba970c890226f16b008f8043ca38d9b", + "file_size_bytes": 3385, + "id": "nmdc:dba970c890226f16b008f8043ca38d9b", + "name": "gold:Gp0119854_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c5e" + }, + "description": "Structural annotation GFF file for gold:Gp0119854", + "url": "https://data.microbiomedata.org/1777_95823/img_annotation/Ga0482161_structural_annotation.gff", + "md5_checksum": "b476e6c11fed4a3254eda4d15a4183c7", + "file_size_bytes": 3385, + "id": "nmdc:b476e6c11fed4a3254eda4d15a4183c7", + "name": "gold:Gp0119854_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c5f" + }, + "description": "Functional annotation GFF file for gold:Gp0119854", + "url": 
"https://data.microbiomedata.org/1777_95823/img_annotation/Ga0482161_functional_annotation.gff", + "md5_checksum": "3361194f90da2d18830213cd4587aa71", + "file_size_bytes": 3385, + "id": "nmdc:3361194f90da2d18830213cd4587aa71", + "name": "gold:Gp0119854_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c62" + }, + "description": "KO TSV File for gold:Gp0119854", + "url": "https://data.microbiomedata.org/1777_95823/img_annotation/Ga0482161_ko.tsv", + "md5_checksum": "6b974729dc47b7795b17fac36d496cfa", + "file_size_bytes": 3385, + "id": "nmdc:6b974729dc47b7795b17fac36d496cfa", + "name": "gold:Gp0119854_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c6d" + }, + "description": "EC TSV File for gold:Gp0119854", + "url": "https://data.microbiomedata.org/1777_95823/img_annotation/Ga0482161_ec.tsv", + "md5_checksum": "93e586051957e0f4a451b70ceeac04ab", + "file_size_bytes": 3385, + "id": "nmdc:93e586051957e0f4a451b70ceeac04ab", + "name": "gold:Gp0119854_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346f5" + }, + "has_input": [ + "nmdc:9ce3d663d53cc47f54df5e83459e0f3e", + "nmdc:537fbc3db2e752c5ee4e33243cb152a4", + "nmdc:d3eaf63075eaadeda97a0e68d2ea1bad" + ], + "too_short_contig_num": 13424, + "part_of": [ + "nmdc:mga0bk7j89" + ], + "binned_contig_num": 795, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:e6301f8e5167067b3608a7fab2aca541", + "nmdc:1d47a5ebbf8f1140cdbb9c7f2fbcca00", + "nmdc:d93a8c2aa058b6d251a5691c2e7eb503", + "nmdc:35e7e5fbe485f1ca6a9635a7dc86d54f", + "nmdc:34db417986f230f457ee567f339f5a78" + ], + "was_informed_by": "gold:Gp0119854", 
+ "input_contig_num": 17777, + "id": "nmdc:34f751af8eade7eecf7a0ba611991cb0", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0bk7j89", + "mags_list": [ + { + "number_of_contig": 231, + "completeness": 85.87, + "bin_name": "bins.1", + "gene_count": 2562, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.22, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 13 + }, + { + "number_of_contig": 282, + "completeness": 98.25, + "bin_name": "bins.2", + "gene_count": 4353, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 84.08, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 71 + }, + { + "number_of_contig": 85, + "completeness": 93.04, + "bin_name": "bins.3", + "gene_count": 2831, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium sp003070825", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.46, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 4, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 51 + }, + { + "number_of_contig": 94, + "completeness": 92.08, + "bin_name": "bins.4", + "gene_count": 1939, + "bin_quality": "HQ", + "gtdbtk_species": "Thermococcus_A sp000430485", + "gtdbtk_order": "Thermococcales", + "num_16s": 1, + "gtdbtk_family": "Thermococcaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.5, + "gtdbtk_class": "Thermococci", + "gtdbtk_phylum": "Euryarchaeota", + "num_5s": 2, + "num_23s": 1, + "gtdbtk_genus": "Thermococcus_A", + "num_t_rna": 
39 + }, + { + "number_of_contig": 35, + "completeness": 98.21, + "bin_name": "bins.5", + "gene_count": 1922, + "bin_quality": "HQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + "num_16s": 1, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 42 + }, + { + "number_of_contig": 18, + "completeness": 0.0, + "bin_name": "bins.6", + "gene_count": 280, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 50, + "completeness": 18.91, + "bin_name": "bins.7", + "gene_count": 347, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.93, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 3558, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:12:58+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a4" + }, + "has_input": [ + "nmdc:9ce3d663d53cc47f54df5e83459e0f3e" + ], + "part_of": [ + "nmdc:mga0bk7j89" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:20e8f2ad489accd55bfd12f9eccbd1fa", + "nmdc:db512876b78ff3b32ce5db6e6998b772", + "nmdc:d3eaf63075eaadeda97a0e68d2ea1bad", + "nmdc:e880dc9557c1e1c84efe3ee74f3ce93e", + "nmdc:a28b8d266586f100793dfd72c527c160", + "nmdc:b3d17567e523f9b8ba6a6fe4b67416ac", + "nmdc:08b898a741f2b15a87fc9d2df2e3b082", + 
"nmdc:e2790e24b26ca09baf7b4b4c84223e25", + "nmdc:e68bfdf0862734c6cfe10d3be8f64d31", + "nmdc:bd900a8b9c2bb973be13a1cb2434d5b2", + "nmdc:ac4b50ec40b0db1f7f4b4bcdc3be0cab", + "nmdc:b3f1c6388b68010c5ce30d5e12b810b3" + ], + "was_informed_by": "gold:Gp0119854", + "id": "nmdc:34f751af8eade7eecf7a0ba611991cb0", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0bk7j89", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:12:58+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f87" + }, + "has_input": [ + "nmdc:5ee58d9ea16f71276d8b5af71d0833ab" + ], + "part_of": [ + "nmdc:mga0bk7j89" + ], + "ctg_logsum": 296204, + "scaf_logsum": 299233, + "gap_pct": 0.01891, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9ce3d663d53cc47f54df5e83459e0f3e", + "nmdc:d3756f1d709b33a29509560c0a68327a", + "nmdc:b9da00b620edc38e80a5010b1957fa18", + "nmdc:cd032090443757cf4b4d295a2fafe394", + "nmdc:537fbc3db2e752c5ee4e33243cb152a4" + ], + "asm_score": 23.501, + "was_informed_by": "gold:Gp0119854", + "ctg_powsum": 43252, + "scaf_max": 315713, + "id": "nmdc:34f751af8eade7eecf7a0ba611991cb0", + "scaf_powsum": 44106, + "execution_resource": "NERSC-Cori", + "contigs": 17782, + "name": "Assembly Activity for nmdc:mga0bk7j89", + "ctg_max": 195648, + "gc_std": 0.06571, + "contig_bp": 27867399, + "gc_avg": 0.3805, + "started_at_time": "2021-10-11T02:23:25Z", + "scaf_bp": 27872669, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 17471, + "ended_at_time": "2021-10-11T05:12:58+00:00", + "ctg_l50": 6634, + "ctg_l90": 451, + "ctg_n50": 673, + "ctg_n90": 9267, + "scaf_l50": 6893, + "scaf_l90": 458, + "scaf_n50": 645, + "scaf_n90": 8989, + "scaf_l_gt50k": 4072075, + "scaf_n_gt50k": 46, + "scaf_pct_gt50k": 14.609563 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b6e" + }, + 
"id": "nmdc:omprc-11-8r4tbs19", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_20", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-fxxph207" + ], + "has_output": [ + "jgi:563bf9500d8785441a9214bc" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119854" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c84f" + }, + "has_input": [ + "nmdc:f6679cb27c7e9f8f695a7f96b3937c04" + ], + "part_of": [ + "nmdc:mga0bk7j89" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5ee58d9ea16f71276d8b5af71d0833ab", + "nmdc:1dfc67d4f45c493b036283e8f52e5fb5" + ], + "was_informed_by": "gold:Gp0119854", + "input_read_count": 87775470, + "output_read_bases": 12367533459, + "id": "nmdc:34f751af8eade7eecf7a0ba611991cb0", + "execution_resource": "NERSC-Cori", + "input_read_bases": 13254095970, + "name": "Read QC Activity for nmdc:mga0bk7j89", + "output_read_count": 86606226, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:12:58+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf0f" + }, + "has_input": [ + "nmdc:5ee58d9ea16f71276d8b5af71d0833ab" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:6b9b41e06b8ff71610843a49abb73ffd", + "nmdc:78af48b43e374313d7776f0deaf55c86", + "nmdc:951c2814cbb3324be8c91980da47b83a", + "nmdc:d0610bc3a80bd193d2a248e871a39b03", + "nmdc:4ff2b914c0cfb5703f19034ded2c568d", + "nmdc:c49e3e30010264b4d4755ab2344c7c71", + "nmdc:9adf095611968698fc1e98d985538101", + "nmdc:9f8bbc4f92da1d2fe2f4c950e19c59dc", + "nmdc:e094739ab309ea1a127dce97939b6d68" + ], + "was_informed_by": "gold:Gp0119854", + "id": "nmdc:34f751af8eade7eecf7a0ba611991cb0", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0bk7j89", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:12:58+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 6735786289, + "type": "nmdc:DataObject", + "id": "jgi:560df5bb0d878540fd6fe205", + "name": "9567.8.137569.TAGGCAT-TATCCTC.fastq.gz" + }, + { + "name": "Gp0119861_Filtered Reads", + "description": "Filtered Reads for Gp0119861", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/qa/nmdc_mga0nscm44_filtered.fastq.gz", + "md5_checksum": "a2a8a2d19043b4aa30236bd76b486c49", + "id": "nmdc:a2a8a2d19043b4aa30236bd76b486c49", + "file_size_bytes": 3454086570 + }, + { + "name": "Gp0119861_Filtered Stats", + "description": "Filtered Stats for Gp0119861", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/qa/nmdc_mga0nscm44_filterStats.txt", + "md5_checksum": "7e59b95935fdb62f119e1348143c6f92", + "id": "nmdc:7e59b95935fdb62f119e1348143c6f92", + "file_size_bytes": 288 + }, + { + "name": "Gp0119861_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_gottcha2_report.tsv", + "md5_checksum": "59dc1a81b9d778a58e84574d4b7ceb8f", + "id": 
"nmdc:59dc1a81b9d778a58e84574d4b7ceb8f", + "file_size_bytes": 2082 + }, + { + "name": "Gp0119861_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_gottcha2_report_full.tsv", + "md5_checksum": "208e9969cb3fc7874adae2c6bf47fed7", + "id": "nmdc:208e9969cb3fc7874adae2c6bf47fed7", + "file_size_bytes": 108607 + }, + { + "name": "Gp0119861_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119861", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_gottcha2_krona.html", + "md5_checksum": "1d6bcddd2f9684f437df096820551756", + "id": "nmdc:1d6bcddd2f9684f437df096820551756", + "file_size_bytes": 232533 + }, + { + "name": "Gp0119861_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119861", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_centrifuge_classification.tsv", + "md5_checksum": "3a252d87f16049bba0df957b1de9ee57", + "id": "nmdc:3a252d87f16049bba0df957b1de9ee57", + "file_size_bytes": 4965214464 + }, + { + "name": "Gp0119861_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119861", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_centrifuge_report.tsv", + "md5_checksum": "262cd993624acccaeceeee458b9eee29", + "id": "nmdc:262cd993624acccaeceeee458b9eee29", + "file_size_bytes": 209383 + }, + { + "name": "Gp0119861_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119861", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_centrifuge_krona.html", + "md5_checksum": "019ea732fb19fa1d45d9d99d7d5c36f1", + "id": "nmdc:019ea732fb19fa1d45d9d99d7d5c36f1", + "file_size_bytes": 2093807 + }, + { + "name": "Gp0119861_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119861", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_kraken2_classification.tsv", + "md5_checksum": "27412b97953a7a840e88efdc2658c0c8", + "id": "nmdc:27412b97953a7a840e88efdc2658c0c8", + "file_size_bytes": 4326306139 + }, + { + "name": "Gp0119861_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119861", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_kraken2_report.tsv", + "md5_checksum": "540464c5e83abbcc4b5ecf50ab32a919", + "id": "nmdc:540464c5e83abbcc4b5ecf50ab32a919", + "file_size_bytes": 426766 + }, + { + "name": "Gp0119861_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119861", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/ReadbasedAnalysis/nmdc_mga0nscm44_kraken2_krona.html", + "md5_checksum": "daa6d34fdba15cb80f86a6597a0f4f08", + "id": "nmdc:daa6d34fdba15cb80f86a6597a0f4f08", + "file_size_bytes": 2822505 + }, + { + "name": "Gp0119861_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119861", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/assembly/nmdc_mga0nscm44_contigs.fna", + "md5_checksum": "c6f3589cf1ecdb6912648b63106b048c", + "id": "nmdc:c6f3589cf1ecdb6912648b63106b048c", + "file_size_bytes": 7169506 + }, + { + "name": "Gp0119861_Assembled scaffold fasta", + "description": "Assembled scaffold 
fasta for Gp0119861", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/assembly/nmdc_mga0nscm44_scaffolds.fna", + "md5_checksum": "c4798a2ceec5160b7a795d5227087e7f", + "id": "nmdc:c4798a2ceec5160b7a795d5227087e7f", + "file_size_bytes": 7165239 + }, + { + "name": "Gp0119861_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/assembly/nmdc_mga0nscm44_covstats.txt", + "md5_checksum": "ad8c2effa027607544596689eb720965", + "id": "nmdc:ad8c2effa027607544596689eb720965", + "file_size_bytes": 210050 + }, + { + "name": "Gp0119861_Assembled AGP file", + "description": "Assembled AGP file for Gp0119861", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/assembly/nmdc_mga0nscm44_assembly.agp", + "md5_checksum": "4273c32b1de2a07c20b7b397af61ee7d", + "id": "nmdc:4273c32b1de2a07c20b7b397af61ee7d", + "file_size_bytes": 183997 + }, + { + "name": "Gp0119861_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119861", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/assembly/nmdc_mga0nscm44_pairedMapped_sorted.bam", + "md5_checksum": "3e06c46cdf6e0b40c502f2777d37b00c", + "id": "nmdc:3e06c46cdf6e0b40c502f2777d37b00c", + "file_size_bytes": 4565161349 + }, + { + "name": "Gp0119861_Protein FAA", + "description": "Protein FAA for Gp0119861", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_proteins.faa", + "md5_checksum": "079c2e1610dfce2e44b5784a55ca060b", + "id": "nmdc:079c2e1610dfce2e44b5784a55ca060b", + "file_size_bytes": 3105729 + }, + { + "name": "Gp0119861_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119861", + "data_object_type": 
"Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_structural_annotation.gff", + "md5_checksum": "3a8e13526a9dc7c7c36c192d23157b4d", + "id": "nmdc:3a8e13526a9dc7c7c36c192d23157b4d", + "file_size_bytes": 2481 + }, + { + "name": "Gp0119861_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119861", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_functional_annotation.gff", + "md5_checksum": "20cd71f09358206b66ec97a42c590d44", + "id": "nmdc:20cd71f09358206b66ec97a42c590d44", + "file_size_bytes": 2551393 + }, + { + "name": "Gp0119861_KO TSV file", + "description": "KO TSV file for Gp0119861", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_ko.tsv", + "md5_checksum": "85656937e2eb9f2c9dfe84357de9b4c0", + "id": "nmdc:85656937e2eb9f2c9dfe84357de9b4c0", + "file_size_bytes": 341816 + }, + { + "name": "Gp0119861_EC TSV file", + "description": "EC TSV file for Gp0119861", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_ec.tsv", + "md5_checksum": "85ce86c775ce55cc9b627eb0e27c09db", + "id": "nmdc:85ce86c775ce55cc9b627eb0e27c09db", + "file_size_bytes": 202956 + }, + { + "name": "Gp0119861_COG GFF file", + "description": "COG GFF file for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_cog.gff", + "md5_checksum": "53e4f9174f3eee3f3bd5920c7ef4b71e", + "id": "nmdc:53e4f9174f3eee3f3bd5920c7ef4b71e", + "file_size_bytes": 1748821 + }, + { + "name": "Gp0119861_PFAM GFF file", + "description": "PFAM GFF file for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_pfam.gff", + "md5_checksum": 
"29ced99cfa5bce640bab89d02bee55ed", + "id": "nmdc:29ced99cfa5bce640bab89d02bee55ed", + "file_size_bytes": 1773240 + }, + { + "name": "Gp0119861_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_tigrfam.gff", + "md5_checksum": "c7630dde1984ce171d1e6dac5ae755ba", + "id": "nmdc:c7630dde1984ce171d1e6dac5ae755ba", + "file_size_bytes": 405198 + }, + { + "name": "Gp0119861_SMART GFF file", + "description": "SMART GFF file for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_smart.gff", + "md5_checksum": "3a9dd7f47cd984636359b57a91072537", + "id": "nmdc:3a9dd7f47cd984636359b57a91072537", + "file_size_bytes": 589339 + }, + { + "name": "Gp0119861_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_supfam.gff", + "md5_checksum": "739dd7ce71cddb2e6a014fa21403d9fe", + "id": "nmdc:739dd7ce71cddb2e6a014fa21403d9fe", + "file_size_bytes": 2580242 + }, + { + "name": "Gp0119861_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_cath_funfam.gff", + "md5_checksum": "da890918c632ae8216b9b7436545d6cd", + "id": "nmdc:da890918c632ae8216b9b7436545d6cd", + "file_size_bytes": 2411538 + }, + { + "name": "Gp0119861_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/annotation/nmdc_mga0nscm44_ko_ec.gff", + "md5_checksum": "5dc240e912e1a4aa4051260d936cc545", + "id": "nmdc:5dc240e912e1a4aa4051260d936cc545", + "file_size_bytes": 1241780 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119861_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/MAGs/nmdc_mga0nscm44_bins.tooShort.fa", + "md5_checksum": "022c8bd54edbf30714f42158d138a677", + "id": "nmdc:022c8bd54edbf30714f42158d138a677", + "file_size_bytes": 694163 + }, + { + "name": "Gp0119861_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119861", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/MAGs/nmdc_mga0nscm44_bins.unbinned.fa", + "md5_checksum": "503ccea5f4608f7e99f1f03d38be912b", + "id": "nmdc:503ccea5f4608f7e99f1f03d38be912b", + "file_size_bytes": 3779944 + }, + { + "name": "Gp0119861_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119861", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/MAGs/nmdc_mga0nscm44_checkm_qa.out", + "md5_checksum": "ab976437d069e9aad1875ce3f4a8b383", + "id": "nmdc:ab976437d069e9aad1875ce3f4a8b383", + "file_size_bytes": 1113 + }, + { + "name": "Gp0119861_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119861", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0nscm44/MAGs/nmdc_mga0nscm44_hqmq_bin.zip", + "md5_checksum": "ed22f96bb9f7e5216cf5f592134f6f40", + "id": "nmdc:ed22f96bb9f7e5216cf5f592134f6f40", + "file_size_bytes": 615127 + }, + { + "name": "Gp0119861_metabat2 bins", + "description": "metabat2 bins for Gp0119861", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0nscm44/MAGs/nmdc_mga0nscm44_metabat_bin.zip", + "md5_checksum": "c7cdc49f69c1ed44f7d167b5fbe08b99", + "id": "nmdc:c7cdc49f69c1ed44f7d167b5fbe08b99", + "file_size_bytes": 193681 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e0a" + }, + "description": "Assembled contigs fasta for gold:Gp0119861", + "url": "https://data.microbiomedata.org/data/1777_95830/assembly/assembly_contigs.fna", + "file_size_bytes": 7157131, + "type": "nmdc:DataObject", + "id": "nmdc:0cb2e3b5ce7e14d6bb08eec2a31d247a", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e0d" + }, + "description": "Assembled scaffold fasta for gold:Gp0119861", + "url": "https://data.microbiomedata.org/data/1777_95830/assembly/assembly_scaffolds.fna", + "file_size_bytes": 7153349, + "type": "nmdc:DataObject", + "id": "nmdc:f0d31feb906764f5289d269951289482", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e0e" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119861", + "url": "https://data.microbiomedata.org/data/1777_95830/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4532184488, + "type": "nmdc:DataObject", + "id": "nmdc:84a0a61df32c4550a8903971cb17cf25", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e11" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119861", + "url": "https://data.microbiomedata.org/data/1777_95830/assembly/mapping_stats.txt", + "file_size_bytes": 197675, + "type": "nmdc:DataObject", + "id": "nmdc:6e6796e590f3b9a1ba579c7023ffaa9f", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e14" + }, + "description": "Assembled AGP file for gold:Gp0119861", + "url": 
"https://data.microbiomedata.org/data/1777_95830/assembly/assembly.agp", + "file_size_bytes": 158177, + "type": "nmdc:DataObject", + "id": "nmdc:cee78bfb9623028ce2aacda661048604", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a79" + }, + "id": "nmdc:3054d7ad832678a7209558fafa4cd5e5", + "name": "1777_95830.krona.html", + "description": "Gold:Gp0119861 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95830/ReadbasedAnalysis/centrifuge/1777_95830.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a8f" + }, + "id": "nmdc:59b7f28516fbae93c89383396d440e17", + "name": "1777_95830.json", + "description": "Gold:Gp0119861 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95830/ReadbasedAnalysis/1777_95830.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16387" + }, + "id": "nmdc:263996ab11bcbdcffe08ec6bf57ef4e2", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119861", + "file_size_bytes": 674179, + "url": "https://data.microbiomedata.org/data/1777_95830/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1638d" + }, + "id": "nmdc:564422e1565b75901d83a97f3aca7ef7", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119861", + "file_size_bytes": 6464389, + "url": "https://data.microbiomedata.org/data/1777_95830/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c86" + }, + "description": "EC TSV File for gold:Gp0119861", + "url": "https://data.microbiomedata.org/1777_95830/img_annotation/Ga0482154_ec.tsv", + "md5_checksum": "066d13964ce47a41a1e2ef7a5b19cb59", 
+ "file_size_bytes": 3385, + "id": "nmdc:066d13964ce47a41a1e2ef7a5b19cb59", + "name": "gold:Gp0119861_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c87" + }, + "description": "KO TSV File for gold:Gp0119861", + "url": "https://data.microbiomedata.org/1777_95830/img_annotation/Ga0482154_ko.tsv", + "md5_checksum": "218b0daa573a8a7f5fc31d6cfe1edad5", + "file_size_bytes": 3385, + "id": "nmdc:218b0daa573a8a7f5fc31d6cfe1edad5", + "name": "gold:Gp0119861_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c89" + }, + "description": "Functional annotation GFF file for gold:Gp0119861", + "url": "https://data.microbiomedata.org/1777_95830/img_annotation/Ga0482154_functional_annotation.gff", + "md5_checksum": "4f13ca8028e6581e0510d8667456bf28", + "file_size_bytes": 3385, + "id": "nmdc:4f13ca8028e6581e0510d8667456bf28", + "name": "gold:Gp0119861_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c8a" + }, + "description": "Protein FAA for gold:Gp0119861", + "url": "https://data.microbiomedata.org/1777_95830/img_annotation/Ga0482154_proteins.faa", + "md5_checksum": "1712f2bb1fa749515ce0bf2b34f8b559", + "file_size_bytes": 3385, + "id": "nmdc:1712f2bb1fa749515ce0bf2b34f8b559", + "name": "gold:Gp0119861_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c8b" + }, + "description": "Structural annotation GFF file for gold:Gp0119861", + "url": "https://data.microbiomedata.org/1777_95830/img_annotation/Ga0482154_structural_annotation.gff", + "md5_checksum": "0f06cb228b08e2aee24c65444d0bfc21", + "file_size_bytes": 3385, + "id": "nmdc:0f06cb228b08e2aee24c65444d0bfc21", + "name": 
"gold:Gp0119861_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346ed" + }, + "has_input": [ + "nmdc:c6f3589cf1ecdb6912648b63106b048c", + "nmdc:3e06c46cdf6e0b40c502f2777d37b00c", + "nmdc:20cd71f09358206b66ec97a42c590d44" + ], + "too_short_contig_num": 1550, + "part_of": [ + "nmdc:mga0nscm44" + ], + "binned_contig_num": 140, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:022c8bd54edbf30714f42158d138a677", + "nmdc:503ccea5f4608f7e99f1f03d38be912b", + "nmdc:ab976437d069e9aad1875ce3f4a8b383", + "nmdc:ed22f96bb9f7e5216cf5f592134f6f40", + "nmdc:c7cdc49f69c1ed44f7d167b5fbe08b99" + ], + "was_informed_by": "gold:Gp0119861", + "input_contig_num": 2475, + "id": "nmdc:3d0802ceb5124c47cedf0ea035aa607f", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0nscm44", + "mags_list": [ + { + "number_of_contig": 19, + "completeness": 3.45, + "bin_name": "bins.1", + "gene_count": 405, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 14, + "completeness": 0.0, + "bin_name": "bins.2", + "gene_count": 262, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + "number_of_contig": 107, + "completeness": 85.43, + "bin_name": "bins.3", + "gene_count": 2098, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": 
"Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.38, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 33 + } + ], + "unbinned_contig_num": 785, + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:03:11+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef98b" + }, + "has_input": [ + "nmdc:c6f3589cf1ecdb6912648b63106b048c" + ], + "part_of": [ + "nmdc:mga0nscm44" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:079c2e1610dfce2e44b5784a55ca060b", + "nmdc:3a8e13526a9dc7c7c36c192d23157b4d", + "nmdc:20cd71f09358206b66ec97a42c590d44", + "nmdc:85656937e2eb9f2c9dfe84357de9b4c0", + "nmdc:85ce86c775ce55cc9b627eb0e27c09db", + "nmdc:53e4f9174f3eee3f3bd5920c7ef4b71e", + "nmdc:29ced99cfa5bce640bab89d02bee55ed", + "nmdc:c7630dde1984ce171d1e6dac5ae755ba", + "nmdc:3a9dd7f47cd984636359b57a91072537", + "nmdc:739dd7ce71cddb2e6a014fa21403d9fe", + "nmdc:da890918c632ae8216b9b7436545d6cd", + "nmdc:5dc240e912e1a4aa4051260d936cc545" + ], + "was_informed_by": "gold:Gp0119861", + "id": "nmdc:3d0802ceb5124c47cedf0ea035aa607f", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0nscm44", + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:03:11+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f7d" + }, + "has_input": [ + "nmdc:a2a8a2d19043b4aa30236bd76b486c49" + ], + "part_of": [ + "nmdc:mga0nscm44" + ], + "ctg_logsum": 86852, + "scaf_logsum": 88211, + "gap_pct": 0.06741, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c6f3589cf1ecdb6912648b63106b048c", 
+ "nmdc:c4798a2ceec5160b7a795d5227087e7f", + "nmdc:ad8c2effa027607544596689eb720965", + "nmdc:4273c32b1de2a07c20b7b397af61ee7d", + "nmdc:3e06c46cdf6e0b40c502f2777d37b00c" + ], + "asm_score": 23.552, + "was_informed_by": "gold:Gp0119861", + "ctg_powsum": 12565, + "scaf_max": 126183, + "id": "nmdc:3d0802ceb5124c47cedf0ea035aa607f", + "scaf_powsum": 13011, + "execution_resource": "NERSC-Cori", + "contigs": 2480, + "name": "Assembly Activity for nmdc:mga0nscm44", + "ctg_max": 81007, + "gc_std": 0.05281, + "contig_bp": 6982167, + "gc_avg": 0.32465, + "started_at_time": "2021-10-11T02:23:26Z", + "scaf_bp": 6986877, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 2378, + "ended_at_time": "2021-10-11T04:03:11+00:00", + "ctg_l50": 13537, + "ctg_l90": 1122, + "ctg_n50": 130, + "ctg_n90": 870, + "scaf_l50": 14525, + "scaf_l90": 1181, + "scaf_n50": 114, + "scaf_n90": 799, + "scaf_l_gt50k": 1012166, + "scaf_n_gt50k": 15, + "scaf_pct_gt50k": 14.486672 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b6f" + }, + "id": "nmdc:omprc-11-3d14m472", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2015_1_7", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-f76q1n90" + ], + "has_output": [ + "jgi:560df5bb0d878540fd6fe205" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2015_1_7", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119861" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": 
"649b009d6bdd4fd20273c84a" + }, + "has_input": [ + "nmdc:d9c999ad1aaa2dd8c498d71eb6dcefb3" + ], + "part_of": [ + "nmdc:mga0nscm44" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a2a8a2d19043b4aa30236bd76b486c49", + "nmdc:7e59b95935fdb62f119e1348143c6f92" + ], + "was_informed_by": "gold:Gp0119861", + "input_read_count": 70972940, + "output_read_bases": 10408021588, + "id": "nmdc:3d0802ceb5124c47cedf0ea035aa607f", + "execution_resource": "NERSC-Cori", + "input_read_bases": 10716913940, + "name": "Read QC Activity for nmdc:mga0nscm44", + "output_read_count": 70652426, + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:03:11+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf2c" + }, + "has_input": [ + "nmdc:a2a8a2d19043b4aa30236bd76b486c49" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:59dc1a81b9d778a58e84574d4b7ceb8f", + "nmdc:208e9969cb3fc7874adae2c6bf47fed7", + "nmdc:1d6bcddd2f9684f437df096820551756", + "nmdc:3a252d87f16049bba0df957b1de9ee57", + "nmdc:262cd993624acccaeceeee458b9eee29", + "nmdc:019ea732fb19fa1d45d9d99d7d5c36f1", + "nmdc:27412b97953a7a840e88efdc2658c0c8", + "nmdc:540464c5e83abbcc4b5ecf50ab32a919", + "nmdc:daa6d34fdba15cb80f86a6597a0f4f08" + ], + "was_informed_by": "gold:Gp0119861", + "id": "nmdc:3d0802ceb5124c47cedf0ea035aa607f", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0nscm44", + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:03:11+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 6110524221, + "type": "nmdc:DataObject", + "id": "jgi:563bf9520d8785441a9214be", + "name": "9704.2.141521.TCCTGAG-CTCTCTA.fastq.gz" + }, + { + "name": 
"Gp0119864_Filtered Reads", + "description": "Filtered Reads for Gp0119864", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/qa/nmdc_mga045k105_filtered.fastq.gz", + "md5_checksum": "952359154aeab2474f9a72322ef4ab99", + "id": "nmdc:952359154aeab2474f9a72322ef4ab99", + "file_size_bytes": 3466504476 + }, + { + "name": "Gp0119864_Filtered Stats", + "description": "Filtered Stats for Gp0119864", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/qa/nmdc_mga045k105_filterStats.txt", + "md5_checksum": "960fde6ef41e5b25a798296e5b48507c", + "id": "nmdc:960fde6ef41e5b25a798296e5b48507c", + "file_size_bytes": 287 + }, + { + "name": "Gp0119864_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_gottcha2_report.tsv", + "md5_checksum": "6a4bb5650a5d3d4b2c882d957a012248", + "id": "nmdc:6a4bb5650a5d3d4b2c882d957a012248", + "file_size_bytes": 8696 + }, + { + "name": "Gp0119864_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_gottcha2_report_full.tsv", + "md5_checksum": "66ad85cbe9197882b289d19034de278f", + "id": "nmdc:66ad85cbe9197882b289d19034de278f", + "file_size_bytes": 474713 + }, + { + "name": "Gp0119864_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119864", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_gottcha2_krona.html", + "md5_checksum": "f799b0fec237fc5dd25fd62c43543d99", + "id": "nmdc:f799b0fec237fc5dd25fd62c43543d99", + "file_size_bytes": 247973 + }, + { + "name": "Gp0119864_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for 
Gp0119864", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_centrifuge_classification.tsv", + "md5_checksum": "70a6917dae126999097bd53514b5d460", + "id": "nmdc:70a6917dae126999097bd53514b5d460", + "file_size_bytes": 4109389162 + }, + { + "name": "Gp0119864_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119864", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_centrifuge_report.tsv", + "md5_checksum": "249bcf7ae2ffa92648e4e956e70a89ed", + "id": "nmdc:249bcf7ae2ffa92648e4e956e70a89ed", + "file_size_bytes": 257610 + }, + { + "name": "Gp0119864_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119864", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_centrifuge_krona.html", + "md5_checksum": "2aad48ba88c824179107071fdebb0eb9", + "id": "nmdc:2aad48ba88c824179107071fdebb0eb9", + "file_size_bytes": 2322344 + }, + { + "name": "Gp0119864_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119864", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_kraken2_classification.tsv", + "md5_checksum": "3d68fa10c0b996395b526ab5348763e9", + "id": "nmdc:3d68fa10c0b996395b526ab5348763e9", + "file_size_bytes": 3470572862 + }, + { + "name": "Gp0119864_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119864", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_kraken2_report.tsv", + "md5_checksum": "ddb1cdfc19d494e370800b7f8e01d191", + "id": 
"nmdc:ddb1cdfc19d494e370800b7f8e01d191", + "file_size_bytes": 603125 + }, + { + "name": "Gp0119864_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119864", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/ReadbasedAnalysis/nmdc_mga045k105_kraken2_krona.html", + "md5_checksum": "7fd0db850f52295131fa1ecb4dc2e12a", + "id": "nmdc:7fd0db850f52295131fa1ecb4dc2e12a", + "file_size_bytes": 3739166 + }, + { + "name": "Gp0119864_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119864", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/assembly/nmdc_mga045k105_contigs.fna", + "md5_checksum": "68a5964bd650f4cf9bd920cea3110b70", + "id": "nmdc:68a5964bd650f4cf9bd920cea3110b70", + "file_size_bytes": 197089631 + }, + { + "name": "Gp0119864_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119864", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/assembly/nmdc_mga045k105_scaffolds.fna", + "md5_checksum": "07a578646d15845e29ab846c59fec0e2", + "id": "nmdc:07a578646d15845e29ab846c59fec0e2", + "file_size_bytes": 195986422 + }, + { + "name": "Gp0119864_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/assembly/nmdc_mga045k105_covstats.txt", + "md5_checksum": "7278b4030d71eb56591ce4e01b5b311d", + "id": "nmdc:7278b4030d71eb56591ce4e01b5b311d", + "file_size_bytes": 27137260 + }, + { + "name": "Gp0119864_Assembled AGP file", + "description": "Assembled AGP file for Gp0119864", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/assembly/nmdc_mga045k105_assembly.agp", + "md5_checksum": "8f395301a0f7016da6d0f60443d73d15", + "id": "nmdc:8f395301a0f7016da6d0f60443d73d15", + 
"file_size_bytes": 25082664 + }, + { + "name": "Gp0119864_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119864", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/assembly/nmdc_mga045k105_pairedMapped_sorted.bam", + "md5_checksum": "c64a1b9d4a72b419ced6d353bbb93624", + "id": "nmdc:c64a1b9d4a72b419ced6d353bbb93624", + "file_size_bytes": 4113536118 + }, + { + "name": "Gp0119864_Protein FAA", + "description": "Protein FAA for Gp0119864", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_proteins.faa", + "md5_checksum": "ee6edad04e0e3133d60fd3aabcb30192", + "id": "nmdc:ee6edad04e0e3133d60fd3aabcb30192", + "file_size_bytes": 108901431 + }, + { + "name": "Gp0119864_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119864", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_structural_annotation.gff", + "md5_checksum": "f3fdaffbce39c3f5f9515d3fdc0c8443", + "id": "nmdc:f3fdaffbce39c3f5f9515d3fdc0c8443", + "file_size_bytes": 2543 + }, + { + "name": "Gp0119864_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119864", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_functional_annotation.gff", + "md5_checksum": "82005b2329289060337a1bbd4e7c604d", + "id": "nmdc:82005b2329289060337a1bbd4e7c604d", + "file_size_bytes": 116643569 + }, + { + "name": "Gp0119864_KO TSV file", + "description": "KO TSV file for Gp0119864", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_ko.tsv", + "md5_checksum": "a270b973ad045f77c2187f5f7a93b7a9", + "id": 
"nmdc:a270b973ad045f77c2187f5f7a93b7a9", + "file_size_bytes": 16341859 + }, + { + "name": "Gp0119864_EC TSV file", + "description": "EC TSV file for Gp0119864", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_ec.tsv", + "md5_checksum": "d734cb3fdf937b4236763e5cfbf38056", + "id": "nmdc:d734cb3fdf937b4236763e5cfbf38056", + "file_size_bytes": 11693314 + }, + { + "name": "Gp0119864_COG GFF file", + "description": "COG GFF file for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_cog.gff", + "md5_checksum": "cb1fe0c8eeccc07b716ebbced6ddf17f", + "id": "nmdc:cb1fe0c8eeccc07b716ebbced6ddf17f", + "file_size_bytes": 66843769 + }, + { + "name": "Gp0119864_PFAM GFF file", + "description": "PFAM GFF file for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_pfam.gff", + "md5_checksum": "d94cdb3894ede5117e78e58d9e7c5caf", + "id": "nmdc:d94cdb3894ede5117e78e58d9e7c5caf", + "file_size_bytes": 54768286 + }, + { + "name": "Gp0119864_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_tigrfam.gff", + "md5_checksum": "4e2531215d6457090d6e01ec02950201", + "id": "nmdc:4e2531215d6457090d6e01ec02950201", + "file_size_bytes": 8491150 + }, + { + "name": "Gp0119864_SMART GFF file", + "description": "SMART GFF file for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_smart.gff", + "md5_checksum": "9eda71e2f99b84d46f2ae9635e7ceb47", + "id": "nmdc:9eda71e2f99b84d46f2ae9635e7ceb47", + "file_size_bytes": 15046294 + }, + { + "name": "Gp0119864_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_supfam.gff", + "md5_checksum": 
"be12e92bf79259175e7316bb9e782a4a", + "id": "nmdc:be12e92bf79259175e7316bb9e782a4a", + "file_size_bytes": 85250242 + }, + { + "name": "Gp0119864_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_cath_funfam.gff", + "md5_checksum": "ab1933e40211a8a8692b028fe300b917", + "id": "nmdc:ab1933e40211a8a8692b028fe300b917", + "file_size_bytes": 70983522 + }, + { + "name": "Gp0119864_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/annotation/nmdc_mga045k105_ko_ec.gff", + "md5_checksum": "0e1caa62c9532e82bf092e6b1e8373df", + "id": "nmdc:0e1caa62c9532e82bf092e6b1e8373df", + "file_size_bytes": 52132717 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119864_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/MAGs/nmdc_mga045k105_bins.tooShort.fa", + "md5_checksum": "bc66168e02a9285e9102da93c6c6a33f", + "id": "nmdc:bc66168e02a9285e9102da93c6c6a33f", + "file_size_bytes": 135924847 + }, + { + "name": "Gp0119864_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/MAGs/nmdc_mga045k105_bins.unbinned.fa", + "md5_checksum": "1bfd2ab9754f7ebf6e2d0424758ed45a", + "id": "nmdc:1bfd2ab9754f7ebf6e2d0424758ed45a", + 
"file_size_bytes": 47397698 + }, + { + "name": "Gp0119864_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119864", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/MAGs/nmdc_mga045k105_checkm_qa.out", + "md5_checksum": "1cbc9fb8168bf1fcafc98ecd9f043e03", + "id": "nmdc:1cbc9fb8168bf1fcafc98ecd9f043e03", + "file_size_bytes": 3420 + }, + { + "name": "Gp0119864_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119864", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/MAGs/nmdc_mga045k105_hqmq_bin.zip", + "md5_checksum": "11d21ea68cbca8166e8c676c353c2d99", + "id": "nmdc:11d21ea68cbca8166e8c676c353c2d99", + "file_size_bytes": 933589 + }, + { + "name": "Gp0119864_metabat2 bins", + "description": "metabat2 bins for Gp0119864", + "url": "https://data.microbiomedata.org/data/nmdc:mga045k105/MAGs/nmdc_mga045k105_metabat_bin.zip", + "md5_checksum": "45bbef4aa22e41bc6a328852e191d0ad", + "id": "nmdc:45bbef4aa22e41bc6a328852e191d0ad", + "file_size_bytes": 3228558 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e1c" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119864", + "url": "https://data.microbiomedata.org/data/1777_95833/assembly/mapping_stats.txt", + "file_size_bytes": 25495300, + "type": "nmdc:DataObject", + "id": "nmdc:51e6ad834e20d897e24db401cd32ee43", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e1d" + }, + "description": "Assembled contigs fasta for gold:Gp0119864", + "url": "https://data.microbiomedata.org/data/1777_95833/assembly/assembly_contigs.fna", + "file_size_bytes": 195447671, + "type": "nmdc:DataObject", + "id": "nmdc:279ac03ea9aa6cce7c1e23119643b70e", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly 
Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e1e" + }, + "description": "Assembled AGP file for gold:Gp0119864", + "url": "https://data.microbiomedata.org/data/1777_95833/assembly/assembly.agp", + "file_size_bytes": 21719094, + "type": "nmdc:DataObject", + "id": "nmdc:25621213a92c445a74ff9eb07ea75b3e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e21" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119864", + "url": "https://data.microbiomedata.org/data/1777_95833/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 4074186273, + "type": "nmdc:DataObject", + "id": "nmdc:0001a31e0c56c8de2d40e3c905652344", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e2c" + }, + "description": "Assembled scaffold fasta for gold:Gp0119864", + "url": "https://data.microbiomedata.org/data/1777_95833/assembly/assembly_scaffolds.fna", + "file_size_bytes": 194383547, + "type": "nmdc:DataObject", + "id": "nmdc:70135e777e299d94cf718ca6418dcd1d", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a9b" + }, + "id": "nmdc:d242c9e018e3775d99d5f9a5ce508066", + "name": "1777_95833.krona.html", + "description": "Gold:Gp0119864 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95833/ReadbasedAnalysis/centrifuge/1777_95833.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15aa3" + }, + "id": "nmdc:5952ed34b8c5f3558c9bc7ea86b4c3db", + "name": "1777_95833.json", + "description": "Gold:Gp0119864 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95833/ReadbasedAnalysis/1777_95833.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + 
"$oid": "649b003f1ae706d7b5b16390" + }, + "id": "nmdc:bc32387cb632a8d9ad9f27afac4125e8", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119864", + "file_size_bytes": 57351127, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16392" + }, + "id": "nmdc:df92c232bfe179a71f815a32f01ebee7", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119864", + "file_size_bytes": 132125103, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16394" + }, + "id": "nmdc:a2d7f628be177b70a0665ec2e3038f37", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119864", + "file_size_bytes": 2445, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16395" + }, + "id": "nmdc:b1e328aaf992b27d2c155006269ac361", + "name": "gold:Gp0119864.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 234271, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16396" + }, + "id": "nmdc:d34625e1e2958d101e70c343499ac968", + "name": "gold:Gp0119864.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 261448, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16397" + }, + 
"id": "nmdc:694f6f5bded356ef27f7fcdb08cb8f3c", + "name": "gold:Gp0119864.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 429672, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16398" + }, + "id": "nmdc:7acdc8e8e4cb1a796404c7dfa2a77b9b", + "name": "gold:Gp0119864.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 336170, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16399" + }, + "id": "nmdc:20e10cacf1e5b7099ac35091f8208373", + "name": "gold:Gp0119864.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 230765, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1639a" + }, + "id": "nmdc:f630bffc26e18df7c73d1fcbc7282987", + "name": "gold:Gp0119864.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 334746, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1639b" + }, + "id": "nmdc:9a6717bf37df8d49f36e225769e7059c", + "name": "gold:Gp0119864.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 243630, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b1639c" + }, + "id": "nmdc:975362bccf72c5e665caf867c3ef2acf", + "name": "gold:Gp0119864.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 329753, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1639d" + }, + "id": "nmdc:a5868905be0f2bbb47fd89d85eda9d83", + "name": "gold:Gp0119864.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 299862, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1639e" + }, + "id": "nmdc:919e42197fd7400ff9c8c9841a2a6d3f", + "name": "gold:Gp0119864.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 513076, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a0" + }, + "id": "nmdc:0f972c609b0f4f7d1cf4921f9941b68e", + "name": "gold:Gp0119864.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119864", + "file_size_bytes": 252257, + "url": "https://data.microbiomedata.org/data/1777_95833/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c95" + }, + "description": "KO TSV File for gold:Gp0119864", + "url": "https://data.microbiomedata.org/1777_95833/img_annotation/Ga0482151_ko.tsv", + "md5_checksum": "30dc605e7458353cd12e0700042f8b23", + "file_size_bytes": 3385, + "id": "nmdc:30dc605e7458353cd12e0700042f8b23", + "name": "gold:Gp0119864_KO TSV File", + "data_object_type": "Annotation 
KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c97" + }, + "description": "EC TSV File for gold:Gp0119864", + "url": "https://data.microbiomedata.org/1777_95833/img_annotation/Ga0482151_ec.tsv", + "md5_checksum": "cc49ea1669ce6a40e73b17880ebdf36e", + "file_size_bytes": 3385, + "id": "nmdc:cc49ea1669ce6a40e73b17880ebdf36e", + "name": "gold:Gp0119864_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c98" + }, + "description": "Functional annotation GFF file for gold:Gp0119864", + "url": "https://data.microbiomedata.org/1777_95833/img_annotation/Ga0482151_functional_annotation.gff", + "md5_checksum": "7ae6e443b360f01d0655aa6066e06c87", + "file_size_bytes": 3385, + "id": "nmdc:7ae6e443b360f01d0655aa6066e06c87", + "name": "gold:Gp0119864_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c99" + }, + "description": "Structural annotation GFF file for gold:Gp0119864", + "url": "https://data.microbiomedata.org/1777_95833/img_annotation/Ga0482151_structural_annotation.gff", + "md5_checksum": "3695d84852c6345c02f5f1c3e3f3a423", + "file_size_bytes": 3385, + "id": "nmdc:3695d84852c6345c02f5f1c3e3f3a423", + "name": "gold:Gp0119864_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c9c" + }, + "description": "Protein FAA for gold:Gp0119864", + "url": "https://data.microbiomedata.org/1777_95833/img_annotation/Ga0482151_proteins.faa", + "md5_checksum": "b803f40f00f0c9988dd5968acd4821cb", + "file_size_bytes": 3385, + "id": "nmdc:b803f40f00f0c9988dd5968acd4821cb", + "name": "gold:Gp0119864_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ 
+ { + "_id": { + "$oid": "649b0052ec087f6bbab346f0" + }, + "has_input": [ + "nmdc:68a5964bd650f4cf9bd920cea3110b70", + "nmdc:c64a1b9d4a72b419ced6d353bbb93624", + "nmdc:82005b2329289060337a1bbd4e7c604d" + ], + "too_short_contig_num": 298403, + "part_of": [ + "nmdc:mga045k105" + ], + "binned_contig_num": 2295, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:bc66168e02a9285e9102da93c6c6a33f", + "nmdc:1bfd2ab9754f7ebf6e2d0424758ed45a", + "nmdc:1cbc9fb8168bf1fcafc98ecd9f043e03", + "nmdc:11d21ea68cbca8166e8c676c353c2d99", + "nmdc:45bbef4aa22e41bc6a328852e191d0ad" + ], + "was_informed_by": "gold:Gp0119864", + "input_contig_num": 328392, + "id": "nmdc:08a00210b9b5961713a99c0def8819d1", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga045k105", + "mags_list": [ + { + "number_of_contig": 88, + "completeness": 20.83, + "bin_name": "bins.1", + "gene_count": 442, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 559, + "completeness": 91.38, + "bin_name": "bins.10", + "gene_count": 5018, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 49.06, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 66 + }, + { + "number_of_contig": 120, + "completeness": 58.08, + "bin_name": "bins.11", + "gene_count": 1035, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nanopelagicales", + "num_16s": 0, + "gtdbtk_family": "AcAMD-5", + "gtdbtk_domain": "Bacteria", + "contamination": 2.39, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": 
"Actinobacteriota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "ATZT02", + "num_t_rna": 24 + }, + { + "number_of_contig": 192, + "completeness": 33.8, + "bin_name": "bins.12", + "gene_count": 1027, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 93, + "completeness": 19.83, + "bin_name": "bins.13", + "gene_count": 531, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 43, + "completeness": 7.21, + "bin_name": "bins.14", + "gene_count": 244, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 99, + "completeness": 51.59, + "bin_name": "bins.15", + "gene_count": 762, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nanopelagicales", + "num_16s": 0, + "gtdbtk_family": "Nanopelagicaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Planktophila", + "num_t_rna": 18 + }, + { + "number_of_contig": 209, + "completeness": 58.86, + "bin_name": "bins.16", + "gene_count": 1572, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.91, + 
"gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "UBA2463", + "num_t_rna": 20 + }, + { + "number_of_contig": 258, + "completeness": 29.31, + "bin_name": "bins.2", + "gene_count": 1040, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 99, + "completeness": 11.64, + "bin_name": "bins.3", + "gene_count": 417, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 93, + "completeness": 25.08, + "bin_name": "bins.4", + "gene_count": 567, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 106, + "completeness": 8.13, + "bin_name": "bins.5", + "gene_count": 494, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 54, + "completeness": 22.41, + "bin_name": "bins.6", + "gene_count": 288, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + 
"gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 75, + "completeness": 17.4, + "bin_name": "bins.7", + "gene_count": 501, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 78, + "completeness": 4.17, + "bin_name": "bins.8", + "gene_count": 340, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 129, + "completeness": 49.01, + "bin_name": "bins.9", + "gene_count": 954, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 18 + } + ], + "unbinned_contig_num": 27694, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:29:59+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef995" + }, + "has_input": [ + "nmdc:68a5964bd650f4cf9bd920cea3110b70" + ], + "part_of": [ + "nmdc:mga045k105" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ee6edad04e0e3133d60fd3aabcb30192", + "nmdc:f3fdaffbce39c3f5f9515d3fdc0c8443", + "nmdc:82005b2329289060337a1bbd4e7c604d", + "nmdc:a270b973ad045f77c2187f5f7a93b7a9", + "nmdc:d734cb3fdf937b4236763e5cfbf38056", + "nmdc:cb1fe0c8eeccc07b716ebbced6ddf17f", + "nmdc:d94cdb3894ede5117e78e58d9e7c5caf", + "nmdc:4e2531215d6457090d6e01ec02950201", + 
"nmdc:9eda71e2f99b84d46f2ae9635e7ceb47", + "nmdc:be12e92bf79259175e7316bb9e782a4a", + "nmdc:ab1933e40211a8a8692b028fe300b917", + "nmdc:0e1caa62c9532e82bf092e6b1e8373df" + ], + "was_informed_by": "gold:Gp0119864", + "id": "nmdc:08a00210b9b5961713a99c0def8819d1", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga045k105", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:29:59+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f82" + }, + "has_input": [ + "nmdc:952359154aeab2474f9a72322ef4ab99" + ], + "part_of": [ + "nmdc:mga045k105" + ], + "ctg_logsum": 668669, + "scaf_logsum": 714515, + "gap_pct": 0.0432, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:68a5964bd650f4cf9bd920cea3110b70", + "nmdc:07a578646d15845e29ab846c59fec0e2", + "nmdc:7278b4030d71eb56591ce4e01b5b311d", + "nmdc:8f395301a0f7016da6d0f60443d73d15", + "nmdc:c64a1b9d4a72b419ced6d353bbb93624" + ], + "asm_score": 6.279, + "was_informed_by": "gold:Gp0119864", + "ctg_powsum": 76121, + "scaf_max": 123964, + "id": "nmdc:08a00210b9b5961713a99c0def8819d1", + "scaf_powsum": 81494, + "execution_resource": "NERSC-Cori", + "contigs": 328466, + "name": "Assembly Activity for nmdc:mga045k105", + "ctg_max": 123964, + "gc_std": 0.11475, + "contig_bp": 183830260, + "gc_avg": 0.51854, + "started_at_time": "2021-10-11T02:23:25Z", + "scaf_bp": 183909710, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 320575, + "ended_at_time": "2021-10-11T04:29:59+00:00", + "ctg_l50": 587, + "ctg_l90": 293, + "ctg_n50": 73884, + "ctg_n90": 263575, + "scaf_l50": 618, + "scaf_l90": 294, + "scaf_n50": 68904, + "scaf_n90": 255022, + "scaf_l_gt50k": 238174, + "scaf_n_gt50k": 3, + "scaf_pct_gt50k": 0.12950593 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b70" + }, + "id": "nmdc:omprc-11-3a2hb308", + 
"name": "Lake water microbial communities from Ohio, USA - Utica-2 Time Series LW 2014_7_11", + "description": "Lake water microbial communities from Ohio, USA", + "has_input": [ + "nmdc:bsm-11-2czfhp59" + ], + "has_output": [ + "jgi:563bf9520d8785441a9214be" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2021-01-15", + "ncbi_project_name": "Lake water microbial communities from Ohio, USA - Utica-2 Time Series LW 2014_7_11", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119864" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c84c" + }, + "has_input": [ + "nmdc:548035c4e7d1dddc8dd1188decfc678f" + ], + "part_of": [ + "nmdc:mga045k105" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:952359154aeab2474f9a72322ef4ab99", + "nmdc:960fde6ef41e5b25a798296e5b48507c" + ], + "was_informed_by": "gold:Gp0119864", + "input_read_count": 58879956, + "output_read_bases": 8077811761, + "id": "nmdc:08a00210b9b5961713a99c0def8819d1", + "execution_resource": "NERSC-Cori", + "input_read_bases": 8890873356, + "name": "Read QC Activity for nmdc:mga045k105", + "output_read_count": 57174638, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:29:59+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf2e" + }, + "has_input": [ + "nmdc:952359154aeab2474f9a72322ef4ab99" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:6a4bb5650a5d3d4b2c882d957a012248", + "nmdc:66ad85cbe9197882b289d19034de278f", + "nmdc:f799b0fec237fc5dd25fd62c43543d99", + 
"nmdc:70a6917dae126999097bd53514b5d460", + "nmdc:249bcf7ae2ffa92648e4e956e70a89ed", + "nmdc:2aad48ba88c824179107071fdebb0eb9", + "nmdc:3d68fa10c0b996395b526ab5348763e9", + "nmdc:ddb1cdfc19d494e370800b7f8e01d191", + "nmdc:7fd0db850f52295131fa1ecb4dc2e12a" + ], + "was_informed_by": "gold:Gp0119864", + "id": "nmdc:08a00210b9b5961713a99c0def8819d1", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga045k105", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:29:59+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 4797784577, + "type": "nmdc:DataObject", + "id": "jgi:560df5b60d878540fd6fe1fd", + "name": "9567.8.137569.TAGGCAT-CTCTCTA.fastq.gz" + }, + { + "name": "Gp0119862_Filtered Reads", + "description": "Filtered Reads for Gp0119862", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/qa/nmdc_mga0p8hk47_filtered.fastq.gz", + "md5_checksum": "249d5db946764bcfd9046cf8b3af6c45", + "id": "nmdc:249d5db946764bcfd9046cf8b3af6c45", + "file_size_bytes": 2467770182 + }, + { + "name": "Gp0119862_Filtered Stats", + "description": "Filtered Stats for Gp0119862", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/qa/nmdc_mga0p8hk47_filterStats.txt", + "md5_checksum": "481a07ca42f6bd89498ea652c37ac5da", + "id": "nmdc:481a07ca42f6bd89498ea652c37ac5da", + "file_size_bytes": 285 + }, + { + "name": "Gp0119862_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_gottcha2_report.tsv", + "md5_checksum": "d5290266a55ae7443f37a324ad39bd1c", + "id": "nmdc:d5290266a55ae7443f37a324ad39bd1c", + "file_size_bytes": 2451 + }, + { + "name": "Gp0119862_Gottcha2 full TSV report", + "description": 
"Gottcha2 full TSV report for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_gottcha2_report_full.tsv", + "md5_checksum": "b46f62c891f3599df6f4b3500a0ea6bc", + "id": "nmdc:b46f62c891f3599df6f4b3500a0ea6bc", + "file_size_bytes": 109473 + }, + { + "name": "Gp0119862_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119862", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_gottcha2_krona.html", + "md5_checksum": "2ff1d363f571e640a124aaefd9bd5016", + "id": "nmdc:2ff1d363f571e640a124aaefd9bd5016", + "file_size_bytes": 233875 + }, + { + "name": "Gp0119862_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119862", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_centrifuge_classification.tsv", + "md5_checksum": "1f42ab9767d8d9fc3d1f0992f1239fb3", + "id": "nmdc:1f42ab9767d8d9fc3d1f0992f1239fb3", + "file_size_bytes": 3534632764 + }, + { + "name": "Gp0119862_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119862", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_centrifuge_report.tsv", + "md5_checksum": "a8d6b9af9a28b30a62609b1227d03fc0", + "id": "nmdc:a8d6b9af9a28b30a62609b1227d03fc0", + "file_size_bytes": 213184 + }, + { + "name": "Gp0119862_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119862", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_centrifuge_krona.html", + "md5_checksum": "fbfc71706e2f297b98f59618462dece9", + "id": "nmdc:fbfc71706e2f297b98f59618462dece9", + 
"file_size_bytes": 2125773 + }, + { + "name": "Gp0119862_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119862", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_kraken2_classification.tsv", + "md5_checksum": "304e57dc8ffddd75aa697203722d92f0", + "id": "nmdc:304e57dc8ffddd75aa697203722d92f0", + "file_size_bytes": 3078162918 + }, + { + "name": "Gp0119862_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119862", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_kraken2_report.tsv", + "md5_checksum": "b0bac742555f05f570817557d73f1888", + "id": "nmdc:b0bac742555f05f570817557d73f1888", + "file_size_bytes": 434375 + }, + { + "name": "Gp0119862_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119862", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/ReadbasedAnalysis/nmdc_mga0p8hk47_kraken2_krona.html", + "md5_checksum": "ae65120bdbbe75ad4038efa88a5d5f95", + "id": "nmdc:ae65120bdbbe75ad4038efa88a5d5f95", + "file_size_bytes": 2876473 + }, + { + "name": "Gp0119862_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119862", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/assembly/nmdc_mga0p8hk47_contigs.fna", + "md5_checksum": "3e669c295c05549732f1a62e6c53d143", + "id": "nmdc:3e669c295c05549732f1a62e6c53d143", + "file_size_bytes": 9744122 + }, + { + "name": "Gp0119862_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119862", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/assembly/nmdc_mga0p8hk47_scaffolds.fna", + "md5_checksum": 
"e3b7e1357b7f94ce3262118eba6b6236", + "id": "nmdc:e3b7e1357b7f94ce3262118eba6b6236", + "file_size_bytes": 9727015 + }, + { + "name": "Gp0119862_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/assembly/nmdc_mga0p8hk47_covstats.txt", + "md5_checksum": "a8b15986d2a43b68de0ce7003b2a85e4", + "id": "nmdc:a8b15986d2a43b68de0ce7003b2a85e4", + "file_size_bytes": 461339 + }, + { + "name": "Gp0119862_Assembled AGP file", + "description": "Assembled AGP file for Gp0119862", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/assembly/nmdc_mga0p8hk47_assembly.agp", + "md5_checksum": "ce8d4dbf59c7ee15ea3b85b517161b32", + "id": "nmdc:ce8d4dbf59c7ee15ea3b85b517161b32", + "file_size_bytes": 422358 + }, + { + "name": "Gp0119862_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119862", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/assembly/nmdc_mga0p8hk47_pairedMapped_sorted.bam", + "md5_checksum": "555fcecf248888317ba16524e0345034", + "id": "nmdc:555fcecf248888317ba16524e0345034", + "file_size_bytes": 3266260381 + }, + { + "name": "Gp0119862_Protein FAA", + "description": "Protein FAA for Gp0119862", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_proteins.faa", + "md5_checksum": "1b4a86a2369f8908af57d91ae83ec8bb", + "id": "nmdc:1b4a86a2369f8908af57d91ae83ec8bb", + "file_size_bytes": 4409032 + }, + { + "name": "Gp0119862_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119862", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_structural_annotation.gff", + "md5_checksum": 
"d40e267ba84e2658d38a6f1bfa7dedd0", + "id": "nmdc:d40e267ba84e2658d38a6f1bfa7dedd0", + "file_size_bytes": 2492 + }, + { + "name": "Gp0119862_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119862", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_functional_annotation.gff", + "md5_checksum": "0c71c00e4bc71cf37823be31cdd707c3", + "id": "nmdc:0c71c00e4bc71cf37823be31cdd707c3", + "file_size_bytes": 3859001 + }, + { + "name": "Gp0119862_KO TSV file", + "description": "KO TSV file for Gp0119862", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_ko.tsv", + "md5_checksum": "b4ebca55e0658c361a55fedd1b1c980b", + "id": "nmdc:b4ebca55e0658c361a55fedd1b1c980b", + "file_size_bytes": 580903 + }, + { + "name": "Gp0119862_EC TSV file", + "description": "EC TSV file for Gp0119862", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_ec.tsv", + "md5_checksum": "1e1bc8867ff9d014974acb0ec26228bf", + "id": "nmdc:1e1bc8867ff9d014974acb0ec26228bf", + "file_size_bytes": 340434 + }, + { + "name": "Gp0119862_COG GFF file", + "description": "COG GFF file for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_cog.gff", + "md5_checksum": "985b14f04ec6a07e1031326e6688e86c", + "id": "nmdc:985b14f04ec6a07e1031326e6688e86c", + "file_size_bytes": 2715563 + }, + { + "name": "Gp0119862_PFAM GFF file", + "description": "PFAM GFF file for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_pfam.gff", + "md5_checksum": "0577d8fc3be7dde2c9cbaff26d419ddf", + "id": "nmdc:0577d8fc3be7dde2c9cbaff26d419ddf", + "file_size_bytes": 2708140 + }, + { + "name": "Gp0119862_TigrFam GFF file", + 
"description": "TigrFam GFF file for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_tigrfam.gff", + "md5_checksum": "f809c89425e7317f99d1933be4a9035f", + "id": "nmdc:f809c89425e7317f99d1933be4a9035f", + "file_size_bytes": 616971 + }, + { + "name": "Gp0119862_SMART GFF file", + "description": "SMART GFF file for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_smart.gff", + "md5_checksum": "510ead1adfa241229c3d5cb25b37b986", + "id": "nmdc:510ead1adfa241229c3d5cb25b37b986", + "file_size_bytes": 863056 + }, + { + "name": "Gp0119862_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_supfam.gff", + "md5_checksum": "53a66879b7343d2ee080589ab28ad97a", + "id": "nmdc:53a66879b7343d2ee080589ab28ad97a", + "file_size_bytes": 3749603 + }, + { + "name": "Gp0119862_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_cath_funfam.gff", + "md5_checksum": "01a56a32c3c2677e936abbe362739a31", + "id": "nmdc:01a56a32c3c2677e936abbe362739a31", + "file_size_bytes": 3557102 + }, + { + "name": "Gp0119862_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/annotation/nmdc_mga0p8hk47_ko_ec.gff", + "md5_checksum": "237be703d5b8555935e255ed34231927", + "id": "nmdc:237be703d5b8555935e255ed34231927", + "file_size_bytes": 2013449 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + 
"id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119862_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/MAGs/nmdc_mga0p8hk47_bins.tooShort.fa", + "md5_checksum": "fd404511f6ad57425d354ce2b5b61e9f", + "id": "nmdc:fd404511f6ad57425d354ce2b5b61e9f", + "file_size_bytes": 1993265 + }, + { + "name": "Gp0119862_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/MAGs/nmdc_mga0p8hk47_bins.unbinned.fa", + "md5_checksum": "a6825d18edddb8cc5344c982ebe0795d", + "id": "nmdc:a6825d18edddb8cc5344c982ebe0795d", + "file_size_bytes": 4364810 + }, + { + "name": "Gp0119862_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119862", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/MAGs/nmdc_mga0p8hk47_checkm_qa.out", + "md5_checksum": "b9830ec5d1b94c6140f06929e97f1279", + "id": "nmdc:b9830ec5d1b94c6140f06929e97f1279", + "file_size_bytes": 1113 + }, + { + "name": "Gp0119862_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119862", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/MAGs/nmdc_mga0p8hk47_hqmq_bin.zip", + "md5_checksum": "97f97be994210c09e883589d1cb74470", + "id": "nmdc:97f97be994210c09e883589d1cb74470", + "file_size_bytes": 758328 + }, + { + "name": "Gp0119862_metabat2 bins", + "description": "metabat2 bins for Gp0119862", + "url": "https://data.microbiomedata.org/data/nmdc:mga0p8hk47/MAGs/nmdc_mga0p8hk47_metabat_bin.zip", + "md5_checksum": "5b9884f04ee0780327d6b40e2f76a529", + "id": "nmdc:5b9884f04ee0780327d6b40e2f76a529", + 
"file_size_bytes": 261281 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e0f" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119862", + "url": "https://data.microbiomedata.org/data/1777_95831/assembly/mapping_stats.txt", + "file_size_bytes": 432874, + "type": "nmdc:DataObject", + "id": "nmdc:4557ad72f05533102be90e90d126b77c", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e10" + }, + "description": "Assembled scaffold fasta for gold:Gp0119862", + "url": "https://data.microbiomedata.org/data/1777_95831/assembly/assembly_scaffolds.fna", + "file_size_bytes": 9699615, + "type": "nmdc:DataObject", + "id": "nmdc:08d367471498883e19d41e3f42dd2a40", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e12" + }, + "description": "Assembled contigs fasta for gold:Gp0119862", + "url": "https://data.microbiomedata.org/data/1777_95831/assembly/assembly_contigs.fna", + "file_size_bytes": 9715657, + "type": "nmdc:DataObject", + "id": "nmdc:1b4116c5fa8023a05811814bde3d66f0", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e15" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119862", + "url": "https://data.microbiomedata.org/data/1777_95831/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 3243323474, + "type": "nmdc:DataObject", + "id": "nmdc:9c348ff1b8abb72691ecc90e36e18f45", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e16" + }, + "description": "Assembled AGP file for gold:Gp0119862", + "url": "https://data.microbiomedata.org/data/1777_95831/assembly/assembly.agp", + "file_size_bytes": 363298, + "type": "nmdc:DataObject", + "id": "nmdc:244fe398b4b806ffa55d01f620cace22", + "name": "assembly.agp", + 
"data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a87" + }, + "id": "nmdc:80b6b93456228e09be7507463fecaa56", + "name": "1777_95831.json", + "description": "Gold:Gp0119862 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95831/ReadbasedAnalysis/1777_95831.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a96" + }, + "id": "nmdc:7e35346b602ddac0a8671e6c872b3ad1", + "name": "1777_95831.krona.html", + "description": "Gold:Gp0119862 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95831/ReadbasedAnalysis/centrifuge/1777_95831.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1638b" + }, + "id": "nmdc:138650ac0b762b8ec3337744a2e0d32c", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119862", + "file_size_bytes": 7730127, + "url": "https://data.microbiomedata.org/data/1777_95831/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1638e" + }, + "id": "nmdc:2cf290115105c097cac5d6a173450343", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119862", + "file_size_bytes": 1941250, + "url": "https://data.microbiomedata.org/data/1777_95831/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c8c" + }, + "description": "EC TSV File for gold:Gp0119862", + "url": "https://data.microbiomedata.org/1777_95831/img_annotation/Ga0482153_ec.tsv", + "md5_checksum": "ceb802e43562a90b5dc028d818a8f56d", + "file_size_bytes": 3385, + "id": "nmdc:ceb802e43562a90b5dc028d818a8f56d", + "name": "gold:Gp0119862_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { 
+ "_id": { + "$oid": "649b003f1ae706d7b5b16c8d" + }, + "description": "Functional annotation GFF file for gold:Gp0119862", + "url": "https://data.microbiomedata.org/1777_95831/img_annotation/Ga0482153_functional_annotation.gff", + "md5_checksum": "895b148dc332f979f35fae25156762d8", + "file_size_bytes": 3385, + "id": "nmdc:895b148dc332f979f35fae25156762d8", + "name": "gold:Gp0119862_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c8e" + }, + "description": "Protein FAA for gold:Gp0119862", + "url": "https://data.microbiomedata.org/1777_95831/img_annotation/Ga0482153_proteins.faa", + "md5_checksum": "19e4e0238ac9c1d07646431ee62fa2d9", + "file_size_bytes": 3385, + "id": "nmdc:19e4e0238ac9c1d07646431ee62fa2d9", + "name": "gold:Gp0119862_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c8f" + }, + "description": "KO TSV File for gold:Gp0119862", + "url": "https://data.microbiomedata.org/1777_95831/img_annotation/Ga0482153_ko.tsv", + "md5_checksum": "9ebfce5805ecaeb07521c28690fa810c", + "file_size_bytes": 3385, + "id": "nmdc:9ebfce5805ecaeb07521c28690fa810c", + "name": "gold:Gp0119862_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c90" + }, + "description": "Structural annotation GFF file for gold:Gp0119862", + "url": "https://data.microbiomedata.org/1777_95831/img_annotation/Ga0482153_structural_annotation.gff", + "md5_checksum": "d8e967e38a491678164ab203551aad61", + "file_size_bytes": 3385, + "id": "nmdc:d8e967e38a491678164ab203551aad61", + "name": "gold:Gp0119862_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346f9" + }, 
+ "has_input": [ + "nmdc:3e669c295c05549732f1a62e6c53d143", + "nmdc:555fcecf248888317ba16524e0345034", + "nmdc:0c71c00e4bc71cf37823be31cdd707c3" + ], + "too_short_contig_num": 4004, + "part_of": [ + "nmdc:mga0p8hk47" + ], + "binned_contig_num": 241, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:fd404511f6ad57425d354ce2b5b61e9f", + "nmdc:a6825d18edddb8cc5344c982ebe0795d", + "nmdc:b9830ec5d1b94c6140f06929e97f1279", + "nmdc:97f97be994210c09e883589d1cb74470", + "nmdc:5b9884f04ee0780327d6b40e2f76a529" + ], + "was_informed_by": "gold:Gp0119862", + "input_contig_num": 5693, + "id": "nmdc:9d49b6ad9e235a52bc80dcf8c48ccd64", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0p8hk47", + "mags_list": [ + { + "number_of_contig": 83, + "completeness": 35.13, + "bin_name": "bins.1", + "gene_count": 625, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 146, + "completeness": 88.04, + "bin_name": "bins.2", + "gene_count": 2588, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.25, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 4, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 48 + }, + { + "number_of_contig": 12, + "completeness": 0.0, + "bin_name": "bins.3", + "gene_count": 240, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + 
"num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 1448, + "started_at_time": "2021-10-11T02:23:33Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:21:24+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef99b" + }, + "has_input": [ + "nmdc:3e669c295c05549732f1a62e6c53d143" + ], + "part_of": [ + "nmdc:mga0p8hk47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:1b4a86a2369f8908af57d91ae83ec8bb", + "nmdc:d40e267ba84e2658d38a6f1bfa7dedd0", + "nmdc:0c71c00e4bc71cf37823be31cdd707c3", + "nmdc:b4ebca55e0658c361a55fedd1b1c980b", + "nmdc:1e1bc8867ff9d014974acb0ec26228bf", + "nmdc:985b14f04ec6a07e1031326e6688e86c", + "nmdc:0577d8fc3be7dde2c9cbaff26d419ddf", + "nmdc:f809c89425e7317f99d1933be4a9035f", + "nmdc:510ead1adfa241229c3d5cb25b37b986", + "nmdc:53a66879b7343d2ee080589ab28ad97a", + "nmdc:01a56a32c3c2677e936abbe362739a31", + "nmdc:237be703d5b8555935e255ed34231927" + ], + "was_informed_by": "gold:Gp0119862", + "id": "nmdc:9d49b6ad9e235a52bc80dcf8c48ccd64", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0p8hk47", + "started_at_time": "2021-10-11T02:23:33Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:21:24+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f7f" + }, + "has_input": [ + "nmdc:249d5db946764bcfd9046cf8b3af6c45" + ], + "part_of": [ + "nmdc:mga0p8hk47" + ], + "ctg_logsum": 98561, + "scaf_logsum": 100385, + "gap_pct": 0.05697, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:3e669c295c05549732f1a62e6c53d143", + "nmdc:e3b7e1357b7f94ce3262118eba6b6236", + "nmdc:a8b15986d2a43b68de0ce7003b2a85e4", + "nmdc:ce8d4dbf59c7ee15ea3b85b517161b32", + "nmdc:555fcecf248888317ba16524e0345034" + ], + "asm_score": 17.87, + "was_informed_by": 
"gold:Gp0119862", + "ctg_powsum": 13403, + "scaf_max": 106405, + "id": "nmdc:9d49b6ad9e235a52bc80dcf8c48ccd64", + "scaf_powsum": 13866, + "execution_resource": "NERSC-Cori", + "contigs": 5693, + "name": "Assembly Activity for nmdc:mga0p8hk47", + "ctg_max": 106405, + "gc_std": 0.08365, + "contig_bp": 9420479, + "gc_avg": 0.34122, + "started_at_time": "2021-10-11T02:23:33Z", + "scaf_bp": 9425849, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 5480, + "ended_at_time": "2021-10-11T03:21:24+00:00", + "ctg_l50": 4997, + "ctg_l90": 499, + "ctg_n50": 345, + "ctg_n90": 2995, + "scaf_l50": 5444, + "scaf_l90": 520, + "scaf_n50": 309, + "scaf_n90": 2820, + "scaf_l_gt50k": 753982, + "scaf_n_gt50k": 11, + "scaf_pct_gt50k": 7.9990883 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b71" + }, + "id": "nmdc:omprc-11-c2269540", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2015_2_5", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-09jxca72" + ], + "has_output": [ + "jgi:560df5b60d878540fd6fe1fd" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2015_2_5", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119862" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c854" + }, + "has_input": [ + "nmdc:360e1d827c9dfa6a483750e82795a152" + ], + "part_of": [ + "nmdc:mga0p8hk47" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:249d5db946764bcfd9046cf8b3af6c45", + "nmdc:481a07ca42f6bd89498ea652c37ac5da" + ], + "was_informed_by": "gold:Gp0119862", + "input_read_count": 50605916, + "output_read_bases": 7422104857, + "id": "nmdc:9d49b6ad9e235a52bc80dcf8c48ccd64", + "execution_resource": "NERSC-Cori", + "input_read_bases": 7641493316, + "name": "Read QC Activity for nmdc:mga0p8hk47", + "output_read_count": 50382102, + "started_at_time": "2021-10-11T02:23:33Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:21:24+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf1d" + }, + "has_input": [ + "nmdc:249d5db946764bcfd9046cf8b3af6c45" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d5290266a55ae7443f37a324ad39bd1c", + "nmdc:b46f62c891f3599df6f4b3500a0ea6bc", + "nmdc:2ff1d363f571e640a124aaefd9bd5016", + "nmdc:1f42ab9767d8d9fc3d1f0992f1239fb3", + "nmdc:a8d6b9af9a28b30a62609b1227d03fc0", + "nmdc:fbfc71706e2f297b98f59618462dece9", + "nmdc:304e57dc8ffddd75aa697203722d92f0", + "nmdc:b0bac742555f05f570817557d73f1888", + "nmdc:ae65120bdbbe75ad4038efa88a5d5f95" + ], + "was_informed_by": "gold:Gp0119862", + "id": "nmdc:9d49b6ad9e235a52bc80dcf8c48ccd64", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0p8hk47", + "started_at_time": "2021-10-11T02:23:33Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:21:24+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 7851113842, + "type": "nmdc:DataObject", + "id": "jgi:560df5ad0d878540fd6fe1ed", + "name": "9567.7.137562.TCCTGAG-AGAGTAG.fastq.gz" + }, + { + "name": "Gp0119856_Filtered Reads", + "description": "Filtered Reads for Gp0119856", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/qa/nmdc_mga03ps698_filtered.fastq.gz", + 
"md5_checksum": "e985ccfb7ad711809e81832bfb35ba15", + "id": "nmdc:e985ccfb7ad711809e81832bfb35ba15", + "file_size_bytes": 4274219903 + }, + { + "name": "Gp0119856_Filtered Stats", + "description": "Filtered Stats for Gp0119856", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/qa/nmdc_mga03ps698_filterStats.txt", + "md5_checksum": "b929eef65de7815dea35f015046c580d", + "id": "nmdc:b929eef65de7815dea35f015046c580d", + "file_size_bytes": 286 + }, + { + "name": "Gp0119856_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_gottcha2_report.tsv", + "md5_checksum": "96078df96cab0402cb61d340dcefa622", + "id": "nmdc:96078df96cab0402cb61d340dcefa622", + "file_size_bytes": 11510 + }, + { + "name": "Gp0119856_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_gottcha2_report_full.tsv", + "md5_checksum": "fbda66832ef6e779f6f3c138f5433317", + "id": "nmdc:fbda66832ef6e779f6f3c138f5433317", + "file_size_bytes": 670046 + }, + { + "name": "Gp0119856_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119856", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_gottcha2_krona.html", + "md5_checksum": "da1a46379e44457942130c21038ded8a", + "id": "nmdc:da1a46379e44457942130c21038ded8a", + "file_size_bytes": 262739 + }, + { + "name": "Gp0119856_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119856", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_centrifuge_classification.tsv", + "md5_checksum": 
"961e680c8d9cc4b454f987890c12384b", + "id": "nmdc:961e680c8d9cc4b454f987890c12384b", + "file_size_bytes": 11250350591 + }, + { + "name": "Gp0119856_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119856", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_centrifuge_report.tsv", + "md5_checksum": "2ed6765cefd8223324f0f96e980a3247", + "id": "nmdc:2ed6765cefd8223324f0f96e980a3247", + "file_size_bytes": 228655 + }, + { + "name": "Gp0119856_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119856", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_centrifuge_krona.html", + "md5_checksum": "9b0719633d97e05c945c98ef12343fb0", + "id": "nmdc:9b0719633d97e05c945c98ef12343fb0", + "file_size_bytes": 2188306 + }, + { + "name": "Gp0119856_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119856", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_kraken2_classification.tsv", + "md5_checksum": "0e45e4ee21e1b51b116bbd94a4476718", + "id": "nmdc:0e45e4ee21e1b51b116bbd94a4476718", + "file_size_bytes": 5434467659 + }, + { + "name": "Gp0119856_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119856", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_kraken2_report.tsv", + "md5_checksum": "4d067967e956efc580f838a72b93751d", + "id": "nmdc:4d067967e956efc580f838a72b93751d", + "file_size_bytes": 481136 + }, + { + "name": "Gp0119856_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119856", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga03ps698/ReadbasedAnalysis/nmdc_mga03ps698_kraken2_krona.html", + "md5_checksum": "f7adc2159a0bbc6344542d41632ea3b2", + "id": "nmdc:f7adc2159a0bbc6344542d41632ea3b2", + "file_size_bytes": 3115509 + }, + { + "name": "Gp0119856_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119856", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/assembly/nmdc_mga03ps698_contigs.fna", + "md5_checksum": "a3ab9cc161429504fed67fb8cce116ec", + "id": "nmdc:a3ab9cc161429504fed67fb8cce116ec", + "file_size_bytes": 15480718 + }, + { + "name": "Gp0119856_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119856", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/assembly/nmdc_mga03ps698_scaffolds.fna", + "md5_checksum": "0b6f8c1befc1be6a6ac46288756abee5", + "id": "nmdc:0b6f8c1befc1be6a6ac46288756abee5", + "file_size_bytes": 15419975 + }, + { + "name": "Gp0119856_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/assembly/nmdc_mga03ps698_covstats.txt", + "md5_checksum": "5e1d739c320b8679354fb44983d377f7", + "id": "nmdc:5e1d739c320b8679354fb44983d377f7", + "file_size_bytes": 1645592 + }, + { + "name": "Gp0119856_Assembled AGP file", + "description": "Assembled AGP file for Gp0119856", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/assembly/nmdc_mga03ps698_assembly.agp", + "md5_checksum": "176c6d024d0f9ff34a9e5a0d8b94a0af", + "id": "nmdc:176c6d024d0f9ff34a9e5a0d8b94a0af", + "file_size_bytes": 1466662 + }, + { + "name": "Gp0119856_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119856", + "data_object_type": "Assembly Coverage BAM", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga03ps698/assembly/nmdc_mga03ps698_pairedMapped_sorted.bam", + "md5_checksum": "10a5dbedea8e37bc7e72e9a39555ab82", + "id": "nmdc:10a5dbedea8e37bc7e72e9a39555ab82", + "file_size_bytes": 5266463915 + }, + { + "name": "Gp0119856_Protein FAA", + "description": "Protein FAA for Gp0119856", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_proteins.faa", + "md5_checksum": "ac3db9c6705623843ae6ef1456d0fdc0", + "id": "nmdc:ac3db9c6705623843ae6ef1456d0fdc0", + "file_size_bytes": 8177540 + }, + { + "name": "Gp0119856_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119856", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_structural_annotation.gff", + "md5_checksum": "359ace71019714576182a050ca7526c8", + "id": "nmdc:359ace71019714576182a050ca7526c8", + "file_size_bytes": 2503 + }, + { + "name": "Gp0119856_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119856", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_functional_annotation.gff", + "md5_checksum": "3cf2fb1d0e65a8792d423b1d68390928", + "id": "nmdc:3cf2fb1d0e65a8792d423b1d68390928", + "file_size_bytes": 8912351 + }, + { + "name": "Gp0119856_KO TSV file", + "description": "KO TSV file for Gp0119856", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_ko.tsv", + "md5_checksum": "c65d4074f13e42af10ed393cbdf74011", + "id": "nmdc:c65d4074f13e42af10ed393cbdf74011", + "file_size_bytes": 1322194 + }, + { + "name": "Gp0119856_EC TSV file", + "description": "EC TSV file for Gp0119856", + "data_object_type": "Annotation Enzyme Commission", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_ec.tsv", + "md5_checksum": "f7ff0230f761d924ed76dc665ea67adb", + "id": "nmdc:f7ff0230f761d924ed76dc665ea67adb", + "file_size_bytes": 829594 + }, + { + "name": "Gp0119856_COG GFF file", + "description": "COG GFF file for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_cog.gff", + "md5_checksum": "f36ee37f47256c89578a3e4a1231672a", + "id": "nmdc:f36ee37f47256c89578a3e4a1231672a", + "file_size_bytes": 5538543 + }, + { + "name": "Gp0119856_PFAM GFF file", + "description": "PFAM GFF file for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_pfam.gff", + "md5_checksum": "e58fc865d06b02c7982a680beb6d321f", + "id": "nmdc:e58fc865d06b02c7982a680beb6d321f", + "file_size_bytes": 4727414 + }, + { + "name": "Gp0119856_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_tigrfam.gff", + "md5_checksum": "a17ce1fd6ffd590bf3dbfc0efdae126c", + "id": "nmdc:a17ce1fd6ffd590bf3dbfc0efdae126c", + "file_size_bytes": 871732 + }, + { + "name": "Gp0119856_SMART GFF file", + "description": "SMART GFF file for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_smart.gff", + "md5_checksum": "2d24478e3ed2312e2244df7510214235", + "id": "nmdc:2d24478e3ed2312e2244df7510214235", + "file_size_bytes": 1442089 + }, + { + "name": "Gp0119856_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_supfam.gff", + "md5_checksum": "afa06cef0742ab1b012cb4d8fb58ff90", + "id": "nmdc:afa06cef0742ab1b012cb4d8fb58ff90", + "file_size_bytes": 7062431 + }, + { + "name": "Gp0119856_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119856", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_cath_funfam.gff", + "md5_checksum": "28f48ddfad55e71f4c5e5b54306e2425", + "id": "nmdc:28f48ddfad55e71f4c5e5b54306e2425", + "file_size_bytes": 5981666 + }, + { + "name": "Gp0119856_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119856", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/annotation/nmdc_mga03ps698_ko_ec.gff", + "md5_checksum": "beb8c2b58d2a96de62b2a429404deb91", + "id": "nmdc:beb8c2b58d2a96de62b2a429404deb91", + "file_size_bytes": 4365449 + }, + { + "name": "Gp0119856_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119856", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/MAGs/nmdc_mga03ps698_checkm_qa.out", + "md5_checksum": "fc6ac370d7165bd692b28402078c46cc", + "id": "nmdc:fc6ac370d7165bd692b28402078c46cc", + "file_size_bytes": 1204 + }, + { + "name": "Gp0119856_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119856", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga03ps698/MAGs/nmdc_mga03ps698_hqmq_bin.zip", + "md5_checksum": "bdc87a797fc694cc54c4c9c47f8aae7b", + "id": "nmdc:bdc87a797fc694cc54c4c9c47f8aae7b", + "file_size_bytes": 1496000 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14def" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119856", + "url": "https://data.microbiomedata.org/data/1777_95825/assembly/mapping_stats.txt", + "file_size_bytes": 1544742, + "type": "nmdc:DataObject", + "id": "nmdc:5e4bd848b9fb9a6bd061605fb67b7730", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df3" + }, + "description": "Assembled scaffold fasta for gold:Gp0119856", + "url": 
"https://data.microbiomedata.org/data/1777_95825/assembly/assembly_scaffolds.fna", + "file_size_bytes": 15319900, + "type": "nmdc:DataObject", + "id": "nmdc:8a054867244d918c1c8b84e568a8f58e", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14df4" + }, + "description": "Assembled AGP file for gold:Gp0119856", + "url": "https://data.microbiomedata.org/data/1777_95825/assembly/assembly.agp", + "file_size_bytes": 1263312, + "type": "nmdc:DataObject", + "id": "nmdc:58ad76a61592d5df97ad5b1f4d0e15d0", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dfc" + }, + "description": "Assembled contigs fasta for gold:Gp0119856", + "url": "https://data.microbiomedata.org/data/1777_95825/assembly/assembly_contigs.fna", + "file_size_bytes": 15379868, + "type": "nmdc:DataObject", + "id": "nmdc:9d774aebab719174e5b3113c65fe5860", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e00" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119856", + "url": "https://data.microbiomedata.org/data/1777_95825/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 5220370634, + "type": "nmdc:DataObject", + "id": "nmdc:9a1308b4d966ce1137883100921e2f12", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a46" + }, + "id": "nmdc:0c79fa2be0aae5a1c9157bd8176a720e", + "name": "1777_95825.krona.html", + "description": "Gold:Gp0119856 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95825/ReadbasedAnalysis/centrifuge/1777_95825.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a4d" + }, + "id": "nmdc:4128186d4b8a1dfd66160eedebc1da3b", + "name": 
"1777_95825.json", + "description": "Gold:Gp0119856 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95825/ReadbasedAnalysis/1777_95825.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16363" + }, + "id": "nmdc:f7ec0e0d32087a0914a6a53553e77adf", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119856", + "file_size_bytes": 6979697, + "url": "https://data.microbiomedata.org/data/1777_95825/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16364" + }, + "id": "nmdc:2c60e50cb84ca774eb6df9385eb89ea5", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119856", + "file_size_bytes": 1092, + "url": "https://data.microbiomedata.org/data/1777_95825/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16365" + }, + "id": "nmdc:86350721659a53ff9ed47b3a9859bfaf", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119856", + "file_size_bytes": 6840637, + "url": "https://data.microbiomedata.org/data/1777_95825/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16366" + }, + "id": "nmdc:2f89ddc88cd21a39e9bd9cd3449f7800", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119856", + "file_size_bytes": 1351, + "url": "https://data.microbiomedata.org/data/1777_95825/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16367" + }, + "id": "nmdc:f694fc71c16dfc9d956b03747c47e053", + "name": "gold:Gp0119856.bins.2.fa", + "description": "metabat2 binned contig file for 
gold:Gp0119856", + "file_size_bytes": 236805, + "url": "https://data.microbiomedata.org/data/1777_95825/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16368" + }, + "id": "nmdc:704bf9b6c5086b775cfb99fc30dfbd93", + "name": "gold:Gp0119856.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119856", + "file_size_bytes": 249808, + "url": "https://data.microbiomedata.org/data/1777_95825/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1636c" + }, + "id": "nmdc:55a3ced89d47216287d0fb19618d7419", + "name": "gold:Gp0119856.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119856", + "file_size_bytes": 922904, + "url": "https://data.microbiomedata.org/data/1777_95825/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c69" + }, + "description": "EC TSV File for gold:Gp0119856", + "url": "https://data.microbiomedata.org/1777_95825/img_annotation/Ga0482159_ec.tsv", + "md5_checksum": "e1c28a3028e3fb994d48c67c3cad4ab4", + "file_size_bytes": 3385, + "id": "nmdc:e1c28a3028e3fb994d48c67c3cad4ab4", + "name": "gold:Gp0119856_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c6b" + }, + "description": "Functional annotation GFF file for gold:Gp0119856", + "url": "https://data.microbiomedata.org/1777_95825/img_annotation/Ga0482159_functional_annotation.gff", + "md5_checksum": "5726d2d4081cd3a257e15e98f5a45f8c", + "file_size_bytes": 3385, + "id": "nmdc:5726d2d4081cd3a257e15e98f5a45f8c", + "name": "gold:Gp0119856_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + 
"$oid": "649b003f1ae706d7b5b16c6c" + }, + "description": "Protein FAA for gold:Gp0119856", + "url": "https://data.microbiomedata.org/1777_95825/img_annotation/Ga0482159_proteins.faa", + "md5_checksum": "42164be43a9ab6da393c7d48825e289e", + "file_size_bytes": 3385, + "id": "nmdc:42164be43a9ab6da393c7d48825e289e", + "name": "gold:Gp0119856_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c6e" + }, + "description": "Structural annotation GFF file for gold:Gp0119856", + "url": "https://data.microbiomedata.org/1777_95825/img_annotation/Ga0482159_structural_annotation.gff", + "md5_checksum": "72aaf6784de9aa35be58e12898bff353", + "file_size_bytes": 3385, + "id": "nmdc:72aaf6784de9aa35be58e12898bff353", + "name": "gold:Gp0119856_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c75" + }, + "description": "KO TSV File for gold:Gp0119856", + "url": "https://data.microbiomedata.org/1777_95825/img_annotation/Ga0482159_ko.tsv", + "md5_checksum": "41b3a8590d7ad384f5a92bfd74267f5f", + "file_size_bytes": 3385, + "id": "nmdc:41b3a8590d7ad384f5a92bfd74267f5f", + "name": "gold:Gp0119856_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346f8" + }, + "has_input": [ + "nmdc:a3ab9cc161429504fed67fb8cce116ec", + "nmdc:10a5dbedea8e37bc7e72e9a39555ab82", + "nmdc:3cf2fb1d0e65a8792d423b1d68390928" + ], + "too_short_contig_num": 18329, + "part_of": [ + "nmdc:mga03ps698" + ], + "binned_contig_num": 352, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fc6ac370d7165bd692b28402078c46cc", + "nmdc:bdc87a797fc694cc54c4c9c47f8aae7b" + ], + "was_informed_by": "gold:Gp0119856", + "input_contig_num": 20170, + "id": 
"nmdc:01f8c294a098bd6902ee3c3476bae40c", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga03ps698", + "mags_list": [ + { + "number_of_contig": 21, + "completeness": 98.21, + "bin_name": "bins.1", + "gene_count": 2054, + "bin_quality": "HQ", + "gtdbtk_species": "Thermotoga petrophila", + "gtdbtk_order": "Thermotogales", + "num_16s": 1, + "gtdbtk_family": "Thermotogaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.79, + "gtdbtk_class": "Thermotogae", + "gtdbtk_phylum": "Thermotogota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Thermotoga", + "num_t_rna": 47 + }, + { + "number_of_contig": 224, + "completeness": 60.06, + "bin_name": "bins.2", + "gene_count": 1586, + "bin_quality": "MQ", + "gtdbtk_species": "Thermoanaerobacter pseudethanolicus", + "gtdbtk_order": "Thermoanaerobacterales", + "num_16s": 2, + "gtdbtk_family": "Thermoanaerobacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.15, + "gtdbtk_class": "Thermoanaerobacteria", + "gtdbtk_phylum": "Firmicutes_A", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Thermoanaerobacter", + "num_t_rna": 28 + }, + { + "number_of_contig": 107, + "completeness": 95.05, + "bin_name": "bins.3", + "gene_count": 2098, + "bin_quality": "HQ", + "gtdbtk_species": "Thermococcus_A sp000430485", + "gtdbtk_order": "Thermococcales", + "num_16s": 1, + "gtdbtk_family": "Thermococcaceae", + "gtdbtk_domain": "Archaea", + "contamination": 0.5, + "gtdbtk_class": "Thermococci", + "gtdbtk_phylum": "Euryarchaeota", + "num_5s": 2, + "num_23s": 1, + "gtdbtk_genus": "Thermococcus_A", + "num_t_rna": 45 + } + ], + "unbinned_contig_num": 1489, + "started_at_time": "2021-12-01T21:22:50Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T21:01:27+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef994" + }, + "has_input": [ + "nmdc:a3ab9cc161429504fed67fb8cce116ec" + ], + "part_of": [ + "nmdc:mga03ps698" + ], + 
"git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:ac3db9c6705623843ae6ef1456d0fdc0", + "nmdc:359ace71019714576182a050ca7526c8", + "nmdc:3cf2fb1d0e65a8792d423b1d68390928", + "nmdc:c65d4074f13e42af10ed393cbdf74011", + "nmdc:f7ff0230f761d924ed76dc665ea67adb", + "nmdc:f36ee37f47256c89578a3e4a1231672a", + "nmdc:e58fc865d06b02c7982a680beb6d321f", + "nmdc:a17ce1fd6ffd590bf3dbfc0efdae126c", + "nmdc:2d24478e3ed2312e2244df7510214235", + "nmdc:afa06cef0742ab1b012cb4d8fb58ff90", + "nmdc:28f48ddfad55e71f4c5e5b54306e2425", + "nmdc:beb8c2b58d2a96de62b2a429404deb91" + ], + "was_informed_by": "gold:Gp0119856", + "id": "nmdc:01f8c294a098bd6902ee3c3476bae40c", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga03ps698", + "started_at_time": "2021-12-01T21:22:50Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T21:01:27+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f8a" + }, + "has_input": [ + "nmdc:e985ccfb7ad711809e81832bfb35ba15" + ], + "part_of": [ + "nmdc:mga03ps698" + ], + "ctg_logsum": 109167, + "scaf_logsum": 110427, + "gap_pct": 0.01954, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a3ab9cc161429504fed67fb8cce116ec", + "nmdc:0b6f8c1befc1be6a6ac46288756abee5", + "nmdc:5e1d739c320b8679354fb44983d377f7", + "nmdc:176c6d024d0f9ff34a9e5a0d8b94a0af", + "nmdc:10a5dbedea8e37bc7e72e9a39555ab82" + ], + "asm_score": 22.419, + "was_informed_by": "gold:Gp0119856", + "ctg_powsum": 16454, + "scaf_max": 505165, + "id": "nmdc:01f8c294a098bd6902ee3c3476bae40c", + "scaf_powsum": 16821, + "execution_resource": "NERSC-Cori", + "contigs": 20175, + "name": "Assembly Activity for nmdc:mga03ps698", + "ctg_max": 505165, + "gc_std": 0.1487, + "gc_avg": 0.47021, + "contig_bp": 14635548, + "started_at_time": "2021-12-01T21:22:50Z", + "scaf_bp": 14638408, + "type": "nmdc:MetagenomeAssembly", + 
"scaffolds": 20015, + "ended_at_time": "2021-12-02T21:01:27+00:00", + "ctg_l50": 1481, + "ctg_l90": 285, + "ctg_n50": 1211, + "ctg_n90": 15261, + "scaf_l50": 1534, + "scaf_l90": 285, + "scaf_n50": 1169, + "scaf_n90": 15121, + "scaf_l_gt50k": 1883977, + "scaf_n_gt50k": 15, + "scaf_pct_gt50k": 12.870094 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b72" + }, + "id": "nmdc:omprc-11-52xgyt85", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_1_29", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-s7pavy15" + ], + "has_output": [ + "jgi:560df5ad0d878540fd6fe1ed" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_1_29", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119856" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c855" + }, + "has_input": [ + "nmdc:d9ed7c9e7af7b1a6952763fca8b22eec" + ], + "part_of": [ + "nmdc:mga03ps698" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e985ccfb7ad711809e81832bfb35ba15", + "nmdc:b929eef65de7815dea35f015046c580d" + ], + "was_informed_by": "gold:Gp0119856", + "input_read_count": 74370278, + "output_read_bases": 10790123501, + "id": "nmdc:01f8c294a098bd6902ee3c3476bae40c", + "execution_resource": "NERSC-Cori", + "input_read_bases": 11229911978, + "name": "Read QC Activity for nmdc:mga03ps698", + "output_read_count": 73460412, + "started_at_time": 
"2021-12-01T21:22:50Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T21:01:27+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf10" + }, + "has_input": [ + "nmdc:e985ccfb7ad711809e81832bfb35ba15" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:96078df96cab0402cb61d340dcefa622", + "nmdc:fbda66832ef6e779f6f3c138f5433317", + "nmdc:da1a46379e44457942130c21038ded8a", + "nmdc:961e680c8d9cc4b454f987890c12384b", + "nmdc:2ed6765cefd8223324f0f96e980a3247", + "nmdc:9b0719633d97e05c945c98ef12343fb0", + "nmdc:0e45e4ee21e1b51b116bbd94a4476718", + "nmdc:4d067967e956efc580f838a72b93751d", + "nmdc:f7adc2159a0bbc6344542d41632ea3b2" + ], + "was_informed_by": "gold:Gp0119856", + "id": "nmdc:01f8c294a098bd6902ee3c3476bae40c", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga03ps698", + "started_at_time": "2021-12-01T21:22:50Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T21:01:27+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 7539787989, + "type": "nmdc:DataObject", + "id": "jgi:560df5b00d878540fd6fe1f1", + "name": "9567.7.137562.TCCTGAG-GCGTAAG.fastq.gz" + }, + { + "name": "Gp0119859_Filtered Reads", + "description": "Filtered Reads for Gp0119859", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/qa/nmdc_mga02ft906_filtered.fastq.gz", + "md5_checksum": "55301513fa1cf4c3beb9a81f6e506295", + "id": "nmdc:55301513fa1cf4c3beb9a81f6e506295", + "file_size_bytes": 4145401042 + }, + { + "name": "Gp0119859_Filtered Stats", + "description": "Filtered Stats for Gp0119859", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/qa/nmdc_mga02ft906_filterStats.txt", + "md5_checksum": 
"70ccd6640e1b5690e2c1033793c27c8c", + "id": "nmdc:70ccd6640e1b5690e2c1033793c27c8c", + "file_size_bytes": 285 + }, + { + "name": "Gp0119859_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_gottcha2_report.tsv", + "md5_checksum": "5dada7666e44fe7fe683311c19779393", + "id": "nmdc:5dada7666e44fe7fe683311c19779393", + "file_size_bytes": 2936 + }, + { + "name": "Gp0119859_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_gottcha2_report_full.tsv", + "md5_checksum": "31ab4b19e0bff7e3b91fd70caa5e6eb5", + "id": "nmdc:31ab4b19e0bff7e3b91fd70caa5e6eb5", + "file_size_bytes": 92287 + }, + { + "name": "Gp0119859_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119859", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_gottcha2_krona.html", + "md5_checksum": "05b48152583979ce9fb85348f8cbc62c", + "id": "nmdc:05b48152583979ce9fb85348f8cbc62c", + "file_size_bytes": 235644 + }, + { + "name": "Gp0119859_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119859", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_centrifuge_classification.tsv", + "md5_checksum": "8703b36fa8d6e2c6a96c50ad02282970", + "id": "nmdc:8703b36fa8d6e2c6a96c50ad02282970", + "file_size_bytes": 5180945354 + }, + { + "name": "Gp0119859_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119859", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_centrifuge_report.tsv", + "md5_checksum": "2ef9dda0235c23fc6fe4c40e8d8d83b6", + "id": "nmdc:2ef9dda0235c23fc6fe4c40e8d8d83b6", + "file_size_bytes": 217529 + }, + { + "name": "Gp0119859_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119859", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_centrifuge_krona.html", + "md5_checksum": "eceb995fbde7adc664fae108bd715006", + "id": "nmdc:eceb995fbde7adc664fae108bd715006", + "file_size_bytes": 2146318 + }, + { + "name": "Gp0119859_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119859", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_kraken2_classification.tsv", + "md5_checksum": "bd077ec8550f6e4f46d2bd9149a4a35d", + "id": "nmdc:bd077ec8550f6e4f46d2bd9149a4a35d", + "file_size_bytes": 4493838910 + }, + { + "name": "Gp0119859_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119859", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_kraken2_report.tsv", + "md5_checksum": "45e6c74aef6398cf5b33478ff53d2ae8", + "id": "nmdc:45e6c74aef6398cf5b33478ff53d2ae8", + "file_size_bytes": 457745 + }, + { + "name": "Gp0119859_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119859", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/ReadbasedAnalysis/nmdc_mga02ft906_kraken2_krona.html", + "md5_checksum": "f820e44088dd97b465a5ca40422cc906", + "id": "nmdc:f820e44088dd97b465a5ca40422cc906", + "file_size_bytes": 2993202 + }, + { + "name": "Gp0119859_Assembled contigs fasta", + 
"description": "Assembled contigs fasta for Gp0119859", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/assembly/nmdc_mga02ft906_contigs.fna", + "md5_checksum": "8a5e0e081df6642a15c35fe35061035b", + "id": "nmdc:8a5e0e081df6642a15c35fe35061035b", + "file_size_bytes": 9080938 + }, + { + "name": "Gp0119859_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119859", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/assembly/nmdc_mga02ft906_scaffolds.fna", + "md5_checksum": "22fe2044be672c6269ce0626e2bb3180", + "id": "nmdc:22fe2044be672c6269ce0626e2bb3180", + "file_size_bytes": 9077450 + }, + { + "name": "Gp0119859_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/assembly/nmdc_mga02ft906_covstats.txt", + "md5_checksum": "e4ef64953cba2887dc4274ff3b6030d7", + "id": "nmdc:e4ef64953cba2887dc4274ff3b6030d7", + "file_size_bytes": 179756 + }, + { + "name": "Gp0119859_Assembled AGP file", + "description": "Assembled AGP file for Gp0119859", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/assembly/nmdc_mga02ft906_assembly.agp", + "md5_checksum": "c94be208f5c58a2d6261f6713390b5ab", + "id": "nmdc:c94be208f5c58a2d6261f6713390b5ab", + "file_size_bytes": 153852 + }, + { + "name": "Gp0119859_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119859", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/assembly/nmdc_mga02ft906_pairedMapped_sorted.bam", + "md5_checksum": "8e8fa21850844a9a4f432961c8a291f0", + "id": "nmdc:8e8fa21850844a9a4f432961c8a291f0", + "file_size_bytes": 5248099434 + }, + { + "name": "Gp0119859_Protein FAA", + "description": "Protein FAA for Gp0119859", + 
"data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_proteins.faa", + "md5_checksum": "49b4b5f2ef568d9dcfa34929d4c29f77", + "id": "nmdc:49b4b5f2ef568d9dcfa34929d4c29f77", + "file_size_bytes": 3857419 + }, + { + "name": "Gp0119859_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119859", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_structural_annotation.gff", + "md5_checksum": "d9581041bb886b0491548a06e9a3079f", + "id": "nmdc:d9581041bb886b0491548a06e9a3079f", + "file_size_bytes": 2478 + }, + { + "name": "Gp0119859_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119859", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_functional_annotation.gff", + "md5_checksum": "84e0b4e0a5d9aa137a28cb45d4578d8a", + "id": "nmdc:84e0b4e0a5d9aa137a28cb45d4578d8a", + "file_size_bytes": 3090236 + }, + { + "name": "Gp0119859_KO TSV file", + "description": "KO TSV file for Gp0119859", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_ko.tsv", + "md5_checksum": "7f9d2c3366f6932310f36ad14da9b2b3", + "id": "nmdc:7f9d2c3366f6932310f36ad14da9b2b3", + "file_size_bytes": 432559 + }, + { + "name": "Gp0119859_EC TSV file", + "description": "EC TSV file for Gp0119859", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_ec.tsv", + "md5_checksum": "1ea625fe7e5a6d94825c07d58aa184ea", + "id": "nmdc:1ea625fe7e5a6d94825c07d58aa184ea", + "file_size_bytes": 252592 + }, + { + "name": "Gp0119859_COG GFF file", + "description": "COG GFF file for Gp0119859", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_cog.gff", + "md5_checksum": "ef42481ebb4520759e56170c2d6f48b9", + "id": "nmdc:ef42481ebb4520759e56170c2d6f48b9", + "file_size_bytes": 2175422 + }, + { + "name": "Gp0119859_PFAM GFF file", + "description": "PFAM GFF file for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_pfam.gff", + "md5_checksum": "6d587d4acb37f3cd2f1ca6666f66f026", + "id": "nmdc:6d587d4acb37f3cd2f1ca6666f66f026", + "file_size_bytes": 2367144 + }, + { + "name": "Gp0119859_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_tigrfam.gff", + "md5_checksum": "6dae7bc00ee8ab493bab36c2fd6eaceb", + "id": "nmdc:6dae7bc00ee8ab493bab36c2fd6eaceb", + "file_size_bytes": 584841 + }, + { + "name": "Gp0119859_SMART GFF file", + "description": "SMART GFF file for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_smart.gff", + "md5_checksum": "612ca1aba943797556bcea7a5155fdaa", + "id": "nmdc:612ca1aba943797556bcea7a5155fdaa", + "file_size_bytes": 790398 + }, + { + "name": "Gp0119859_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_supfam.gff", + "md5_checksum": "0ce402a4e456d36283b5baec876dc047", + "id": "nmdc:0ce402a4e456d36283b5baec876dc047", + "file_size_bytes": 3227125 + }, + { + "name": "Gp0119859_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_cath_funfam.gff", + "md5_checksum": "04209b50ca91fe78339fa1b5828705f9", + "id": "nmdc:04209b50ca91fe78339fa1b5828705f9", + "file_size_bytes": 3123827 + }, + { + "name": "Gp0119859_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119859", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/annotation/nmdc_mga02ft906_ko_ec.gff", + "md5_checksum": "a9b8d31bee697075aa24c6fddcd0abb6", + "id": "nmdc:a9b8d31bee697075aa24c6fddcd0abb6", + "file_size_bytes": 1531769 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119859_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/MAGs/nmdc_mga02ft906_bins.tooShort.fa", + "md5_checksum": "18416fae07b292d815e9f051aa657c8c", + "id": "nmdc:18416fae07b292d815e9f051aa657c8c", + "file_size_bytes": 576702 + }, + { + "name": "Gp0119859_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/MAGs/nmdc_mga02ft906_bins.unbinned.fa", + "md5_checksum": "87748578fd75df6d933c621acf36f753", + "id": "nmdc:87748578fd75df6d933c621acf36f753", + "file_size_bytes": 3605485 + }, + { + "name": "Gp0119859_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119859", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/MAGs/nmdc_mga02ft906_checkm_qa.out", + "md5_checksum": "4ba7180367f554b42b01980aeb356e84", + "id": "nmdc:4ba7180367f554b42b01980aeb356e84", + "file_size_bytes": 1106 + }, + { + "name": "Gp0119859_high-quality and medium-quality bins", + "description": "high-quality and 
medium-quality bins for Gp0119859", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/MAGs/nmdc_mga02ft906_hqmq_bin.zip", + "md5_checksum": "38a3d661a97167cbd21f6f845dcc885f", + "id": "nmdc:38a3d661a97167cbd21f6f845dcc885f", + "file_size_bytes": 1375434 + }, + { + "name": "Gp0119859_metabat2 bins", + "description": "metabat2 bins for Gp0119859", + "url": "https://data.microbiomedata.org/data/nmdc:mga02ft906/MAGs/nmdc_mga02ft906_metabat_bin.zip", + "md5_checksum": "306afd5c7ae5cf5854d164a6ccd4481c", + "id": "nmdc:306afd5c7ae5cf5854d164a6ccd4481c", + "file_size_bytes": 97825 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14dff" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119859", + "url": "https://data.microbiomedata.org/data/1777_95828/assembly/mapping_stats.txt", + "file_size_bytes": 169321, + "type": "nmdc:DataObject", + "id": "nmdc:cc0064b459393626d3a65d72d9fcb2e0", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e02" + }, + "description": "Assembled AGP file for gold:Gp0119859", + "url": "https://data.microbiomedata.org/data/1777_95828/assembly/assembly.agp", + "file_size_bytes": 132242, + "type": "nmdc:DataObject", + "id": "nmdc:826b89ce806bbe81a36dbfd23e207bc6", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e03" + }, + "description": "Assembled contigs fasta for gold:Gp0119859", + "url": "https://data.microbiomedata.org/data/1777_95828/assembly/assembly_contigs.fna", + "file_size_bytes": 9070503, + "type": "nmdc:DataObject", + "id": "nmdc:b6adc0392be1b4d327e57c7630de47f5", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e05" + }, + "description": "Assembled scaffold fasta for gold:Gp0119859", + "url": 
"https://data.microbiomedata.org/data/1777_95828/assembly/assembly_scaffolds.fna", + "file_size_bytes": 9067375, + "type": "nmdc:DataObject", + "id": "nmdc:9cc380cf30113ab5e16b404d454c2282", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e06" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119859", + "url": "https://data.microbiomedata.org/data/1777_95828/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 5210168320, + "type": "nmdc:DataObject", + "id": "nmdc:905e62a4d4cc702a82ac27c370357cef", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a67" + }, + "id": "nmdc:0510e49468dd5bffb41da0762e789fbb", + "name": "1777_95828.krona.html", + "description": "Gold:Gp0119859 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95828/ReadbasedAnalysis/centrifuge/1777_95828.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a6e" + }, + "id": "nmdc:3ef5d8ebe98ee7f901f9ee59e1722ea8", + "name": "1777_95828.json", + "description": "Gold:Gp0119859 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95828/ReadbasedAnalysis/1777_95828.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1637d" + }, + "id": "nmdc:0841cb4763a9f4e742a4494137ede6ba", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119859", + "file_size_bytes": 1106, + "url": "https://data.microbiomedata.org/data/1777_95828/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16380" + }, + "id": "nmdc:e6d3c18dfffca7adaddf7e80c3ed8b37", + "name": "bins.tooShort.fa", + 
"description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119859", + "file_size_bytes": 560887, + "url": "https://data.microbiomedata.org/data/1777_95828/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16381" + }, + "id": "nmdc:3d9e7821cc4bee0d9b44fb188f3dc5a6", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119859", + "file_size_bytes": 7182173, + "url": "https://data.microbiomedata.org/data/1777_95828/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16382" + }, + "id": "nmdc:a1be96f9ef222085276da8cf538bdb06", + "name": "gold:Gp0119859.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119859", + "file_size_bytes": 700299, + "url": "https://data.microbiomedata.org/data/1777_95828/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16384" + }, + "id": "nmdc:58bdce6c9246741c8da7d765dc413519", + "name": "gold:Gp0119859.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119859", + "file_size_bytes": 379336, + "url": "https://data.microbiomedata.org/data/1777_95828/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16388" + }, + "id": "nmdc:47a463d198ae362c2eb01631d791b868", + "name": "gold:Gp0119859.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119859", + "file_size_bytes": 232316, + "url": "https://data.microbiomedata.org/data/1777_95828/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c7c" + }, + "description": "EC TSV File for gold:Gp0119859", + "url": 
"https://data.microbiomedata.org/1777_95828/img_annotation/Ga0482156_ec.tsv", + "md5_checksum": "ed4bc44713888809173573d96114f11e", + "file_size_bytes": 3385, + "id": "nmdc:ed4bc44713888809173573d96114f11e", + "name": "gold:Gp0119859_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c7d" + }, + "description": "KO TSV File for gold:Gp0119859", + "url": "https://data.microbiomedata.org/1777_95828/img_annotation/Ga0482156_ko.tsv", + "md5_checksum": "18cdf12e824a7fb6255dbcac79a2a9a0", + "file_size_bytes": 3385, + "id": "nmdc:18cdf12e824a7fb6255dbcac79a2a9a0", + "name": "gold:Gp0119859_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c7e" + }, + "description": "Protein FAA for gold:Gp0119859", + "url": "https://data.microbiomedata.org/1777_95828/img_annotation/Ga0482156_proteins.faa", + "md5_checksum": "eb09f7f461ad13a47bf5609cdcfc853c", + "file_size_bytes": 3385, + "id": "nmdc:eb09f7f461ad13a47bf5609cdcfc853c", + "name": "gold:Gp0119859_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c82" + }, + "description": "Structural annotation GFF file for gold:Gp0119859", + "url": "https://data.microbiomedata.org/1777_95828/img_annotation/Ga0482156_structural_annotation.gff", + "md5_checksum": "10128acfa04b42839c133fe298eab941", + "file_size_bytes": 3385, + "id": "nmdc:10128acfa04b42839c133fe298eab941", + "name": "gold:Gp0119859_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c88" + }, + "description": "Functional annotation GFF file for gold:Gp0119859", + "url": "https://data.microbiomedata.org/1777_95828/img_annotation/Ga0482156_functional_annotation.gff", + "md5_checksum": 
"1729d54d35a1445759d4a85cbd4e305c", + "file_size_bytes": 3385, + "id": "nmdc:1729d54d35a1445759d4a85cbd4e305c", + "name": "gold:Gp0119859_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346fa" + }, + "has_input": [ + "nmdc:8a5e0e081df6642a15c35fe35061035b", + "nmdc:8e8fa21850844a9a4f432961c8a291f0", + "nmdc:84e0b4e0a5d9aa137a28cb45d4578d8a" + ], + "too_short_contig_num": 1233, + "part_of": [ + "nmdc:mga02ft906" + ], + "binned_contig_num": 176, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:18416fae07b292d815e9f051aa657c8c", + "nmdc:87748578fd75df6d933c621acf36f753", + "nmdc:4ba7180367f554b42b01980aeb356e84", + "nmdc:38a3d661a97167cbd21f6f845dcc885f", + "nmdc:306afd5c7ae5cf5854d164a6ccd4481c" + ], + "was_informed_by": "gold:Gp0119859", + "input_contig_num": 2087, + "id": "nmdc:afcdd58d609459c62f95d7987781515e", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga02ft906", + "mags_list": [ + { + "number_of_contig": 55, + "completeness": 94.83, + "bin_name": "bins.1", + "gene_count": 2720, + "bin_quality": "HQ", + "gtdbtk_species": "Frackibacter sp900114655", + "gtdbtk_order": "Halobacteroidales", + "num_16s": 1, + "gtdbtk_family": "Acetohalobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.03, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 4, + "num_23s": 2, + "gtdbtk_genus": "Frackibacter", + "num_t_rna": 71 + }, + { + "number_of_contig": 22, + "completeness": 0.0, + "bin_name": "bins.2", + "gene_count": 321, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": 
"", + "num_t_rna": 1 + }, + { + "number_of_contig": 99, + "completeness": 87.83, + "bin_name": "bins.3", + "gene_count": 2085, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.29, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 38 + } + ], + "unbinned_contig_num": 678, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T04:35:54+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef997" + }, + "has_input": [ + "nmdc:8a5e0e081df6642a15c35fe35061035b" + ], + "part_of": [ + "nmdc:mga02ft906" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:49b4b5f2ef568d9dcfa34929d4c29f77", + "nmdc:d9581041bb886b0491548a06e9a3079f", + "nmdc:84e0b4e0a5d9aa137a28cb45d4578d8a", + "nmdc:7f9d2c3366f6932310f36ad14da9b2b3", + "nmdc:1ea625fe7e5a6d94825c07d58aa184ea", + "nmdc:ef42481ebb4520759e56170c2d6f48b9", + "nmdc:6d587d4acb37f3cd2f1ca6666f66f026", + "nmdc:6dae7bc00ee8ab493bab36c2fd6eaceb", + "nmdc:612ca1aba943797556bcea7a5155fdaa", + "nmdc:0ce402a4e456d36283b5baec876dc047", + "nmdc:04209b50ca91fe78339fa1b5828705f9", + "nmdc:a9b8d31bee697075aa24c6fddcd0abb6" + ], + "was_informed_by": "gold:Gp0119859", + "id": "nmdc:afcdd58d609459c62f95d7987781515e", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga02ft906", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T04:35:54+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f85" + }, + "has_input": [ + "nmdc:55301513fa1cf4c3beb9a81f6e506295" + ], + "part_of": [ + 
"nmdc:mga02ft906" + ], + "ctg_logsum": 120584, + "scaf_logsum": 121780, + "gap_pct": 0.04878, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8a5e0e081df6642a15c35fe35061035b", + "nmdc:22fe2044be672c6269ce0626e2bb3180", + "nmdc:e4ef64953cba2887dc4274ff3b6030d7", + "nmdc:c94be208f5c58a2d6261f6713390b5ab", + "nmdc:8e8fa21850844a9a4f432961c8a291f0" + ], + "asm_score": 31.142, + "was_informed_by": "gold:Gp0119859", + "ctg_powsum": 18995, + "scaf_max": 203457, + "id": "nmdc:afcdd58d609459c62f95d7987781515e", + "scaf_powsum": 19393, + "execution_resource": "NERSC-Cori", + "contigs": 2088, + "name": "Assembly Activity for nmdc:mga02ft906", + "ctg_max": 203457, + "gc_std": 0.05957, + "contig_bp": 8872877, + "gc_avg": 0.32862, + "started_at_time": "2021-10-11T02:23:29Z", + "scaf_bp": 8877207, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 2015, + "ended_at_time": "2021-10-11T04:35:54+00:00", + "ctg_l50": 24458, + "ctg_l90": 1845, + "ctg_n50": 89, + "ctg_n90": 596, + "scaf_l50": 28510, + "scaf_l90": 1949, + "scaf_n50": 81, + "scaf_n90": 548, + "scaf_l_gt50k": 2689766, + "scaf_n_gt50k": 35, + "scaf_pct_gt50k": 30.299688 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b73" + }, + "id": "nmdc:omprc-11-11rjpd98", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_12_3", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-9798z411" + ], + "has_output": [ + "jgi:560df5b00d878540fd6fe1f1" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2014_12_3", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" 
+ }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119859" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c856" + }, + "has_input": [ + "nmdc:4a60af64224ffd21361b2bc3e10701b9" + ], + "part_of": [ + "nmdc:mga02ft906" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:55301513fa1cf4c3beb9a81f6e506295", + "nmdc:70ccd6640e1b5690e2c1033793c27c8c" + ], + "was_informed_by": "gold:Gp0119859", + "input_read_count": 74142158, + "output_read_bases": 10807441756, + "id": "nmdc:afcdd58d609459c62f95d7987781515e", + "execution_resource": "NERSC-Cori", + "input_read_bases": 11195465858, + "name": "Read QC Activity for nmdc:mga02ft906", + "output_read_count": 73682820, + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T04:35:54+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf1b" + }, + "has_input": [ + "nmdc:55301513fa1cf4c3beb9a81f6e506295" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:5dada7666e44fe7fe683311c19779393", + "nmdc:31ab4b19e0bff7e3b91fd70caa5e6eb5", + "nmdc:05b48152583979ce9fb85348f8cbc62c", + "nmdc:8703b36fa8d6e2c6a96c50ad02282970", + "nmdc:2ef9dda0235c23fc6fe4c40e8d8d83b6", + "nmdc:eceb995fbde7adc664fae108bd715006", + "nmdc:bd077ec8550f6e4f46d2bd9149a4a35d", + "nmdc:45e6c74aef6398cf5b33478ff53d2ae8", + "nmdc:f820e44088dd97b465a5ca40422cc906" + ], + "was_informed_by": "gold:Gp0119859", + "id": "nmdc:afcdd58d609459c62f95d7987781515e", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga02ft906", + "started_at_time": "2021-10-11T02:23:29Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T04:35:54+00:00" + } + ] + }, + { + "data_object_set": [ + { + 
"description": "Raw sequencer read data", + "file_size_bytes": 12432659919, + "type": "nmdc:DataObject", + "id": "jgi:560df3680d878540fd6fe1bc", + "name": "9567.5.137550.CTTGTA.fastq.gz" + }, + { + "name": "Gp0119869_Filtered Reads", + "description": "Filtered Reads for Gp0119869", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/qa/nmdc_mga0r4w677_filtered.fastq.gz", + "md5_checksum": "b2d2811e4dffdcf84f6a07748dbef4f3", + "id": "nmdc:b2d2811e4dffdcf84f6a07748dbef4f3", + "file_size_bytes": 6312551446 + }, + { + "name": "Gp0119869_Filtered Stats", + "description": "Filtered Stats for Gp0119869", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/qa/nmdc_mga0r4w677_filterStats.txt", + "md5_checksum": "a4bc908d98cae11fa812b2da84e5ac11", + "id": "nmdc:a4bc908d98cae11fa812b2da84e5ac11", + "file_size_bytes": 296 + }, + { + "name": "Gp0119869_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_gottcha2_report.tsv", + "md5_checksum": "92ece69a1e806e56a3e0fdd7d711a44a", + "id": "nmdc:92ece69a1e806e56a3e0fdd7d711a44a", + "file_size_bytes": 1415 + }, + { + "name": "Gp0119869_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_gottcha2_report_full.tsv", + "md5_checksum": "869277bdc451ef159ec275dd3ea3704a", + "id": "nmdc:869277bdc451ef159ec275dd3ea3704a", + "file_size_bytes": 95601 + }, + { + "name": "Gp0119869_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119869", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_gottcha2_krona.html", + "md5_checksum": "35dc6c6bf36dde4c8c2d2048dbd9655f", + 
"id": "nmdc:35dc6c6bf36dde4c8c2d2048dbd9655f", + "file_size_bytes": 229659 + }, + { + "name": "Gp0119869_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119869", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_centrifuge_classification.tsv", + "md5_checksum": "66311cee98fb89e51479022afbaefa1a", + "id": "nmdc:66311cee98fb89e51479022afbaefa1a", + "file_size_bytes": 9250954189 + }, + { + "name": "Gp0119869_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119869", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_centrifuge_report.tsv", + "md5_checksum": "9d61f7de31aa3b757620c59c1bab2f33", + "id": "nmdc:9d61f7de31aa3b757620c59c1bab2f33", + "file_size_bytes": 218727 + }, + { + "name": "Gp0119869_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119869", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_centrifuge_krona.html", + "md5_checksum": "f3f1581ff3ea40e77fdf66d485f95b5e", + "id": "nmdc:f3f1581ff3ea40e77fdf66d485f95b5e", + "file_size_bytes": 2157098 + }, + { + "name": "Gp0119869_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119869", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_kraken2_classification.tsv", + "md5_checksum": "b95b47fc8cdaf67fb747db90e3e8ff6b", + "id": "nmdc:b95b47fc8cdaf67fb747db90e3e8ff6b", + "file_size_bytes": 7921249233 + }, + { + "name": "Gp0119869_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119869", + "data_object_type": "Kraken2 Classification Report", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_kraken2_report.tsv", + "md5_checksum": "2508a61e0b33dc330740eda4e778c5b3", + "id": "nmdc:2508a61e0b33dc330740eda4e778c5b3", + "file_size_bytes": 495382 + }, + { + "name": "Gp0119869_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119869", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/ReadbasedAnalysis/nmdc_mga0r4w677_kraken2_krona.html", + "md5_checksum": "90681cd5726365d2ea33a81222ce14ca", + "id": "nmdc:90681cd5726365d2ea33a81222ce14ca", + "file_size_bytes": 3218641 + }, + { + "name": "Gp0119869_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119869", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/assembly/nmdc_mga0r4w677_contigs.fna", + "md5_checksum": "cc022c1da96eca121826d4628f9c8a81", + "id": "nmdc:cc022c1da96eca121826d4628f9c8a81", + "file_size_bytes": 3011012 + }, + { + "name": "Gp0119869_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119869", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/assembly/nmdc_mga0r4w677_scaffolds.fna", + "md5_checksum": "9210a57acf3ca42c9fcd08e1bc87d96c", + "id": "nmdc:9210a57acf3ca42c9fcd08e1bc87d96c", + "file_size_bytes": 3008502 + }, + { + "name": "Gp0119869_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/assembly/nmdc_mga0r4w677_covstats.txt", + "md5_checksum": "e9fcbc77a1f1473ab071e18162ca2938", + "id": "nmdc:e9fcbc77a1f1473ab071e18162ca2938", + "file_size_bytes": 72313 + }, + { + "name": "Gp0119869_Assembled AGP file", + "description": "Assembled AGP file for Gp0119869", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r4w677/assembly/nmdc_mga0r4w677_assembly.agp", + "md5_checksum": "8ad0628cb3d77894d8a48176f919ba4b", + "id": "nmdc:8ad0628cb3d77894d8a48176f919ba4b", + "file_size_bytes": 61486 + }, + { + "name": "Gp0119869_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119869", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/assembly/nmdc_mga0r4w677_pairedMapped_sorted.bam", + "md5_checksum": "7d670a486f32042950a4f139241d09c8", + "id": "nmdc:7d670a486f32042950a4f139241d09c8", + "file_size_bytes": 8385708796 + }, + { + "name": "Gp0119869_Protein FAA", + "description": "Protein FAA for Gp0119869", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_proteins.faa", + "md5_checksum": "d3bc3bfe7dac319421d916b45d2873fe", + "id": "nmdc:d3bc3bfe7dac319421d916b45d2873fe", + "file_size_bytes": 1289760 + }, + { + "name": "Gp0119869_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119869", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_structural_annotation.gff", + "md5_checksum": "9bb6d24c77ae7710b0b579a1379a8896", + "id": "nmdc:9bb6d24c77ae7710b0b579a1379a8896", + "file_size_bytes": 2411 + }, + { + "name": "Gp0119869_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119869", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_functional_annotation.gff", + "md5_checksum": "b4250044f1067162fc38f14879c2a863", + "id": "nmdc:b4250044f1067162fc38f14879c2a863", + "file_size_bytes": 1034660 + }, + { + "name": "Gp0119869_KO TSV file", + "description": "KO TSV file for Gp0119869", + "data_object_type": 
"Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_ko.tsv", + "md5_checksum": "130f6d77ac92a2299cb9ef27a737e354", + "id": "nmdc:130f6d77ac92a2299cb9ef27a737e354", + "file_size_bytes": 158879 + }, + { + "name": "Gp0119869_EC TSV file", + "description": "EC TSV file for Gp0119869", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_ec.tsv", + "md5_checksum": "694c0424ef4ce5e710c948c709d1e097", + "id": "nmdc:694c0424ef4ce5e710c948c709d1e097", + "file_size_bytes": 93028 + }, + { + "name": "Gp0119869_COG GFF file", + "description": "COG GFF file for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_cog.gff", + "md5_checksum": "c5a24d519a53502ed090dae5359323ac", + "id": "nmdc:c5a24d519a53502ed090dae5359323ac", + "file_size_bytes": 781434 + }, + { + "name": "Gp0119869_PFAM GFF file", + "description": "PFAM GFF file for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_pfam.gff", + "md5_checksum": "ea0155ba1d7f1b869d1cd1406647fc71", + "id": "nmdc:ea0155ba1d7f1b869d1cd1406647fc71", + "file_size_bytes": 832811 + }, + { + "name": "Gp0119869_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_tigrfam.gff", + "md5_checksum": "b50fa54ec7223f261c5ebf24e2061d00", + "id": "nmdc:b50fa54ec7223f261c5ebf24e2061d00", + "file_size_bytes": 203267 + }, + { + "name": "Gp0119869_SMART GFF file", + "description": "SMART GFF file for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_smart.gff", + "md5_checksum": "12088833d1b1b6ee6fb9cfd008d008c8", + "id": "nmdc:12088833d1b1b6ee6fb9cfd008d008c8", + "file_size_bytes": 279842 + }, + { + "name": "Gp0119869_SuperFam GFF file", + 
"description": "SuperFam GFF file for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_supfam.gff", + "md5_checksum": "bc963a84a0d6f4d89979e72a5e8c9422", + "id": "nmdc:bc963a84a0d6f4d89979e72a5e8c9422", + "file_size_bytes": 1131205 + }, + { + "name": "Gp0119869_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_cath_funfam.gff", + "md5_checksum": "34da377ced224fbf8a007afe80091da1", + "id": "nmdc:34da377ced224fbf8a007afe80091da1", + "file_size_bytes": 1128387 + }, + { + "name": "Gp0119869_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/annotation/nmdc_mga0r4w677_ko_ec.gff", + "md5_checksum": "0586821370ff5ff714280ffa539e01a7", + "id": "nmdc:0586821370ff5ff714280ffa539e01a7", + "file_size_bytes": 620626 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119869_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/MAGs/nmdc_mga0r4w677_bins.tooShort.fa", + "md5_checksum": "90fa2566b26c28900202d62af77d733c", + "id": "nmdc:90fa2566b26c28900202d62af77d733c", + "file_size_bytes": 267677 + }, + { + "name": "Gp0119869_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119869", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0r4w677/MAGs/nmdc_mga0r4w677_bins.unbinned.fa", + "md5_checksum": "59edad9019c0305934b5ab7ef4298af3", + "id": "nmdc:59edad9019c0305934b5ab7ef4298af3", + "file_size_bytes": 492706 + }, + { + "name": "Gp0119869_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119869", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/MAGs/nmdc_mga0r4w677_checkm_qa.out", + "md5_checksum": "53595e24304cc9c29346d8b9b0e0a0ca", + "id": "nmdc:53595e24304cc9c29346d8b9b0e0a0ca", + "file_size_bytes": 1106 + }, + { + "name": "Gp0119869_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119869", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/MAGs/nmdc_mga0r4w677_hqmq_bin.zip", + "md5_checksum": "c34d6089fcb70dc9a3e731292bf6ecc3", + "id": "nmdc:c34d6089fcb70dc9a3e731292bf6ecc3", + "file_size_bytes": 397942 + }, + { + "name": "Gp0119869_metabat2 bins", + "description": "metabat2 bins for Gp0119869", + "url": "https://data.microbiomedata.org/data/nmdc:mga0r4w677/MAGs/nmdc_mga0r4w677_metabat_bin.zip", + "md5_checksum": "646517160998bb30b46bf3c4c13875ab", + "id": "nmdc:646517160998bb30b46bf3c4c13875ab", + "file_size_bytes": 279575 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e33" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119869", + "url": "https://data.microbiomedata.org/data/1777_95838/assembly/mapping_stats.txt", + "file_size_bytes": 67873, + "type": "nmdc:DataObject", + "id": "nmdc:68269f048c9b2bc4ba1864068946c1a3", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e34" + }, + "description": "Assembled contigs fasta for gold:Gp0119869", + "url": 
"https://data.microbiomedata.org/data/1777_95838/assembly/assembly_contigs.fna", + "file_size_bytes": 3006572, + "type": "nmdc:DataObject", + "id": "nmdc:7ab8f7dcc03e512ef3ab2143b63cc8de", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e36" + }, + "description": "Assembled scaffold fasta for gold:Gp0119869", + "url": "https://data.microbiomedata.org/data/1777_95838/assembly/assembly_scaffolds.fna", + "file_size_bytes": 3004072, + "type": "nmdc:DataObject", + "id": "nmdc:ebb92f935a29f5e48c9dae02712deecf", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e39" + }, + "description": "Assembled AGP file for gold:Gp0119869", + "url": "https://data.microbiomedata.org/data/1777_95838/assembly/assembly.agp", + "file_size_bytes": 52586, + "type": "nmdc:DataObject", + "id": "nmdc:6b4e11c7fd0c24628f647991b18ebfca", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e3a" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119869", + "url": "https://data.microbiomedata.org/data/1777_95838/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 8321515374, + "type": "nmdc:DataObject", + "id": "nmdc:3d624a34d8cd9633e6627ba35de601d2", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ac8" + }, + "id": "nmdc:45882927b4a454fde7abc1d43a5a447e", + "name": "1777_95838.krona.html", + "description": "Gold:Gp0119869 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95838/ReadbasedAnalysis/centrifuge/1777_95838.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15acd" + }, + "id": "nmdc:00f5246d878f2009e31ee8d4f4c452ac", + "name": 
"1777_95838.json", + "description": "Gold:Gp0119869 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95838/ReadbasedAnalysis/1777_95838.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16472" + }, + "id": "nmdc:7ee7bdfd4e46faa4d0836b2b9b8606de", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119869", + "file_size_bytes": 1263371, + "url": "https://data.microbiomedata.org/data/1777_95838/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16474" + }, + "id": "nmdc:5b51e618c3b1237496195069b0695656", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119869", + "file_size_bytes": 257242, + "url": "https://data.microbiomedata.org/data/1777_95838/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16475" + }, + "id": "nmdc:9b87a585c916640f28ed434c3f5882b3", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119869", + "file_size_bytes": 1240, + "url": "https://data.microbiomedata.org/data/1777_95838/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16477" + }, + "id": "nmdc:7222beaabe4f87791b027f3e8434f0dc", + "name": "gold:Gp0119869.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119869", + "file_size_bytes": 218367, + "url": "https://data.microbiomedata.org/data/1777_95838/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16478" + }, + "id": "nmdc:1a5ff51ebd1cd8002c5deb3797a12ac2", + "name": "gold:Gp0119869.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119869", + 
"file_size_bytes": 321429, + "url": "https://data.microbiomedata.org/data/1777_95838/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1647b" + }, + "id": "nmdc:0aea6500a118f8a9dbd77ca54eb3f579", + "name": "gold:Gp0119869.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119869", + "file_size_bytes": 702241, + "url": "https://data.microbiomedata.org/data/1777_95838/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1647c" + }, + "id": "nmdc:e51a86657cddaee1cca35488c79a101a", + "name": "gold:Gp0119869.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119869", + "file_size_bytes": 236928, + "url": "https://data.microbiomedata.org/data/1777_95838/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cad" + }, + "description": "EC TSV File for gold:Gp0119869", + "url": "https://data.microbiomedata.org/1777_95838/img_annotation/Ga0480975_ec.tsv", + "md5_checksum": "aa524a698ead96378589ebfec51375f4", + "file_size_bytes": 3385, + "id": "nmdc:aa524a698ead96378589ebfec51375f4", + "name": "gold:Gp0119869_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cb0" + }, + "description": "Functional annotation GFF file for gold:Gp0119869", + "url": "https://data.microbiomedata.org/1777_95838/img_annotation/Ga0480975_functional_annotation.gff", + "md5_checksum": "ec5a78019044659ab89e4703a2ab57ff", + "file_size_bytes": 3385, + "id": "nmdc:ec5a78019044659ab89e4703a2ab57ff", + "name": "gold:Gp0119869_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16cb1" + }, + "description": "Structural annotation GFF file for gold:Gp0119869", + "url": "https://data.microbiomedata.org/1777_95838/img_annotation/Ga0480975_structural_annotation.gff", + "md5_checksum": "431354e60f7a35b67c2f7ec52b955808", + "file_size_bytes": 3385, + "id": "nmdc:431354e60f7a35b67c2f7ec52b955808", + "name": "gold:Gp0119869_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cb2" + }, + "description": "Protein FAA for gold:Gp0119869", + "url": "https://data.microbiomedata.org/1777_95838/img_annotation/Ga0480975_proteins.faa", + "md5_checksum": "c46f4c6ee971aff627dcbfc8f1e3a9e8", + "file_size_bytes": 3385, + "id": "nmdc:c46f4c6ee971aff627dcbfc8f1e3a9e8", + "name": "gold:Gp0119869_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cb3" + }, + "description": "KO TSV File for gold:Gp0119869", + "url": "https://data.microbiomedata.org/1777_95838/img_annotation/Ga0480975_ko.tsv", + "md5_checksum": "475a785b8c7e1de1e88d36b2863ad1d8", + "file_size_bytes": 3385, + "id": "nmdc:475a785b8c7e1de1e88d36b2863ad1d8", + "name": "gold:Gp0119869_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346fe" + }, + "has_input": [ + "nmdc:cc022c1da96eca121826d4628f9c8a81", + "nmdc:7d670a486f32042950a4f139241d09c8", + "nmdc:b4250044f1067162fc38f14879c2a863" + ], + "too_short_contig_num": 804, + "part_of": [ + "nmdc:mga0r4w677" + ], + "binned_contig_num": 35, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:90fa2566b26c28900202d62af77d733c", + "nmdc:59edad9019c0305934b5ab7ef4298af3", + "nmdc:53595e24304cc9c29346d8b9b0e0a0ca", + 
"nmdc:c34d6089fcb70dc9a3e731292bf6ecc3", + "nmdc:646517160998bb30b46bf3c4c13875ab" + ], + "was_informed_by": "gold:Gp0119869", + "input_contig_num": 888, + "id": "nmdc:60eb75a015d54a3255879924684c5cb3", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0r4w677", + "mags_list": [ + { + "number_of_contig": 11, + "completeness": 8.33, + "bin_name": "bins.1", + "gene_count": 603, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 21, + "completeness": 74.78, + "bin_name": "bins.2", + "gene_count": 1305, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 31 + }, + { + "number_of_contig": 3, + "completeness": 0.0, + "bin_name": "bins.3", + "gene_count": 313, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + } + ], + "unbinned_contig_num": 49, + "started_at_time": "2021-10-10T20:07:44Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-10T21:03:20+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef99e" + }, + "has_input": [ + "nmdc:cc022c1da96eca121826d4628f9c8a81" + ], + "part_of": [ + "nmdc:mga0r4w677" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + 
"has_output": [ + "nmdc:d3bc3bfe7dac319421d916b45d2873fe", + "nmdc:9bb6d24c77ae7710b0b579a1379a8896", + "nmdc:b4250044f1067162fc38f14879c2a863", + "nmdc:130f6d77ac92a2299cb9ef27a737e354", + "nmdc:694c0424ef4ce5e710c948c709d1e097", + "nmdc:c5a24d519a53502ed090dae5359323ac", + "nmdc:ea0155ba1d7f1b869d1cd1406647fc71", + "nmdc:b50fa54ec7223f261c5ebf24e2061d00", + "nmdc:12088833d1b1b6ee6fb9cfd008d008c8", + "nmdc:bc963a84a0d6f4d89979e72a5e8c9422", + "nmdc:34da377ced224fbf8a007afe80091da1", + "nmdc:0586821370ff5ff714280ffa539e01a7" + ], + "was_informed_by": "gold:Gp0119869", + "id": "nmdc:60eb75a015d54a3255879924684c5cb3", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0r4w677", + "started_at_time": "2021-10-10T20:07:44Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-10T21:03:20+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f8c" + }, + "has_input": [ + "nmdc:b2d2811e4dffdcf84f6a07748dbef4f3" + ], + "part_of": [ + "nmdc:mga0r4w677" + ], + "ctg_logsum": 43123, + "scaf_logsum": 43133, + "gap_pct": 0.00681, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:cc022c1da96eca121826d4628f9c8a81", + "nmdc:9210a57acf3ca42c9fcd08e1bc87d96c", + "nmdc:e9fcbc77a1f1473ab071e18162ca2938", + "nmdc:8ad0628cb3d77894d8a48176f919ba4b", + "nmdc:7d670a486f32042950a4f139241d09c8" + ], + "asm_score": 42.99, + "was_informed_by": "gold:Gp0119869", + "ctg_powsum": 7868.448, + "scaf_max": 191994, + "id": "nmdc:60eb75a015d54a3255879924684c5cb3", + "scaf_powsum": 7870.535, + "execution_resource": "NERSC-Cori", + "contigs": 888, + "name": "Assembly Activity for nmdc:mga0r4w677", + "ctg_max": 191994, + "gc_std": 0.05875, + "contig_bp": 2936957, + "gc_avg": 0.32915, + "started_at_time": "2021-10-10T20:07:44Z", + "scaf_bp": 2937157, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 886, + "ended_at_time": "2021-10-10T21:03:20+00:00", + "ctg_l50": 
68418, + "ctg_l90": 5231, + "ctg_n50": 12, + "ctg_n90": 58, + "scaf_l50": 68418, + "scaf_l90": 5231, + "scaf_n50": 12, + "scaf_n90": 58, + "scaf_l_gt50k": 1750742, + "scaf_n_gt50k": 17, + "scaf_pct_gt50k": 59.60669 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b74" + }, + "id": "nmdc:omprc-11-g6htbk44", + "name": "Lab enriched deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Halanaerobium induction", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-p9y24592" + ], + "has_output": [ + "jgi:560df3680d878540fd6fe1bc" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2021-06-15", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Halanaerobium induction", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119869" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85c" + }, + "has_input": [ + "nmdc:2f2851d687737e0aea677a3fec21b0e9" + ], + "part_of": [ + "nmdc:mga0r4w677" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b2d2811e4dffdcf84f6a07748dbef4f3", + "nmdc:a4bc908d98cae11fa812b2da84e5ac11" + ], + "was_informed_by": "gold:Gp0119869", + "input_read_count": 133791830, + "output_read_bases": 19741486167, + "id": "nmdc:60eb75a015d54a3255879924684c5cb3", + "execution_resource": "NERSC-Cori", + "input_read_bases": 20202566330, + "name": "Read QC Activity for nmdc:mga0r4w677", + "output_read_count": 132419060, + "started_at_time": "2021-10-10T20:07:44Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": 
"2021-10-10T21:03:20+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf18" + }, + "has_input": [ + "nmdc:b2d2811e4dffdcf84f6a07748dbef4f3" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:92ece69a1e806e56a3e0fdd7d711a44a", + "nmdc:869277bdc451ef159ec275dd3ea3704a", + "nmdc:35dc6c6bf36dde4c8c2d2048dbd9655f", + "nmdc:66311cee98fb89e51479022afbaefa1a", + "nmdc:9d61f7de31aa3b757620c59c1bab2f33", + "nmdc:f3f1581ff3ea40e77fdf66d485f95b5e", + "nmdc:b95b47fc8cdaf67fb747db90e3e8ff6b", + "nmdc:2508a61e0b33dc330740eda4e778c5b3", + "nmdc:90681cd5726365d2ea33a81222ce14ca" + ], + "was_informed_by": "gold:Gp0119869", + "id": "nmdc:60eb75a015d54a3255879924684c5cb3", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0r4w677", + "started_at_time": "2021-10-10T20:07:44Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-10T21:03:20+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 6465739889, + "type": "nmdc:DataObject", + "id": "jgi:56a6899d0d878559e286ce67", + "name": "10115.4.149473.AGTTCC.fastq.gz" + }, + { + "name": "Gp0119871_Filtered Reads", + "description": "Filtered Reads for Gp0119871", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/qa/nmdc_mga06akz86_filtered.fastq.gz", + "md5_checksum": "4cac70dbe447572e04bfa9e362e1aafc", + "id": "nmdc:4cac70dbe447572e04bfa9e362e1aafc", + "file_size_bytes": 4130570630 + }, + { + "name": "Gp0119871_Filtered Stats", + "description": "Filtered Stats for Gp0119871", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/qa/nmdc_mga06akz86_filterStats.txt", + "md5_checksum": "f2cbc77fed4639eb21d7532dd62c2a25", + "id": "nmdc:f2cbc77fed4639eb21d7532dd62c2a25", + "file_size_bytes": 286 + }, + { + 
"name": "Gp0119871_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_gottcha2_report.tsv", + "md5_checksum": "bc090921f030024644cde2fd69d5b12e", + "id": "nmdc:bc090921f030024644cde2fd69d5b12e", + "file_size_bytes": 7136 + }, + { + "name": "Gp0119871_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_gottcha2_report_full.tsv", + "md5_checksum": "08578df055323144d8daecd706a3e4df", + "id": "nmdc:08578df055323144d8daecd706a3e4df", + "file_size_bytes": 719344 + }, + { + "name": "Gp0119871_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119871", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_gottcha2_krona.html", + "md5_checksum": "ac553a799f0d1ad34a68d669c58ff896", + "id": "nmdc:ac553a799f0d1ad34a68d669c58ff896", + "file_size_bytes": 250265 + }, + { + "name": "Gp0119871_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119871", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_centrifuge_classification.tsv", + "md5_checksum": "30bc8aeebcd6101139d76549dec969b9", + "id": "nmdc:30bc8aeebcd6101139d76549dec969b9", + "file_size_bytes": 4794844292 + }, + { + "name": "Gp0119871_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119871", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_centrifuge_report.tsv", + "md5_checksum": "acb3bf94a6cba7733f65ff8417d49c56", + "id": "nmdc:acb3bf94a6cba7733f65ff8417d49c56", + 
"file_size_bytes": 262565 + }, + { + "name": "Gp0119871_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119871", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_centrifuge_krona.html", + "md5_checksum": "79d40f492c3c532861bbb2ca4e7d2b62", + "id": "nmdc:79d40f492c3c532861bbb2ca4e7d2b62", + "file_size_bytes": 2352480 + }, + { + "name": "Gp0119871_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119871", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_kraken2_classification.tsv", + "md5_checksum": "fc33db3ee4bb68c5cc71297346123d00", + "id": "nmdc:fc33db3ee4bb68c5cc71297346123d00", + "file_size_bytes": 3944107480 + }, + { + "name": "Gp0119871_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119871", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_kraken2_report.tsv", + "md5_checksum": "e052993cd2c7362e656961567594c509", + "id": "nmdc:e052993cd2c7362e656961567594c509", + "file_size_bytes": 648817 + }, + { + "name": "Gp0119871_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119871", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/ReadbasedAnalysis/nmdc_mga06akz86_kraken2_krona.html", + "md5_checksum": "9c6c5ba4a06ca36ec6b983c94de9635f", + "id": "nmdc:9c6c5ba4a06ca36ec6b983c94de9635f", + "file_size_bytes": 3998183 + }, + { + "name": "Gp0119871_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119871", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/assembly/nmdc_mga06akz86_contigs.fna", + "md5_checksum": 
"7b09172d1801c8b722b203fcd3414ced", + "id": "nmdc:7b09172d1801c8b722b203fcd3414ced", + "file_size_bytes": 338153055 + }, + { + "name": "Gp0119871_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119871", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/assembly/nmdc_mga06akz86_scaffolds.fna", + "md5_checksum": "0e9490e2f0fdf7ad952058f7a74cf532", + "id": "nmdc:0e9490e2f0fdf7ad952058f7a74cf532", + "file_size_bytes": 337091781 + }, + { + "name": "Gp0119871_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/assembly/nmdc_mga06akz86_covstats.txt", + "md5_checksum": "d33b2f9915c7de9f4d38675b299f57b5", + "id": "nmdc:d33b2f9915c7de9f4d38675b299f57b5", + "file_size_bytes": 28129142 + }, + { + "name": "Gp0119871_Assembled AGP file", + "description": "Assembled AGP file for Gp0119871", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/assembly/nmdc_mga06akz86_assembly.agp", + "md5_checksum": "5f1f751638ef6b988e338c43388ef155", + "id": "nmdc:5f1f751638ef6b988e338c43388ef155", + "file_size_bytes": 26344193 + }, + { + "name": "Gp0119871_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119871", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/assembly/nmdc_mga06akz86_pairedMapped_sorted.bam", + "md5_checksum": "e8cd5d9afc47b0df91aa06d13f5b5e94", + "id": "nmdc:e8cd5d9afc47b0df91aa06d13f5b5e94", + "file_size_bytes": 5189531679 + }, + { + "name": "Gp0119871_Protein FAA", + "description": "Protein FAA for Gp0119871", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_proteins.faa", + "md5_checksum": "7d1f1662c10394d7890010ecd658fb8d", + "id": 
"nmdc:7d1f1662c10394d7890010ecd658fb8d", + "file_size_bytes": 166480554 + }, + { + "name": "Gp0119871_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119871", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_structural_annotation.gff", + "md5_checksum": "f50096a301c544b750982bd4624a22c2", + "id": "nmdc:f50096a301c544b750982bd4624a22c2", + "file_size_bytes": 2789 + }, + { + "name": "Gp0119871_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119871", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_functional_annotation.gff", + "md5_checksum": "40a3d52848d223f0dc702c8bc170ac08", + "id": "nmdc:40a3d52848d223f0dc702c8bc170ac08", + "file_size_bytes": 155722346 + }, + { + "name": "Gp0119871_KO TSV file", + "description": "KO TSV file for Gp0119871", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_ko.tsv", + "md5_checksum": "7c006a93f93ea27a51807507d6892ad2", + "id": "nmdc:7c006a93f93ea27a51807507d6892ad2", + "file_size_bytes": 20389095 + }, + { + "name": "Gp0119871_EC TSV file", + "description": "EC TSV file for Gp0119871", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_ec.tsv", + "md5_checksum": "96494f25686c97972f89365f97e0789e", + "id": "nmdc:96494f25686c97972f89365f97e0789e", + "file_size_bytes": 13155738 + }, + { + "name": "Gp0119871_COG GFF file", + "description": "COG GFF file for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_cog.gff", + "md5_checksum": "43580863f0544bc993d135eb45336133", + "id": "nmdc:43580863f0544bc993d135eb45336133", + "file_size_bytes": 
97779003 + }, + { + "name": "Gp0119871_PFAM GFF file", + "description": "PFAM GFF file for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_pfam.gff", + "md5_checksum": "5b7a6250e2f0ac56c3ffe2388759231d", + "id": "nmdc:5b7a6250e2f0ac56c3ffe2388759231d", + "file_size_bytes": 89921771 + }, + { + "name": "Gp0119871_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_tigrfam.gff", + "md5_checksum": "fe75d12ff308b517d79e93efd3957715", + "id": "nmdc:fe75d12ff308b517d79e93efd3957715", + "file_size_bytes": 15193921 + }, + { + "name": "Gp0119871_SMART GFF file", + "description": "SMART GFF file for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_smart.gff", + "md5_checksum": "f79ec9083c0c9ae63a3e0753645e3189", + "id": "nmdc:f79ec9083c0c9ae63a3e0753645e3189", + "file_size_bytes": 29009879 + }, + { + "name": "Gp0119871_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_supfam.gff", + "md5_checksum": "9006b9acf9eab67dd33e9cdcb86a5140", + "id": "nmdc:9006b9acf9eab67dd33e9cdcb86a5140", + "file_size_bytes": 125112112 + }, + { + "name": "Gp0119871_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_cath_funfam.gff", + "md5_checksum": "7551c18b7ffe271f9c6304e6c6693876", + "id": "nmdc:7551c18b7ffe271f9c6304e6c6693876", + "file_size_bytes": 112029151 + }, + { + "name": "Gp0119871_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/annotation/nmdc_mga06akz86_ko_ec.gff", + "md5_checksum": "f39ebbf448dec2cf26a30206f420c328", + "id": "nmdc:f39ebbf448dec2cf26a30206f420c328", + 
"file_size_bytes": 64818609 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119871_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/MAGs/nmdc_mga06akz86_bins.tooShort.fa", + "md5_checksum": "8d6cf34db674d451277709fe676d979a", + "id": "nmdc:8d6cf34db674d451277709fe676d979a", + "file_size_bytes": 145188707 + }, + { + "name": "Gp0119871_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/MAGs/nmdc_mga06akz86_bins.unbinned.fa", + "md5_checksum": "afdb1f847a73fa075b6e5220467efd66", + "id": "nmdc:afdb1f847a73fa075b6e5220467efd66", + "file_size_bytes": 86156430 + }, + { + "name": "Gp0119871_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119871", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/MAGs/nmdc_mga06akz86_checkm_qa.out", + "md5_checksum": "58c90ee508e2c58a08675bae9b1613cf", + "id": "nmdc:58c90ee508e2c58a08675bae9b1613cf", + "file_size_bytes": 7656 + }, + { + "name": "Gp0119871_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119871", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/MAGs/nmdc_mga06akz86_hqmq_bin.zip", + "md5_checksum": 
"96655b86195384992cc636c5c2e024b6", + "id": "nmdc:96655b86195384992cc636c5c2e024b6", + "file_size_bytes": 16583626 + }, + { + "name": "Gp0119871_metabat2 bins", + "description": "metabat2 bins for Gp0119871", + "url": "https://data.microbiomedata.org/data/nmdc:mga06akz86/MAGs/nmdc_mga06akz86_metabat_bin.zip", + "md5_checksum": "a5e5088a7709dbfbbaf78b4a269e8cd2", + "id": "nmdc:a5e5088a7709dbfbbaf78b4a269e8cd2", + "file_size_bytes": 15935902 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e3e" + }, + "description": "Assembled contigs fasta for gold:Gp0119871", + "url": "https://data.microbiomedata.org/data/1777_96350/assembly/assembly_contigs.fna", + "file_size_bytes": 336393385, + "type": "nmdc:DataObject", + "id": "nmdc:b35b812f128516b0d12910c71df77b1c", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e3f" + }, + "description": "Assembled scaffold fasta for gold:Gp0119871", + "url": "https://data.microbiomedata.org/data/1777_96350/assembly/assembly_scaffolds.fna", + "file_size_bytes": 335335781, + "type": "nmdc:DataObject", + "id": "nmdc:3028a2ae3b759c0f108d3af82d75f058", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e40" + }, + "description": "Assembled AGP file for gold:Gp0119871", + "url": "https://data.microbiomedata.org/data/1777_96350/assembly/assembly.agp", + "file_size_bytes": 22817193, + "type": "nmdc:DataObject", + "id": "nmdc:d35d3b5adef61165bb14b7b2c24dac1e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e41" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119871", + "url": "https://data.microbiomedata.org/data/1777_96350/assembly/mapping_stats.txt", + "file_size_bytes": 26369472, + "type": "nmdc:DataObject", + "id": "nmdc:0ab8fdb1b8d9815540cf3acec47aba8b", + "name": "mapping_stats.txt", + 
"data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e42" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119871", + "url": "https://data.microbiomedata.org/data/1777_96350/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 5136711565, + "type": "nmdc:DataObject", + "id": "nmdc:895ef3f68ba17c79f504a56ab63aeb37", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ada" + }, + "id": "nmdc:24fefda69c4669b2a000ed07d3e510f3", + "name": "1777_96350.krona.html", + "description": "Gold:Gp0119871 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_96350/ReadbasedAnalysis/centrifuge/1777_96350.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ae1" + }, + "id": "nmdc:26c800225370d0e116b23805dc4e8f24", + "name": "1777_96350.json", + "description": "Gold:Gp0119871 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_96350/ReadbasedAnalysis/1777_96350.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1655b" + }, + "id": "nmdc:c27c9feafe53ce1518e5087f30cc2c07", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119871", + "file_size_bytes": 141360154, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16562" + }, + "id": "nmdc:d3cc00645e3f3d448e4e77668018e867", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119871", + "file_size_bytes": 14448, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": 
"CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16563" + }, + "id": "nmdc:71cea65032cbca74a1a75ea509309eae", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119871", + "file_size_bytes": 95384435, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16565" + }, + "id": "nmdc:0aeeda05f961abfc94a5cdee482f2c63", + "name": "gold:Gp0119871.bins.26.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 4976150, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.26.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16566" + }, + "id": "nmdc:968e8b0b0d161493be3a35154cad6974", + "name": "gold:Gp0119871.bins.67.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 493041, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.67.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16568" + }, + "id": "nmdc:551e520e7931c10a904cd08db8dc9e72", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119871", + "file_size_bytes": 6772, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16569" + }, + "id": "nmdc:21a21f0413b0c80bd9ccb960ca262f3d", + "name": "gold:Gp0119871.bins.24.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 2943004, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.24.fa", + "type": "nmdc:DataObject", + "data_object_type": 
"Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1656a" + }, + "id": "nmdc:4aa754321195ff914f853c3a3ab8d1e3", + "name": "gold:Gp0119871.bins.18.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1809212, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.18.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1656b" + }, + "id": "nmdc:96003d3c0a17c13868fdac7ffc315990", + "name": "gold:Gp0119871.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 3085422, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1656c" + }, + "id": "nmdc:737c3edc7351dbe27eff6a1dee4ba21a", + "name": "gold:Gp0119871.bins.13.fa", + "description": "hqmq binned contig file for gold:Gp0119871", + "file_size_bytes": 2490452, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/hqmq-metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1656d" + }, + "id": "nmdc:87ff75ed1fbff0f33d10147d0cb71b2f", + "name": "gold:Gp0119871.bins.43.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 4016592, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.43.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1656e" + }, + "id": "nmdc:aa03847b81904530fe767876a0a7fa32", + "name": "gold:Gp0119871.bins.23.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 3105731, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.23.fa", + 
"type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1656f" + }, + "id": "nmdc:a3c4bf87c91ee29e1205ed3dbb77dabc", + "name": "gold:Gp0119871.bins.73.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1613573, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.73.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16570" + }, + "id": "nmdc:e30399ecf03e73e1f22f8421796e6e95", + "name": "gold:Gp0119871.bins.31.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 2666995, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.31.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16571" + }, + "id": "nmdc:a867d8ab87c295faaf27c690135b4a16", + "name": "gold:Gp0119871.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 694791, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16573" + }, + "id": "nmdc:681ccac6a3041dfd54d7c90f67789b16", + "name": "gold:Gp0119871.bins.28.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1082661, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.28.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16574" + }, + "id": "nmdc:2a0b9424c8bee8992d6448b73612c19e", + "name": "gold:Gp0119871.bins.58.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 272300, + "url": 
"https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.58.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16575" + }, + "id": "nmdc:a581d3d00a0daec9e5b412af9e3bd30b", + "name": "gold:Gp0119871.bins.40.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 2617222, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.40.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16576" + }, + "id": "nmdc:7fb09e5e893fdcee486442eec81cd4a4", + "name": "gold:Gp0119871.bins.66.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1107431, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.66.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16577" + }, + "id": "nmdc:49c652e8385c6356f7c8245f1553b579", + "name": "gold:Gp0119871.bins.35.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 622288, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.35.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16578" + }, + "id": "nmdc:a5a64ee4da0cc628479a7ee94159b8df", + "name": "gold:Gp0119871.bins.53.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1668723, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.53.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16579" + }, + "id": "nmdc:0d0e0316265011294146b751b1484121", + "name": "gold:Gp0119871.bins.41.fa", + "description": "metabat2 binned contig file for 
gold:Gp0119871", + "file_size_bytes": 1775119, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.41.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1657a" + }, + "id": "nmdc:177b496c26ed474e5c63cfd8cfb2e003", + "name": "gold:Gp0119871.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 309891, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1657b" + }, + "id": "nmdc:04d7f031c996398a22928b5d1a695049", + "name": "gold:Gp0119871.bins.47.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 833389, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.47.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1657c" + }, + "id": "nmdc:fffb4b54502c8b68dca3a1286c61528b", + "name": "gold:Gp0119871.bins.46.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1031503, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.46.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1657d" + }, + "id": "nmdc:bb4cf377efbcac5ac5f4d9346c0c9ebb", + "name": "gold:Gp0119871.bins.32.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 3124555, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.32.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1657e" + }, + "id": "nmdc:1c17a30493a6ab9a0341b5cf0566f21b", + "name": "gold:Gp0119871.bins.62.fa", + 
"description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 548425, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.62.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1657f" + }, + "id": "nmdc:b036ddb7c91ce80e3f87773ac6fc1c78", + "name": "gold:Gp0119871.bins.44.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 513743, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.44.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16580" + }, + "id": "nmdc:26724cd791652fe13fbf47b033cb8fe2", + "name": "gold:Gp0119871.bins.64.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 392512, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.64.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16581" + }, + "id": "nmdc:7dec130a92b405df816e50c7dd0d83c5", + "name": "gold:Gp0119871.bins.29.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 344119, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.29.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16582" + }, + "id": "nmdc:d09940a195b0f8ed77db288ad4723025", + "name": "gold:Gp0119871.bins.68.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 463150, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.68.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16583" + }, + "id": "nmdc:9f89dfbdd2b4944a14d2282e3ecfe05a", 
+ "name": "gold:Gp0119871.bins.25.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 860996, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.25.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16584" + }, + "id": "nmdc:76cf4b14197f59effdd4e973fcab3401", + "name": "gold:Gp0119871.bins.22.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 370238, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.22.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16585" + }, + "id": "nmdc:2275a15782cca5c661c5106d688de292", + "name": "gold:Gp0119871.bins.80.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1305800, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.80.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16586" + }, + "id": "nmdc:211702fea4173b724697095491bea64a", + "name": "gold:Gp0119871.bins.50.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1775354, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.50.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16587" + }, + "id": "nmdc:a322827b9594d565353297b020d67ee5", + "name": "gold:Gp0119871.bins.19.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 594813, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.19.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16588" + }, + 
"id": "nmdc:402b674a51ddc59e8b59bddf5e9a7d06", + "name": "gold:Gp0119871.bins.42.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 365452, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.42.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16589" + }, + "id": "nmdc:e9038fc3a023caf7a6cba19f585f27ac", + "name": "gold:Gp0119871.bins.38.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 350003, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.38.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1658a" + }, + "id": "nmdc:9c5a42f661f7f8157fefef433ac18612", + "name": "gold:Gp0119871.bins.63.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 524875, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.63.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1658b" + }, + "id": "nmdc:f9084455a3a6fb2bb44ab2caefeb2345", + "name": "gold:Gp0119871.bins.55.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 498834, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.55.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1658c" + }, + "id": "nmdc:dc5ed50412dc81599e5cbc52dcd959a1", + "name": "gold:Gp0119871.bins.79.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 634511, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.79.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + 
"$oid": "649b003f1ae706d7b5b1658d" + }, + "id": "nmdc:be2953e338f4b104d4842ce90db3c353", + "name": "gold:Gp0119871.bins.59.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 209826, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.59.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1658e" + }, + "id": "nmdc:bf191ce73386f232fd1abcfadacc1e3e", + "name": "gold:Gp0119871.bins.45.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 727089, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.45.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1658f" + }, + "id": "nmdc:d1e72190a40659e23665439cefe89388", + "name": "gold:Gp0119871.bins.77.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 979570, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.77.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16590" + }, + "id": "nmdc:bc2910cbccae3f7786dd4119fd0901bd", + "name": "gold:Gp0119871.bins.39.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 631596, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.39.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16591" + }, + "id": "nmdc:f44dc681d6a163ccbac8af970ceb34d3", + "name": "gold:Gp0119871.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 513862, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": 
"Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16592" + }, + "id": "nmdc:3d9ace91b8f3135e92044c54db24933b", + "name": "gold:Gp0119871.bins.56.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 927007, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.56.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16593" + }, + "id": "nmdc:d1629c6352134ef76e84dbd84ec9dabd", + "name": "gold:Gp0119871.bins.76.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 220267, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.76.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16594" + }, + "id": "nmdc:9619e834021d7b04c986ebb19a8d7f06", + "name": "gold:Gp0119871.bins.72.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 316398, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.72.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16595" + }, + "id": "nmdc:e5f0db5698aa2c4e336be8224e9cf7ac", + "name": "gold:Gp0119871.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 272462, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16596" + }, + "id": "nmdc:168a6b287d90adb577d2f3c44344b7fb", + "name": "gold:Gp0119871.bins.71.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 2924017, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.71.fa", + "type": 
"nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16597" + }, + "id": "nmdc:9ef51fb856b8f8aa931d14b6f08c01d9", + "name": "gold:Gp0119871.bins.69.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 781858, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.69.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16598" + }, + "id": "nmdc:c4b1b0277e3b62d802dcc5d1f4bbe1b5", + "name": "gold:Gp0119871.bins.34.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 2074729, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.34.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16599" + }, + "id": "nmdc:cece62511618862e8fe6e3fc7c2e5291", + "name": "gold:Gp0119871.bins.70.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 833866, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.70.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1659a" + }, + "id": "nmdc:afaf27d76f403b352c10b70f593e1501", + "name": "gold:Gp0119871.bins.37.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 247698, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.37.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1659b" + }, + "id": "nmdc:354e592759972eacc410e283a062b159", + "name": "gold:Gp0119871.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 571229, + "url": 
"https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1659c" + }, + "id": "nmdc:170fcb9d04bdff84b082f042d79d0eea", + "name": "gold:Gp0119871.bins.21.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 745550, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.21.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1659d" + }, + "id": "nmdc:66f0d2ff1b6c5910b016c3daa9c6e6d3", + "name": "gold:Gp0119871.bins.61.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 294517, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.61.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1659e" + }, + "id": "nmdc:34a4bc13ace170270d4a45d06901e82f", + "name": "gold:Gp0119871.bins.75.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 575974, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.75.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1659f" + }, + "id": "nmdc:db4195faa056efbbf84f34060a2247c6", + "name": "gold:Gp0119871.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 496586, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a0" + }, + "id": "nmdc:ba2250bf0e9dcfc72d1441b0861ba753", + "name": "gold:Gp0119871.bins.54.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", 
+ "file_size_bytes": 285797, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.54.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a1" + }, + "id": "nmdc:e616b29132db32e5525cbe3cc877ed49", + "name": "gold:Gp0119871.bins.57.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 212520, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.57.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a2" + }, + "id": "nmdc:3150c80ab5e6fe2175596a81c8ffa819", + "name": "gold:Gp0119871.bins.27.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 363720, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.27.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a3" + }, + "id": "nmdc:3ec223535286b9c5d2b32cc1a0356df0", + "name": "gold:Gp0119871.bins.52.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 646714, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.52.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a4" + }, + "id": "nmdc:abe1583f6b7bcc4c3ba80daf217353d1", + "name": "gold:Gp0119871.bins.51.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 286900, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.51.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a5" + }, + "id": "nmdc:647b1aa6081cdd3afe7079c93e5d817f", + "name": "gold:Gp0119871.bins.12.fa", + "description": "metabat2 
binned contig file for gold:Gp0119871", + "file_size_bytes": 316757, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a6" + }, + "id": "nmdc:ef98f910f3526693df9306591cf4a129", + "name": "gold:Gp0119871.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 2084485, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a7" + }, + "id": "nmdc:3f445344d2d7c7a18f32776ec82133f1", + "name": "gold:Gp0119871.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 2539690, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a8" + }, + "id": "nmdc:5b907066152968c521576a1086a248a4", + "name": "gold:Gp0119871.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 564130, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165a9" + }, + "id": "nmdc:c7f330d0cf24c063c4a9b0670b6338f1", + "name": "gold:Gp0119871.bins.16.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 301330, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165aa" + }, + "id": "nmdc:d5017395849819c7632ac06bb1198227", + "name": 
"gold:Gp0119871.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1020059, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ab" + }, + "id": "nmdc:4fc211cfffb451a0764fa79356edf40b", + "name": "gold:Gp0119871.bins.30.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1057259, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.30.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ac" + }, + "id": "nmdc:cc38947cd2efb33c7b3001ca46858535", + "name": "gold:Gp0119871.bins.65.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 9351860, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.65.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ad" + }, + "id": "nmdc:fdf6e1f7e73aac2ed2f89c7b7d392ed3", + "name": "gold:Gp0119871.bins.78.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 875623, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.78.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165ae" + }, + "id": "nmdc:23f5c4f8a09ed8cf6625e6505c845d36", + "name": "gold:Gp0119871.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1618307, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165af" + }, + "id": 
"nmdc:acd3a3a159c405441fb48c575a70f629", + "name": "gold:Gp0119871.bins.74.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1659822, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.74.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b0" + }, + "id": "nmdc:6bef8d0c2858748879f4aff5286b8c4b", + "name": "gold:Gp0119871.bins.33.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 496731, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.33.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b1" + }, + "id": "nmdc:16b8ff451ee709344b6ff6071034f816", + "name": "gold:Gp0119871.bins.48.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1356742, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.48.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b2" + }, + "id": "nmdc:f02046f0f4d8e0536b9d6820d3881ae2", + "name": "gold:Gp0119871.bins.60.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 219721, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.60.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b3" + }, + "id": "nmdc:561b75718eb4681abc5231d57fea8eaf", + "name": "gold:Gp0119871.bins.17.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 1859470, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.17.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + 
"$oid": "649b003f1ae706d7b5b165b4" + }, + "id": "nmdc:ae16df7198aa6d90ee9534769eada684", + "name": "gold:Gp0119871.bins.36.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 477335, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.36.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b5" + }, + "id": "nmdc:47537bb166fda940e9d9d34e0e8c4678", + "name": "gold:Gp0119871.bins.15.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 3875056, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b6" + }, + "id": "nmdc:b6f04e8fc4b7eded27a05d94ba1be671", + "name": "gold:Gp0119871.bins.20.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 410754, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.20.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165b7" + }, + "id": "nmdc:a094b771aa320d98625fbc38dbff3343", + "name": "gold:Gp0119871.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 435034, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b165c5" + }, + "id": "nmdc:a763efbf1909c13e45e32a304e7e68f0", + "name": "gold:Gp0119871.bins.49.fa", + "description": "metabat2 binned contig file for gold:Gp0119871", + "file_size_bytes": 893079, + "url": "https://data.microbiomedata.org/data/1777_96350/img_MAGs/metabat-bins/bins.49.fa", + "type": "nmdc:DataObject", + "data_object_type": 
"Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cb8" + }, + "description": "EC TSV File for gold:Gp0119871", + "url": "https://data.microbiomedata.org/1777_96350/img_annotation/Ga0482145_ec.tsv", + "md5_checksum": "0c72d3d72c8f29761c48d2844cbbd858", + "file_size_bytes": 3385, + "id": "nmdc:0c72d3d72c8f29761c48d2844cbbd858", + "name": "gold:Gp0119871_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cba" + }, + "description": "Functional annotation GFF file for gold:Gp0119871", + "url": "https://data.microbiomedata.org/1777_96350/img_annotation/Ga0482145_functional_annotation.gff", + "md5_checksum": "c2b7fc5a3e992bb271559774a779c890", + "file_size_bytes": 3385, + "id": "nmdc:c2b7fc5a3e992bb271559774a779c890", + "name": "gold:Gp0119871_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cbb" + }, + "description": "Protein FAA for gold:Gp0119871", + "url": "https://data.microbiomedata.org/1777_96350/img_annotation/Ga0482145_proteins.faa", + "md5_checksum": "8c6e1665a12e9478f7f13b9177b08083", + "file_size_bytes": 3385, + "id": "nmdc:8c6e1665a12e9478f7f13b9177b08083", + "name": "gold:Gp0119871_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cbc" + }, + "description": "KO TSV File for gold:Gp0119871", + "url": "https://data.microbiomedata.org/1777_96350/img_annotation/Ga0482145_ko.tsv", + "md5_checksum": "cf3c06ff9bb95e6e76b26bdd29add799", + "file_size_bytes": 3385, + "id": "nmdc:cf3c06ff9bb95e6e76b26bdd29add799", + "name": "gold:Gp0119871_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cbe" + }, + "description": "Structural annotation GFF file for 
gold:Gp0119871", + "url": "https://data.microbiomedata.org/1777_96350/img_annotation/Ga0482145_structural_annotation.gff", + "md5_checksum": "b2f6e385af8cbe49df3044d523da5c82", + "file_size_bytes": 3385, + "id": "nmdc:b2f6e385af8cbe49df3044d523da5c82", + "name": "gold:Gp0119871_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34710" + }, + "has_input": [ + "nmdc:7b09172d1801c8b722b203fcd3414ced", + "nmdc:e8cd5d9afc47b0df91aa06d13f5b5e94", + "nmdc:40a3d52848d223f0dc702c8bc170ac08" + ], + "too_short_contig_num": 298117, + "part_of": [ + "nmdc:mga06akz86" + ], + "binned_contig_num": 7716, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:8d6cf34db674d451277709fe676d979a", + "nmdc:afdb1f847a73fa075b6e5220467efd66", + "nmdc:58c90ee508e2c58a08675bae9b1613cf", + "nmdc:96655b86195384992cc636c5c2e024b6", + "nmdc:a5e5088a7709dbfbbaf78b4a269e8cd2" + ], + "was_informed_by": "gold:Gp0119871", + "input_contig_num": 351934, + "id": "nmdc:489dd3d27ecf95d802d4ab3490f6e39b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga06akz86", + "mags_list": [ + { + "number_of_contig": 118, + "completeness": 92.13, + "bin_name": "bins.1", + "gene_count": 3146, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "Elusimicrobiales", + "num_16s": 1, + "gtdbtk_family": "UBA9959", + "gtdbtk_domain": "Bacteria", + "contamination": 1.12, + "gtdbtk_class": "Elusimicrobia", + "gtdbtk_phylum": "Elusimicrobiota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA9959", + "num_t_rna": 41 + }, + { + "number_of_contig": 229, + "completeness": 5.26, + "bin_name": "bins.10", + "gene_count": 1201, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + 
"contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 24, + "completeness": 96.67, + "bin_name": "bins.11", + "gene_count": 2130, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "OPB41", + "num_16s": 1, + "gtdbtk_family": "PALSA-660", + "gtdbtk_domain": "Bacteria", + "contamination": 3.61, + "gtdbtk_class": "Coriobacteriia", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "PALSA-660", + "num_t_rna": 48 + }, + { + "number_of_contig": 389, + "completeness": 91.1, + "bin_name": "bins.12", + "gene_count": 4764, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Geobacterales", + "num_16s": 0, + "gtdbtk_family": "Geobacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.18, + "gtdbtk_class": "Desulfuromonadia", + "gtdbtk_phylum": "Desulfuromonadota", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "Geobacter_A", + "num_t_rna": 47 + }, + { + "number_of_contig": 453, + "completeness": 61.82, + "bin_name": "bins.13", + "gene_count": 2704, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Bacteroidales", + "num_16s": 0, + "gtdbtk_family": "Lentimicrobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.61, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA4417", + "num_t_rna": 25 + }, + { + "number_of_contig": 49, + "completeness": 60.82, + "bin_name": "bins.14", + "gene_count": 4760, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 48.41, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 41 + }, + { + "number_of_contig": 25, + "completeness": 90.79, + "bin_name": "bins.15", + "gene_count": 3322, + "bin_quality": "HQ", + 
"gtdbtk_species": "", + "gtdbtk_order": "Bacteroidales", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "Bacteria", + "contamination": 1.67, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 1, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 48 + }, + { + "number_of_contig": 128, + "completeness": 4.17, + "bin_name": "bins.16", + "gene_count": 718, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 781, + "completeness": 92.27, + "bin_name": "bins.17", + "gene_count": 4361, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 68.9, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 65 + }, + { + "number_of_contig": 85, + "completeness": 42.16, + "bin_name": "bins.18", + "gene_count": 705, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 181, + "completeness": 19.31, + "bin_name": "bins.19", + "gene_count": 1080, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 6, + "completeness": 77.48, + "bin_name": "bins.2", + "gene_count": 808, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA1875", + "num_16s": 1, 
+ "gtdbtk_family": "", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "CPR2", + "gtdbtk_phylum": "Patescibacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 45 + }, + { + "number_of_contig": 7, + "completeness": 76.88, + "bin_name": "bins.20", + "gene_count": 2239, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Bacteroidales", + "num_16s": 0, + "gtdbtk_family": "Lentimicrobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.08, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA5507", + "num_t_rna": 34 + }, + { + "number_of_contig": 388, + "completeness": 42.29, + "bin_name": "bins.21", + "gene_count": 2029, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.16, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 200, + "completeness": 26.72, + "bin_name": "bins.22", + "gene_count": 1309, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 38, + "completeness": 86.52, + "bin_name": "bins.23", + "gene_count": 1357, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "RFN20", + "num_16s": 1, + "gtdbtk_family": "CAG-826", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Bacilli", + "gtdbtk_phylum": "Firmicutes", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 48 + }, + { + "number_of_contig": 73, + "completeness": 52.63, + "bin_name": "bins.24", + "gene_count": 4585, + "bin_quality": "LQ", + "gtdbtk_species": "", + 
"gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 28.07, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 4, + "num_23s": 2, + "gtdbtk_genus": "", + "num_t_rna": 38 + }, + { + "number_of_contig": 31, + "completeness": 62.37, + "bin_name": "bins.25", + "gene_count": 2238, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Bacteroidales", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 27 + }, + { + "number_of_contig": 22, + "completeness": 98.71, + "bin_name": "bins.26", + "gene_count": 4077, + "bin_quality": "HQ", + "gtdbtk_species": "", + "gtdbtk_order": "Geobacterales", + "num_16s": 1, + "gtdbtk_family": "Pelobacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Desulfuromonadia", + "gtdbtk_phylum": "Desulfuromonadota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Pelobacter_C", + "num_t_rna": 47 + }, + { + "number_of_contig": 326, + "completeness": 76.44, + "bin_name": "bins.27", + "gene_count": 3063, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Methylococcales", + "num_16s": 0, + "gtdbtk_family": "Methylomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.69, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Methylobacter_A", + "num_t_rna": 31 + }, + { + "number_of_contig": 171, + "completeness": 26.21, + "bin_name": "bins.28", + "gene_count": 865, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 116, + 
"completeness": 16.67, + "bin_name": "bins.29", + "gene_count": 716, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 198, + "completeness": 21.89, + "bin_name": "bins.3", + "gene_count": 964, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 164, + "completeness": 89.37, + "bin_name": "bins.30", + "gene_count": 3016, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Holophagales", + "num_16s": 2, + "gtdbtk_family": "Holophagaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.32, + "gtdbtk_class": "Holophagae", + "gtdbtk_phylum": "Acidobacteriota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 42 + }, + { + "number_of_contig": 6, + "completeness": 60.22, + "bin_name": "bins.31", + "gene_count": 2231, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Bacteroidales", + "num_16s": 1, + "gtdbtk_family": "CG2-30-32-10", + "gtdbtk_domain": "Bacteria", + "contamination": 1.08, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 26 + }, + { + "number_of_contig": 13, + "completeness": 77.82, + "bin_name": "bins.32", + "gene_count": 838, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "SC72", + "num_16s": 0, + "gtdbtk_family": "SC72", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Dojkabacteria", + "gtdbtk_phylum": "Patescibacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA12078", + 
"num_t_rna": 46 + }, + { + "number_of_contig": 5, + "completeness": 34.48, + "bin_name": "bins.33", + "gene_count": 1423, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 6, + "completeness": 34.48, + "bin_name": "bins.34", + "gene_count": 1337, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 36, + "completeness": 27.59, + "bin_name": "bins.35", + "gene_count": 958, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 73, + "completeness": 73.66, + "bin_name": "bins.36", + "gene_count": 2770, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Bacteroidales", + "num_16s": 0, + "gtdbtk_family": "Paludibacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.08, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Paludibacter", + "num_t_rna": 36 + }, + { + "number_of_contig": 154, + "completeness": 97.31, + "bin_name": "bins.37", + "gene_count": 4508, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Bacteroidales", + "num_16s": 0, + "gtdbtk_family": "Lentimicrobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 9.41, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 1, + "num_23s": 
0, + "gtdbtk_genus": "UBA4417", + "num_t_rna": 42 + }, + { + "number_of_contig": 1079, + "completeness": 83.87, + "bin_name": "bins.38", + "gene_count": 5873, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 26.65, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 2, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 54 + }, + { + "number_of_contig": 250, + "completeness": 86.13, + "bin_name": "bins.39", + "gene_count": 3014, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Holophagales", + "num_16s": 0, + "gtdbtk_family": "Holophagaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 2.63, + "gtdbtk_class": "Holophagae", + "gtdbtk_phylum": "Acidobacteriota", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 42 + }, + { + "number_of_contig": 76, + "completeness": 6.58, + "bin_name": "bins.4", + "gene_count": 340, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + "number_of_contig": 246, + "completeness": 41.44, + "bin_name": "bins.40", + "gene_count": 1327, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.48, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 149, + "completeness": 84.69, + "bin_name": "bins.5", + "gene_count": 2435, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "SG8-4", + "num_16s": 1, + "gtdbtk_family": "SG8-4", + "gtdbtk_domain": "Bacteria", + "contamination": 7.95, + "gtdbtk_class": "Phycisphaerae", + "gtdbtk_phylum": "Planctomycetota", + "num_5s": 1, + 
"num_23s": 1, + "gtdbtk_genus": "GWF2-50-10", + "num_t_rna": 42 + }, + { + "number_of_contig": 5, + "completeness": 52.99, + "bin_name": "bins.6", + "gene_count": 1199, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Methylophilaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Methylotenera", + "num_t_rna": 21 + }, + { + "number_of_contig": 1, + "completeness": 17.24, + "bin_name": "bins.7", + "gene_count": 811, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 145, + "completeness": 41.38, + "bin_name": "bins.8", + "gene_count": 1154, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 27 + }, + { + "number_of_contig": 1270, + "completeness": 97.7, + "bin_name": "bins.9", + "gene_count": 13896, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 172.7, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 143 + } + ], + "unbinned_contig_num": 46101, + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a3" + }, + "has_input": [ + "nmdc:7b09172d1801c8b722b203fcd3414ced" + 
], + "part_of": [ + "nmdc:mga06akz86" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7d1f1662c10394d7890010ecd658fb8d", + "nmdc:f50096a301c544b750982bd4624a22c2", + "nmdc:40a3d52848d223f0dc702c8bc170ac08", + "nmdc:7c006a93f93ea27a51807507d6892ad2", + "nmdc:96494f25686c97972f89365f97e0789e", + "nmdc:43580863f0544bc993d135eb45336133", + "nmdc:5b7a6250e2f0ac56c3ffe2388759231d", + "nmdc:fe75d12ff308b517d79e93efd3957715", + "nmdc:f79ec9083c0c9ae63a3e0753645e3189", + "nmdc:9006b9acf9eab67dd33e9cdcb86a5140", + "nmdc:7551c18b7ffe271f9c6304e6c6693876", + "nmdc:f39ebbf448dec2cf26a30206f420c328" + ], + "was_informed_by": "gold:Gp0119871", + "id": "nmdc:489dd3d27ecf95d802d4ab3490f6e39b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga06akz86", + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f92" + }, + "has_input": [ + "nmdc:4cac70dbe447572e04bfa9e362e1aafc" + ], + "part_of": [ + "nmdc:mga06akz86" + ], + "ctg_logsum": 2519411, + "scaf_logsum": 2529511, + "gap_pct": 0.0035, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7b09172d1801c8b722b203fcd3414ced", + "nmdc:0e9490e2f0fdf7ad952058f7a74cf532", + "nmdc:d33b2f9915c7de9f4d38675b299f57b5", + "nmdc:5f1f751638ef6b988e338c43388ef155", + "nmdc:e8cd5d9afc47b0df91aa06d13f5b5e94" + ], + "asm_score": 23.841, + "was_informed_by": "gold:Gp0119871", + "ctg_powsum": 388990, + "scaf_max": 2060560, + "id": "nmdc:489dd3d27ecf95d802d4ab3490f6e39b", + "scaf_powsum": 394116, + "execution_resource": "NERSC-Cori", + "contigs": 351950, + "name": "Assembly Activity for nmdc:mga06akz86", + "ctg_max": 997819, + "gc_std": 0.11589, + "contig_bp": 321826495, + "gc_avg": 0.48925, + "started_at_time": "2021-10-11T02:23:26Z", + "scaf_bp": 
321837775, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 351200, + "ended_at_time": "2021-10-11T05:56:20+00:00", + "ctg_l50": 1460, + "ctg_l90": 340, + "ctg_n50": 31002, + "ctg_n90": 245468, + "scaf_l50": 1469, + "scaf_l90": 340, + "scaf_n50": 30762, + "scaf_n90": 244906, + "scaf_l_gt50k": 47031183, + "scaf_n_gt50k": 328, + "scaf_pct_gt50k": 14.61332 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b75" + }, + "id": "nmdc:omprc-11-nwkq6w46", + "name": "Enriched soil microbial communities from Old Woman Creek wetland in Ohio, USA - Methanotroph_Enrichment_5", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-q278p759" + ], + "has_output": [ + "jgi:56a6899d0d878559e286ce67" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2021-06-15", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Methanotroph_Enrichment_5", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119871" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c861" + }, + "has_input": [ + "nmdc:ce107eca7576661e3995b8dd4ca5e3e5" + ], + "part_of": [ + "nmdc:mga06akz86" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:4cac70dbe447572e04bfa9e362e1aafc", + "nmdc:f2cbc77fed4639eb21d7532dd62c2a25" + ], + "was_informed_by": "gold:Gp0119871", + "input_read_count": 67566316, + "output_read_bases": 10037258979, + "id": "nmdc:489dd3d27ecf95d802d4ab3490f6e39b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 10202513716, + "name": "Read QC Activity for nmdc:mga06akz86", + "output_read_count": 
66970044, + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf1f" + }, + "has_input": [ + "nmdc:4cac70dbe447572e04bfa9e362e1aafc" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bc090921f030024644cde2fd69d5b12e", + "nmdc:08578df055323144d8daecd706a3e4df", + "nmdc:ac553a799f0d1ad34a68d669c58ff896", + "nmdc:30bc8aeebcd6101139d76549dec969b9", + "nmdc:acb3bf94a6cba7733f65ff8417d49c56", + "nmdc:79d40f492c3c532861bbb2ca4e7d2b62", + "nmdc:fc33db3ee4bb68c5cc71297346123d00", + "nmdc:e052993cd2c7362e656961567594c509", + "nmdc:9c6c5ba4a06ca36ec6b983c94de9635f" + ], + "was_informed_by": "gold:Gp0119871", + "id": "nmdc:489dd3d27ecf95d802d4ab3490f6e39b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga06akz86", + "started_at_time": "2021-10-11T02:23:26Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:56:20+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 7280923625, + "type": "nmdc:DataObject", + "id": "jgi:560df3700d878540fd6fe1c9", + "name": "9567.6.137555.TAAGGCG-TATCCTC.fastq.gz" + }, + { + "name": "Gp0119863_Filtered Reads", + "description": "Filtered Reads for Gp0119863", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/qa/nmdc_mga06mqp49_filtered.fastq.gz", + "md5_checksum": "661b4638f9d026b74763805523e596a1", + "id": "nmdc:661b4638f9d026b74763805523e596a1", + "file_size_bytes": 4118218886 + }, + { + "name": "Gp0119863_Filtered Stats", + "description": "Filtered Stats for Gp0119863", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/qa/nmdc_mga06mqp49_filterStats.txt", + "md5_checksum": 
"10f926a2604c4c807ff297a697b072aa", + "id": "nmdc:10f926a2604c4c807ff297a697b072aa", + "file_size_bytes": 287 + }, + { + "name": "Gp0119863_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119863", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_gottcha2_report.tsv", + "md5_checksum": "c4f085e9c4ec8f14aebbdc61091d3b2e", + "id": "nmdc:c4f085e9c4ec8f14aebbdc61091d3b2e", + "file_size_bytes": 6113 + }, + { + "name": "Gp0119863_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119863", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_gottcha2_report_full.tsv", + "md5_checksum": "992a2d95b42a38d1e6420de4a5a0e67b", + "id": "nmdc:992a2d95b42a38d1e6420de4a5a0e67b", + "file_size_bytes": 460407 + }, + { + "name": "Gp0119863_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119863", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_gottcha2_krona.html", + "md5_checksum": "c6c82d1240f902c0b5b06d9ae902de78", + "id": "nmdc:c6c82d1240f902c0b5b06d9ae902de78", + "file_size_bytes": 245310 + }, + { + "name": "Gp0119863_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119863", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_centrifuge_classification.tsv", + "md5_checksum": "c672b747d8d95f5f94240c01f192a8cb", + "id": "nmdc:c672b747d8d95f5f94240c01f192a8cb", + "file_size_bytes": 4857108739 + }, + { + "name": "Gp0119863_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119863", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_centrifuge_report.tsv", + "md5_checksum": "ce754df464750917b2dd7a66cad45ff5", + "id": "nmdc:ce754df464750917b2dd7a66cad45ff5", + "file_size_bytes": 226531 + }, + { + "name": "Gp0119863_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119863", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_centrifuge_krona.html", + "md5_checksum": "92ee5dbe5627b52f2da06f5154f3ef9e", + "id": "nmdc:92ee5dbe5627b52f2da06f5154f3ef9e", + "file_size_bytes": 2187337 + }, + { + "name": "Gp0119863_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119863", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_kraken2_classification.tsv", + "md5_checksum": "60fa56d06006ee5b8d292f79c04a453e", + "id": "nmdc:60fa56d06006ee5b8d292f79c04a453e", + "file_size_bytes": 4220908060 + }, + { + "name": "Gp0119863_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119863", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_kraken2_report.tsv", + "md5_checksum": "913fd34a744dba7f2d8e25ab646250a5", + "id": "nmdc:913fd34a744dba7f2d8e25ab646250a5", + "file_size_bytes": 515319 + }, + { + "name": "Gp0119863_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119863", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/ReadbasedAnalysis/nmdc_mga06mqp49_kraken2_krona.html", + "md5_checksum": "3ff6bdc3c50e72dc9736f8a3bafa7dfc", + "id": "nmdc:3ff6bdc3c50e72dc9736f8a3bafa7dfc", + "file_size_bytes": 3317461 + }, + { + "name": "Gp0119863_Assembled contigs fasta", + 
"description": "Assembled contigs fasta for Gp0119863", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/assembly/nmdc_mga06mqp49_contigs.fna", + "md5_checksum": "b01bd02c68c06c38e5f5d5d8ecaefd2e", + "id": "nmdc:b01bd02c68c06c38e5f5d5d8ecaefd2e", + "file_size_bytes": 6614130 + }, + { + "name": "Gp0119863_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119863", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/assembly/nmdc_mga06mqp49_scaffolds.fna", + "md5_checksum": "e80be7a0f37f668d57e6d32d891c48de", + "id": "nmdc:e80be7a0f37f668d57e6d32d891c48de", + "file_size_bytes": 6606100 + }, + { + "name": "Gp0119863_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119863", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/assembly/nmdc_mga06mqp49_covstats.txt", + "md5_checksum": "12a18a8e9f401890808c6a009ee32b23", + "id": "nmdc:12a18a8e9f401890808c6a009ee32b23", + "file_size_bytes": 273779 + }, + { + "name": "Gp0119863_Assembled AGP file", + "description": "Assembled AGP file for Gp0119863", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/assembly/nmdc_mga06mqp49_assembly.agp", + "md5_checksum": "b96ac952849316d4d00ebba64927b710", + "id": "nmdc:b96ac952849316d4d00ebba64927b710", + "file_size_bytes": 241727 + }, + { + "name": "Gp0119863_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119863", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/assembly/nmdc_mga06mqp49_pairedMapped_sorted.bam", + "md5_checksum": "dca9e5b9a5043d60877f5e61c8081ba5", + "id": "nmdc:dca9e5b9a5043d60877f5e61c8081ba5", + "file_size_bytes": 5137329204 + }, + { + "name": "Gp0119863_Protein FAA", + "description": "Protein FAA for Gp0119863", + 
"data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_proteins.faa", + "md5_checksum": "930e8a942b44b72f98148d7bf32e9b9c", + "id": "nmdc:930e8a942b44b72f98148d7bf32e9b9c", + "file_size_bytes": 2920226 + }, + { + "name": "Gp0119863_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119863", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_structural_annotation.gff", + "md5_checksum": "323b5d4d47c5d0199fe30399c72c49ec", + "id": "nmdc:323b5d4d47c5d0199fe30399c72c49ec", + "file_size_bytes": 2484 + }, + { + "name": "Gp0119863_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119863", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_functional_annotation.gff", + "md5_checksum": "6c09233592f03b658e024baecbcb3e90", + "id": "nmdc:6c09233592f03b658e024baecbcb3e90", + "file_size_bytes": 2531084 + }, + { + "name": "Gp0119863_KO TSV file", + "description": "KO TSV file for Gp0119863", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_ko.tsv", + "md5_checksum": "02a20d5a8ed38c0898c116049764a07f", + "id": "nmdc:02a20d5a8ed38c0898c116049764a07f", + "file_size_bytes": 344548 + }, + { + "name": "Gp0119863_EC TSV file", + "description": "EC TSV file for Gp0119863", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_ec.tsv", + "md5_checksum": "cc439741bb2b49672efe8e4611bafdc3", + "id": "nmdc:cc439741bb2b49672efe8e4611bafdc3", + "file_size_bytes": 203287 + }, + { + "name": "Gp0119863_COG GFF file", + "description": "COG GFF file for Gp0119863", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_cog.gff", + "md5_checksum": "42122e3d78742a892b1279ad526f03f4", + "id": "nmdc:42122e3d78742a892b1279ad526f03f4", + "file_size_bytes": 1688691 + }, + { + "name": "Gp0119863_PFAM GFF file", + "description": "PFAM GFF file for Gp0119863", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_pfam.gff", + "md5_checksum": "8cda311b730ba9832ac6150cb6a96573", + "id": "nmdc:8cda311b730ba9832ac6150cb6a96573", + "file_size_bytes": 1661104 + }, + { + "name": "Gp0119863_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119863", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_tigrfam.gff", + "md5_checksum": "9c6632d4efe86fb4666c55d7974adae4", + "id": "nmdc:9c6632d4efe86fb4666c55d7974adae4", + "file_size_bytes": 373704 + }, + { + "name": "Gp0119863_SMART GFF file", + "description": "SMART GFF file for Gp0119863", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_smart.gff", + "md5_checksum": "31d685c60fc28422b46f1531993b02b0", + "id": "nmdc:31d685c60fc28422b46f1531993b02b0", + "file_size_bytes": 542872 + }, + { + "name": "Gp0119863_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119863", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_supfam.gff", + "md5_checksum": "8a969c2ccdece0f1eefbf881666bd66c", + "id": "nmdc:8a969c2ccdece0f1eefbf881666bd66c", + "file_size_bytes": 2445599 + }, + { + "name": "Gp0119863_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119863", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_cath_funfam.gff", + "md5_checksum": "065b0358e14f3bf1e789c01a6e1c3a0f", + "id": "nmdc:065b0358e14f3bf1e789c01a6e1c3a0f", + "file_size_bytes": 2291864 + }, + { + "name": "Gp0119863_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119863", + 
"url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/annotation/nmdc_mga06mqp49_ko_ec.gff", + "md5_checksum": "5a5dab66b1fc530fd1a2d18f29bfc9a5", + "id": "nmdc:5a5dab66b1fc530fd1a2d18f29bfc9a5", + "file_size_bytes": 1257889 + }, + { + "name": "Gp0119863_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119863", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/MAGs/nmdc_mga06mqp49_checkm_qa.out", + "md5_checksum": "8f04581f652d19124ab08e39d3317d10", + "id": "nmdc:8f04581f652d19124ab08e39d3317d10", + "file_size_bytes": 775 + }, + { + "name": "Gp0119863_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119863", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga06mqp49/MAGs/nmdc_mga06mqp49_hqmq_bin.zip", + "md5_checksum": "e46f63dff0e8d71ef63f7582e8213399", + "id": "nmdc:e46f63dff0e8d71ef63f7582e8213399", + "file_size_bytes": 666185 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e17" + }, + "description": "Assembled contigs fasta for gold:Gp0119863", + "url": "https://data.microbiomedata.org/data/1777_95832/assembly/assembly_contigs.fna", + "file_size_bytes": 6597435, + "type": "nmdc:DataObject", + "id": "nmdc:8565741217a0632488535b5b20afa036", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e18" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119863", + "url": "https://data.microbiomedata.org/data/1777_95832/assembly/mapping_stats.txt", + "file_size_bytes": 257084, + "type": "nmdc:DataObject", + "id": "nmdc:7a7a68bfb6f467a61a325ad2b75818df", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e19" + }, + "description": "Assembled scaffold fasta for 
gold:Gp0119863", + "url": "https://data.microbiomedata.org/data/1777_95832/assembly/assembly_scaffolds.fna", + "file_size_bytes": 6589720, + "type": "nmdc:DataObject", + "id": "nmdc:f81059d88da363f1fbcf918f5cd9f4d5", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e1a" + }, + "description": "Assembled AGP file for gold:Gp0119863", + "url": "https://data.microbiomedata.org/data/1777_95832/assembly/assembly.agp", + "file_size_bytes": 207707, + "type": "nmdc:DataObject", + "id": "nmdc:878cecf1677ff39cc25d99c258bf39cd", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e1b" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119863", + "url": "https://data.microbiomedata.org/data/1777_95832/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 5103272865, + "type": "nmdc:DataObject", + "id": "nmdc:49c50b254b80812add64ec988c665ea4", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a90" + }, + "id": "nmdc:01caa07feb598384edd6488cd7bd0a05", + "name": "1777_95832.krona.html", + "description": "Gold:Gp0119863 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95832/ReadbasedAnalysis/centrifuge/1777_95832.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15a98" + }, + "id": "nmdc:7561e11af44ff9612d6599cb3a146b08", + "name": "1777_95832.json", + "description": "Gold:Gp0119863 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95832/ReadbasedAnalysis/1777_95832.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1638c" + }, + "id": "nmdc:90aacaa89de9f4b09074ba63b5a500e9", + "name": "bins.unbinned.fa", + 
"description": "unbinned fasta file from metabat2 for gold:Gp0119863", + "file_size_bytes": 5274476, + "url": "https://data.microbiomedata.org/data/1777_95832/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1638f" + }, + "id": "nmdc:cbe239b54d22d7a0ae813ce5770218f9", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119863", + "file_size_bytes": 775, + "url": "https://data.microbiomedata.org/data/1777_95832/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16391" + }, + "id": "nmdc:60f8dddb135584d877edfe2956868b8d", + "name": "gold:Gp0119863.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119863", + "file_size_bytes": 211815, + "url": "https://data.microbiomedata.org/data/1777_95832/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16393" + }, + "id": "nmdc:329e257ae45b4c9494716a0e2274a143", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119863", + "file_size_bytes": 1085627, + "url": "https://data.microbiomedata.org/data/1777_95832/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c91" + }, + "description": "EC TSV File for gold:Gp0119863", + "url": "https://data.microbiomedata.org/1777_95832/img_annotation/Ga0482152_ec.tsv", + "md5_checksum": "091275dd5ad1358b576671f22c2f8157", + "file_size_bytes": 3385, + "id": "nmdc:091275dd5ad1358b576671f22c2f8157", + "name": "gold:Gp0119863_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c92" + }, + "description": "KO TSV File for gold:Gp0119863", + "url": 
"https://data.microbiomedata.org/1777_95832/img_annotation/Ga0482152_ko.tsv", + "md5_checksum": "2318ee29172751d9f66edc54c7a456fa", + "file_size_bytes": 3385, + "id": "nmdc:2318ee29172751d9f66edc54c7a456fa", + "name": "gold:Gp0119863_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c93" + }, + "description": "Functional annotation GFF file for gold:Gp0119863", + "url": "https://data.microbiomedata.org/1777_95832/img_annotation/Ga0482152_functional_annotation.gff", + "md5_checksum": "f5a9c910bcb6cbec92291db526caa1db", + "file_size_bytes": 3385, + "id": "nmdc:f5a9c910bcb6cbec92291db526caa1db", + "name": "gold:Gp0119863_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c94" + }, + "description": "Protein FAA for gold:Gp0119863", + "url": "https://data.microbiomedata.org/1777_95832/img_annotation/Ga0482152_proteins.faa", + "md5_checksum": "8be139ccce758a8845092122546ff2b3", + "file_size_bytes": 3385, + "id": "nmdc:8be139ccce758a8845092122546ff2b3", + "name": "gold:Gp0119863_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c96" + }, + "description": "Structural annotation GFF file for gold:Gp0119863", + "url": "https://data.microbiomedata.org/1777_95832/img_annotation/Ga0482152_structural_annotation.gff", + "md5_checksum": "46cf62901adcbcb2ca2c890b6dc8e6bd", + "file_size_bytes": 3385, + "id": "nmdc:46cf62901adcbcb2ca2c890b6dc8e6bd", + "name": "gold:Gp0119863_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346ef" + }, + "has_input": [ + "nmdc:b01bd02c68c06c38e5f5d5d8ecaefd2e", + "nmdc:dca9e5b9a5043d60877f5e61c8081ba5", + 
"nmdc:6c09233592f03b658e024baecbcb3e90" + ], + "too_short_contig_num": 2515, + "part_of": [ + "nmdc:mga06mqp49" + ], + "binned_contig_num": 110, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:8f04581f652d19124ab08e39d3317d10", + "nmdc:e46f63dff0e8d71ef63f7582e8213399" + ], + "was_informed_by": "gold:Gp0119863", + "input_contig_num": 3339, + "id": "nmdc:e97afa642973e545c1c6a5bfb98dff78", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga06mqp49", + "mags_list": [ + { + "number_of_contig": 110, + "completeness": 81.03, + "bin_name": "bins.1", + "gene_count": 2223, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 2, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 43 + } + ], + "unbinned_contig_num": 714, + "started_at_time": "2021-12-01T21:30:30Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T21:02:29+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef98e" + }, + "has_input": [ + "nmdc:b01bd02c68c06c38e5f5d5d8ecaefd2e" + ], + "part_of": [ + "nmdc:mga06mqp49" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:930e8a942b44b72f98148d7bf32e9b9c", + "nmdc:323b5d4d47c5d0199fe30399c72c49ec", + "nmdc:6c09233592f03b658e024baecbcb3e90", + "nmdc:02a20d5a8ed38c0898c116049764a07f", + "nmdc:cc439741bb2b49672efe8e4611bafdc3", + "nmdc:42122e3d78742a892b1279ad526f03f4", + "nmdc:8cda311b730ba9832ac6150cb6a96573", + "nmdc:9c6632d4efe86fb4666c55d7974adae4", + "nmdc:31d685c60fc28422b46f1531993b02b0", + "nmdc:8a969c2ccdece0f1eefbf881666bd66c", + "nmdc:065b0358e14f3bf1e789c01a6e1c3a0f", + 
"nmdc:5a5dab66b1fc530fd1a2d18f29bfc9a5" + ], + "was_informed_by": "gold:Gp0119863", + "id": "nmdc:e97afa642973e545c1c6a5bfb98dff78", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga06mqp49", + "started_at_time": "2021-12-01T21:30:30Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T21:02:29+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f83" + }, + "has_input": [ + "nmdc:661b4638f9d026b74763805523e596a1" + ], + "part_of": [ + "nmdc:mga06mqp49" + ], + "ctg_logsum": 73503, + "scaf_logsum": 74406, + "gap_pct": 0.05473, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b01bd02c68c06c38e5f5d5d8ecaefd2e", + "nmdc:e80be7a0f37f668d57e6d32d891c48de", + "nmdc:12a18a8e9f401890808c6a009ee32b23", + "nmdc:b96ac952849316d4d00ebba64927b710", + "nmdc:dca9e5b9a5043d60877f5e61c8081ba5" + ], + "asm_score": 23.455, + "was_informed_by": "gold:Gp0119863", + "ctg_powsum": 10601, + "scaf_max": 95653, + "id": "nmdc:e97afa642973e545c1c6a5bfb98dff78", + "scaf_powsum": 10888, + "execution_resource": "NERSC-Cori", + "contigs": 3339, + "name": "Assembly Activity for nmdc:mga06mqp49", + "ctg_max": 95653, + "gc_std": 0.08386, + "gc_avg": 0.33303, + "contig_bp": 6410104, + "started_at_time": "2021-12-01T21:30:30Z", + "scaf_bp": 6413614, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 3276, + "ended_at_time": "2021-12-02T21:02:29+00:00", + "ctg_l50": 11920, + "ctg_l90": 514, + "ctg_n50": 130, + "ctg_n90": 1371, + "scaf_l50": 13327, + "scaf_l90": 520, + "scaf_n50": 116, + "scaf_n90": 1313, + "scaf_l_gt50k": 803457, + "scaf_n_gt50k": 13, + "scaf_pct_gt50k": 12.527367 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b76" + }, + "id": "nmdc:omprc-11-87xcf918", + "name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2015_5_14", + "description": "Microbial 
controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-2b75gg30" + ], + "has_output": [ + "jgi:560df3700d878540fd6fe1c9" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2020-04-05", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series 2015_5_14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119863" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c848" + }, + "has_input": [ + "nmdc:eb680995a5cffb21b210748ee943d122" + ], + "part_of": [ + "nmdc:mga06mqp49" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:661b4638f9d026b74763805523e596a1", + "nmdc:10f926a2604c4c807ff297a697b072aa" + ], + "was_informed_by": "gold:Gp0119863", + "input_read_count": 69740856, + "output_read_bases": 10130377306, + "id": "nmdc:e97afa642973e545c1c6a5bfb98dff78", + "execution_resource": "NERSC-Cori", + "input_read_bases": 10530869256, + "name": "Read QC Activity for nmdc:mga06mqp49", + "output_read_count": 69208056, + "started_at_time": "2021-12-01T21:30:30Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T21:02:29+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf12" + }, + "has_input": [ + "nmdc:661b4638f9d026b74763805523e596a1" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c4f085e9c4ec8f14aebbdc61091d3b2e", + "nmdc:992a2d95b42a38d1e6420de4a5a0e67b", + "nmdc:c6c82d1240f902c0b5b06d9ae902de78", + "nmdc:c672b747d8d95f5f94240c01f192a8cb", + "nmdc:ce754df464750917b2dd7a66cad45ff5", + 
"nmdc:92ee5dbe5627b52f2da06f5154f3ef9e", + "nmdc:60fa56d06006ee5b8d292f79c04a453e", + "nmdc:913fd34a744dba7f2d8e25ab646250a5", + "nmdc:3ff6bdc3c50e72dc9736f8a3bafa7dfc" + ], + "was_informed_by": "gold:Gp0119863", + "id": "nmdc:e97afa642973e545c1c6a5bfb98dff78", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga06mqp49", + "started_at_time": "2021-12-01T21:30:30Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T21:02:29+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 7026629112, + "type": "nmdc:DataObject", + "id": "jgi:560df3720d878540fd6fe1cc", + "name": "9567.6.137555.AGGCAGA-GCGTAAG.fastq.gz" + }, + { + "name": "Gp0119866_Filtered Reads", + "description": "Filtered Reads for Gp0119866", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/qa/nmdc_mga09f1f50_filtered.fastq.gz", + "md5_checksum": "448625936760095c0efc271e1003510d", + "id": "nmdc:448625936760095c0efc271e1003510d", + "file_size_bytes": 5520370499 + }, + { + "name": "Gp0119866_Filtered Stats", + "description": "Filtered Stats for Gp0119866", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/qa/nmdc_mga09f1f50_filterStats.txt", + "md5_checksum": "28929f0addf71bbaafb2806253f7108e", + "id": "nmdc:28929f0addf71bbaafb2806253f7108e", + "file_size_bytes": 284 + }, + { + "name": "Gp0119866_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_gottcha2_report.tsv", + "md5_checksum": "57e8059128df37fe5763162f7c1b992e", + "id": "nmdc:57e8059128df37fe5763162f7c1b992e", + "file_size_bytes": 35324 + }, + { + "name": "Gp0119866_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119866", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_gottcha2_report_full.tsv", + "md5_checksum": "57f89e0f77210981a40cfb0e38014221", + "id": "nmdc:57f89e0f77210981a40cfb0e38014221", + "file_size_bytes": 1274632 + }, + { + "name": "Gp0119866_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119866", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_gottcha2_krona.html", + "md5_checksum": "af57c9f1974fe0d450382d57482b6c78", + "id": "nmdc:af57c9f1974fe0d450382d57482b6c78", + "file_size_bytes": 341174 + }, + { + "name": "Gp0119866_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119866", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_centrifuge_classification.tsv", + "md5_checksum": "20ce26b373108b6f80dbff7e96b5864b", + "id": "nmdc:20ce26b373108b6f80dbff7e96b5864b", + "file_size_bytes": 4656351464 + }, + { + "name": "Gp0119866_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119866", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_centrifuge_report.tsv", + "md5_checksum": "fd2a7a459a59296a18b2a464e2868214", + "id": "nmdc:fd2a7a459a59296a18b2a464e2868214", + "file_size_bytes": 267611 + }, + { + "name": "Gp0119866_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119866", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_centrifuge_krona.html", + "md5_checksum": "53c74feb3725c590a1db3486edc2efc3", + "id": "nmdc:53c74feb3725c590a1db3486edc2efc3", + "file_size_bytes": 2352191 + }, + { + "name": 
"Gp0119866_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119866", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_kraken2_classification.tsv", + "md5_checksum": "0449bf5f5699dd5c63c945682ac06f4e", + "id": "nmdc:0449bf5f5699dd5c63c945682ac06f4e", + "file_size_bytes": 4159018306 + }, + { + "name": "Gp0119866_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119866", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_kraken2_report.tsv", + "md5_checksum": "0f1a5387576931ff49370f8e786c92d5", + "id": "nmdc:0f1a5387576931ff49370f8e786c92d5", + "file_size_bytes": 677105 + }, + { + "name": "Gp0119866_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119866", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/ReadbasedAnalysis/nmdc_mga09f1f50_kraken2_krona.html", + "md5_checksum": "e3331958d361d72116018bbdb1a74d5d", + "id": "nmdc:e3331958d361d72116018bbdb1a74d5d", + "file_size_bytes": 4064406 + }, + { + "name": "Gp0119866_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119866", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/assembly/nmdc_mga09f1f50_contigs.fna", + "md5_checksum": "9c04a2492162eb6b6aef8c389f3e182f", + "id": "nmdc:9c04a2492162eb6b6aef8c389f3e182f", + "file_size_bytes": 418087151 + }, + { + "name": "Gp0119866_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119866", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/assembly/nmdc_mga09f1f50_scaffolds.fna", + "md5_checksum": "00ec3306f3e74333c27b7e82e38a1c66", + "id": 
"nmdc:00ec3306f3e74333c27b7e82e38a1c66", + "file_size_bytes": 416508977 + }, + { + "name": "Gp0119866_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/assembly/nmdc_mga09f1f50_covstats.txt", + "md5_checksum": "837fafb2ce56bc381abe7c6916b9bcab", + "id": "nmdc:837fafb2ce56bc381abe7c6916b9bcab", + "file_size_bytes": 41023143 + }, + { + "name": "Gp0119866_Assembled AGP file", + "description": "Assembled AGP file for Gp0119866", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/assembly/nmdc_mga09f1f50_assembly.agp", + "md5_checksum": "fba64d9fa6867f4ae1df6023309f0673", + "id": "nmdc:fba64d9fa6867f4ae1df6023309f0673", + "file_size_bytes": 38463052 + }, + { + "name": "Gp0119866_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119866", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/assembly/nmdc_mga09f1f50_pairedMapped_sorted.bam", + "md5_checksum": "07a6287efeb2fe2278ce06243619cd5f", + "id": "nmdc:07a6287efeb2fe2278ce06243619cd5f", + "file_size_bytes": 6090640266 + }, + { + "name": "Gp0119866_Protein FAA", + "description": "Protein FAA for Gp0119866", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_proteins.faa", + "md5_checksum": "321c7bdfeb7bcc3b9398e468390c47ce", + "id": "nmdc:321c7bdfeb7bcc3b9398e468390c47ce", + "file_size_bytes": 219712101 + }, + { + "name": "Gp0119866_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119866", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_structural_annotation.gff", + "md5_checksum": "7240223762a9eca1a3c373f2a08116ae", + "id": 
"nmdc:7240223762a9eca1a3c373f2a08116ae", + "file_size_bytes": 2534 + }, + { + "name": "Gp0119866_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119866", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_functional_annotation.gff", + "md5_checksum": "7595ad06b43e74c1e4a1e3f707df911b", + "id": "nmdc:7595ad06b43e74c1e4a1e3f707df911b", + "file_size_bytes": 215797794 + }, + { + "name": "Gp0119866_KO TSV file", + "description": "KO TSV file for Gp0119866", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_ko.tsv", + "md5_checksum": "22cca73dcdda0402e2702dc732f4d524", + "id": "nmdc:22cca73dcdda0402e2702dc732f4d524", + "file_size_bytes": 31880320 + }, + { + "name": "Gp0119866_EC TSV file", + "description": "EC TSV file for Gp0119866", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_ec.tsv", + "md5_checksum": "b3caf3a48d011a563b4ff53f76bd31a3", + "id": "nmdc:b3caf3a48d011a563b4ff53f76bd31a3", + "file_size_bytes": 22344271 + }, + { + "name": "Gp0119866_COG GFF file", + "description": "COG GFF file for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_cog.gff", + "md5_checksum": "e2295c31130dd9c63d0a1328fbfaa3ac", + "id": "nmdc:e2295c31130dd9c63d0a1328fbfaa3ac", + "file_size_bytes": 132290882 + }, + { + "name": "Gp0119866_PFAM GFF file", + "description": "PFAM GFF file for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_pfam.gff", + "md5_checksum": "493f1e741f4322f9af1baa0cfc166834", + "id": "nmdc:493f1e741f4322f9af1baa0cfc166834", + "file_size_bytes": 116234623 + }, + { + "name": "Gp0119866_TigrFam GFF file", + "description": "TigrFam GFF file for 
Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_tigrfam.gff", + "md5_checksum": "972d8a94081dab52d0ea43f31a420807", + "id": "nmdc:972d8a94081dab52d0ea43f31a420807", + "file_size_bytes": 20163506 + }, + { + "name": "Gp0119866_SMART GFF file", + "description": "SMART GFF file for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_smart.gff", + "md5_checksum": "2cce9cf7c0cce1bc86c3f16200f45e24", + "id": "nmdc:2cce9cf7c0cce1bc86c3f16200f45e24", + "file_size_bytes": 28949647 + }, + { + "name": "Gp0119866_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_supfam.gff", + "md5_checksum": "ebe3dddf7e5a6af42d0cb5bc45b80655", + "id": "nmdc:ebe3dddf7e5a6af42d0cb5bc45b80655", + "file_size_bytes": 159098329 + }, + { + "name": "Gp0119866_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_cath_funfam.gff", + "md5_checksum": "3e6b6157c4e5edf6ba07b1a771bb8a80", + "id": "nmdc:3e6b6157c4e5edf6ba07b1a771bb8a80", + "file_size_bytes": 145649187 + }, + { + "name": "Gp0119866_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/annotation/nmdc_mga09f1f50_ko_ec.gff", + "md5_checksum": "77abc57adce2014a9dc77fd82c51caa2", + "id": "nmdc:77abc57adce2014a9dc77fd82c51caa2", + "file_size_bytes": 101853311 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119866_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/MAGs/nmdc_mga09f1f50_bins.tooShort.fa", + "md5_checksum": "4e55be27fe6db3985ce888576f36ad4d", + "id": "nmdc:4e55be27fe6db3985ce888576f36ad4d", + "file_size_bytes": 209488470 + }, + { + "name": "Gp0119866_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/MAGs/nmdc_mga09f1f50_bins.unbinned.fa", + "md5_checksum": "4015203d1ba510e71cf1ef81d598a907", + "id": "nmdc:4015203d1ba510e71cf1ef81d598a907", + "file_size_bytes": 135452739 + }, + { + "name": "Gp0119866_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119866", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/MAGs/nmdc_mga09f1f50_checkm_qa.out", + "md5_checksum": "d219f9f9abbab37e40d6151ee9280cda", + "id": "nmdc:d219f9f9abbab37e40d6151ee9280cda", + "file_size_bytes": 11937 + }, + { + "name": "Gp0119866_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119866", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/MAGs/nmdc_mga09f1f50_hqmq_bin.zip", + "md5_checksum": "421a2892a62d614c29593153dd4862aa", + "id": "nmdc:421a2892a62d614c29593153dd4862aa", + "file_size_bytes": 10722389 + }, + { + "name": "Gp0119866_metabat2 bins", + "description": "metabat2 bins for Gp0119866", + "url": "https://data.microbiomedata.org/data/nmdc:mga09f1f50/MAGs/nmdc_mga09f1f50_metabat_bin.zip", + "md5_checksum": "4473d2437f80006f43d623ccc918d9f0", + "id": "nmdc:4473d2437f80006f43d623ccc918d9f0", + 
"file_size_bytes": 11520594 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e22" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119866", + "url": "https://data.microbiomedata.org/data/1777_95835/assembly/mapping_stats.txt", + "file_size_bytes": 38481708, + "type": "nmdc:DataObject", + "id": "nmdc:1c6773773812be9ac314beab2c9720e2", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e24" + }, + "description": "Assembled contigs fasta for gold:Gp0119866", + "url": "https://data.microbiomedata.org/data/1777_95835/assembly/assembly_contigs.fna", + "file_size_bytes": 415545716, + "type": "nmdc:DataObject", + "id": "nmdc:3d0615c9034cfba43866390d717f8de8", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e26" + }, + "description": "Assembled scaffold fasta for gold:Gp0119866", + "url": "https://data.microbiomedata.org/data/1777_95835/assembly/assembly_scaffolds.fna", + "file_size_bytes": 413991787, + "type": "nmdc:DataObject", + "id": "nmdc:baacb1edcf909452c290e4732f1cf7fb", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e27" + }, + "description": "Assembled AGP file for gold:Gp0119866", + "url": "https://data.microbiomedata.org/data/1777_95835/assembly/assembly.agp", + "file_size_bytes": 33329332, + "type": "nmdc:DataObject", + "id": "nmdc:7eda145f687e3a6302565cf0727d04ac", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e28" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119866", + "url": "https://data.microbiomedata.org/data/1777_95835/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 6037317161, + "type": "nmdc:DataObject", + "id": "nmdc:98920c9f1653a2ac7634894ff51d53bd", + "name": "pairedMapped_sorted.bam", + 
"data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15aae" + }, + "id": "nmdc:674027d4c3ae42e925d07093dddfb608", + "name": "1777_95835.krona.html", + "description": "Gold:Gp0119866 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95835/ReadbasedAnalysis/centrifuge/1777_95835.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ab2" + }, + "id": "nmdc:96315784605e0970b2cb797e73b52c2c", + "name": "1777_95835.json", + "description": "Gold:Gp0119866 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95835/ReadbasedAnalysis/1777_95835.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d9" + }, + "id": "nmdc:510895c9632db0b7146f915b168c61c8", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119866", + "file_size_bytes": 203954166, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163db" + }, + "id": "nmdc:3c04c7658fb75e4588ecf98ed007d14c", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119866", + "file_size_bytes": 158400338, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e0" + }, + "id": "nmdc:1d7c04f80155bc479873a73008a7e075", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119866", + "file_size_bytes": 3148, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b163e1" + }, + "id": "nmdc:0b89d6f0dea353eadfbbcaf3acfc16ab", + "name": "gold:Gp0119866.bins.25.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 1986555, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.25.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e2" + }, + "id": "nmdc:0ceeb6ebc393296e06d1761e601af5dc", + "name": "gold:Gp0119866.bins.79.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 1365279, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.79.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e3" + }, + "id": "nmdc:e34f49a3c7e4b4d814ed9c991beaed56", + "name": "gold:Gp0119866.bins.30.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 2364016, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.30.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e4" + }, + "id": "nmdc:6564d1f8508369b4aff5f896146a41b6", + "name": "gold:Gp0119866.bins.64.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 3044018, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.64.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e5" + }, + "id": "nmdc:986503a619bc66c55262f9391b5aa2f9", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119866", + "file_size_bytes": 14364, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM 
Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e6" + }, + "id": "nmdc:c879c26a86f999bc7fedcce72cfb8dbe", + "name": "gold:Gp0119866.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 931266, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e7" + }, + "id": "nmdc:d15ba65af6c094374895d53ba3f6b490", + "name": "gold:Gp0119866.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 2770440, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e8" + }, + "id": "nmdc:edf7d6527ec0aba457b2be1ef7ff5f03", + "name": "gold:Gp0119866.bins.18.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 340739, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.18.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163e9" + }, + "id": "nmdc:db77766b0847796f66ddeb1ed27851ed", + "name": "gold:Gp0119866.bins.23.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 469806, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.23.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ea" + }, + "id": "nmdc:1e177e50031081391249f13b96c16f87", + "name": "gold:Gp0119866.bins.66.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 352982, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.66.fa", + "type": 
"nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163eb" + }, + "id": "nmdc:6e66c8db5a1702dfefaf2daea9c438b2", + "name": "gold:Gp0119866.bins.67.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 3056293, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.67.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ec" + }, + "id": "nmdc:712bab2bf792503743ed226191a4441b", + "name": "gold:Gp0119866.bins.53.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 245007, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.53.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ed" + }, + "id": "nmdc:4633ded83df7d7dd9bb5df4bda96ac41", + "name": "gold:Gp0119866.bins.41.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 411152, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.41.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ee" + }, + "id": "nmdc:8709968e0f087e1383dbeaa0cdef509a", + "name": "gold:Gp0119866.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 292760, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ef" + }, + "id": "nmdc:3aaa48a3737ca686948a2f127878e3fb", + "name": "gold:Gp0119866.bins.26.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 383731, + "url": 
"https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.26.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f0" + }, + "id": "nmdc:c4553fa7101384ee6480863321d2d36f", + "name": "gold:Gp0119866.bins.47.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 686615, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.47.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f1" + }, + "id": "nmdc:f18af6317fc20e7950b675e82a2a24bd", + "name": "gold:Gp0119866.bins.46.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 483342, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.46.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f2" + }, + "id": "nmdc:e7472b4f244c19fbeb516d8e6fe11dad", + "name": "gold:Gp0119866.bins.44.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 249992, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.44.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f3" + }, + "id": "nmdc:cbdbfafedef844aa6a545c1b66e7d9ba", + "name": "gold:Gp0119866.bins.58.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 314948, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.58.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f4" + }, + "id": "nmdc:34d8275d8acc72ed551ed54c90e97558", + "name": "gold:Gp0119866.bins.35.fa", + "description": "metabat2 binned contig file for 
gold:Gp0119866", + "file_size_bytes": 563895, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.35.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f5" + }, + "id": "nmdc:b5379fd954c872f2b6705b2391ca073c", + "name": "gold:Gp0119866.bins.28.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 518430, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.28.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f6" + }, + "id": "nmdc:a7f9d8f907ca10cad4498bf65d2a81be", + "name": "gold:Gp0119866.bins.32.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 496037, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.32.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f8" + }, + "id": "nmdc:73322153c07d4904452facf27e99147d", + "name": "gold:Gp0119866.bins.62.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 662921, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.62.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f9" + }, + "id": "nmdc:0fe2340239637260fb4a7aa8245acea8", + "name": "gold:Gp0119866.bins.68.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 289093, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.68.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163fa" + }, + "id": "nmdc:52bd1e840b0239fac0313780372c45ef", + "name": "gold:Gp0119866.bins.55.fa", + 
"description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 486995, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.55.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163fb" + }, + "id": "nmdc:db553eebba750c66ea9dc352ce9d2920", + "name": "gold:Gp0119866.bins.29.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 407773, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.29.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163fc" + }, + "id": "nmdc:fd24597af65d8d22416118df578393fa", + "name": "gold:Gp0119866.bins.42.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 440118, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.42.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163fd" + }, + "id": "nmdc:0aff5f05189e1d2d16a89dda7cbce77d", + "name": "gold:Gp0119866.bins.80.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 253101, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.80.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163fe" + }, + "id": "nmdc:af80c8f8de3ffdf57fb20190350946d3", + "name": "gold:Gp0119866.bins.22.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 395258, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.22.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ff" + }, + "id": "nmdc:0869e896c44152b4a8c1e341b88feb88", 
+ "name": "gold:Gp0119866.bins.77.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 204671, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.77.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16400" + }, + "id": "nmdc:09fd74622b54636c65b13ad165ac05e4", + "name": "gold:Gp0119866.bins.38.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 721308, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.38.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16401" + }, + "id": "nmdc:1be39db44868a56427a657c436776918", + "name": "gold:Gp0119866.bins.19.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 284439, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.19.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16402" + }, + "id": "nmdc:547b5210391325b1e0fd02fe44c8d9f9", + "name": "gold:Gp0119866.bins.63.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 296272, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.63.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16403" + }, + "id": "nmdc:3ca9f5cd32ebab24e43d4c2e2336a142", + "name": "gold:Gp0119866.bins.72.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 283754, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.72.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16404" + }, + "id": 
"nmdc:aa6aa99facbeaf90df8f6f8cb1c01664", + "name": "gold:Gp0119866.bins.45.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 498881, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.45.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16405" + }, + "id": "nmdc:5c5c5ea662a73b4ea890d9b70b78a1a3", + "name": "gold:Gp0119866.bins.65.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 283790, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.65.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16406" + }, + "id": "nmdc:a74f0d0272d6b5d94629805409fc69ad", + "name": "gold:Gp0119866.bins.21.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 247658, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.21.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16407" + }, + "id": "nmdc:0ecce0f05a013e78f407ac8d02f8ef58", + "name": "gold:Gp0119866.bins.49.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 227820, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.49.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16408" + }, + "id": "nmdc:07ce2a17ef91f25333c35f6c77fd73f3", + "name": "gold:Gp0119866.bins.50.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 269343, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.50.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b1640a" + }, + "id": "nmdc:ca62dbd93cad67cb5813d6a87867a814", + "name": "gold:Gp0119866.bins.59.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 207950, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.59.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1640b" + }, + "id": "nmdc:722c536e239b9457eb5cda5421f92cde", + "name": "gold:Gp0119866.bins.39.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 315226, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.39.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1640c" + }, + "id": "nmdc:0b987b6c8f643e89bb32d504087794fb", + "name": "gold:Gp0119866.bins.71.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 207129, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.71.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1640d" + }, + "id": "nmdc:9aad7bfab7aff14a8dff95e0b8f7e9cb", + "name": "gold:Gp0119866.bins.56.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 842774, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.56.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1640e" + }, + "id": "nmdc:7f1f0b940bd015cacfc8b5cec86ff130", + "name": "gold:Gp0119866.bins.76.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 364360, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.76.fa", + "type": "nmdc:DataObject", + "data_object_type": 
"Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16410" + }, + "id": "nmdc:22a659e06305f6fec41820141b32d263", + "name": "gold:Gp0119866.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 1567291, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16411" + }, + "id": "nmdc:c474dc82e02f57e5315c19eca8cb3108", + "name": "gold:Gp0119866.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 271499, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16412" + }, + "id": "nmdc:70d38a907225c3437796d974d72d5e0e", + "name": "gold:Gp0119866.bins.34.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 471909, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.34.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16413" + }, + "id": "nmdc:b4de7e2571a167d47b0f76c05b945530", + "name": "gold:Gp0119866.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 616411, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16414" + }, + "id": "nmdc:713b5f68e5bd6e636485f9adc09a6217", + "name": "gold:Gp0119866.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 766431, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.9.fa", + "type": 
"nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16415" + }, + "id": "nmdc:5d36e8b1cf3e19cce9df7dc53d67c5b6", + "name": "gold:Gp0119866.bins.54.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 538984, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.54.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16416" + }, + "id": "nmdc:7df0ce466e981f914e6c60db7479e0c3", + "name": "gold:Gp0119866.bins.75.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 289076, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.75.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16417" + }, + "id": "nmdc:3e2499985648986bab6d0a10b66045b4", + "name": "gold:Gp0119866.bins.57.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 279158, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.57.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16418" + }, + "id": "nmdc:38651ad81f424174b5e9079840ebd7a1", + "name": "gold:Gp0119866.bins.37.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 211330, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.37.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16419" + }, + "id": "nmdc:232e3bfd4db0bddbbe2129eb42aecb29", + "name": "gold:Gp0119866.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 489735, + "url": 
"https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1641a" + }, + "id": "nmdc:bc8fbd09696646494c5a7b2f717d9e02", + "name": "gold:Gp0119866.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 232412, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1641b" + }, + "id": "nmdc:1b30fcc3d0b9b586c6daaa9ef4596cef", + "name": "gold:Gp0119866.bins.24.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 505822, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.24.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1641c" + }, + "id": "nmdc:7e8d9f6e1dad38d5a87949c60abbdec1", + "name": "gold:Gp0119866.bins.70.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 237909, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.70.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1641d" + }, + "id": "nmdc:077b341365ba08e76f19d209704a0dd3", + "name": "gold:Gp0119866.bins.69.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 495620, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.69.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1641e" + }, + "id": "nmdc:ecc196fa284eb01e82f10968b5ecc487", + "name": "gold:Gp0119866.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", 
+ "file_size_bytes": 260913, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1641f" + }, + "id": "nmdc:06fb265abb69eb83b9eaa16449294b42", + "name": "gold:Gp0119866.bins.73.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 246590, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.73.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16420" + }, + "id": "nmdc:c9c9fbea9fe2067abdb6a79a91b4f5e7", + "name": "gold:Gp0119866.bins.74.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 294301, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.74.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16421" + }, + "id": "nmdc:7f1e25a623734aa2a128b638722f480e", + "name": "gold:Gp0119866.bins.61.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 622169, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.61.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16422" + }, + "id": "nmdc:5b912e5f9abc12b23c80f7f538fb82a1", + "name": "gold:Gp0119866.bins.20.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 1555827, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.20.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16423" + }, + "id": "nmdc:3db450b5ee576a3f7d985a7fcc59c28b", + "name": "gold:Gp0119866.bins.33.fa", + "description": "metabat2 
binned contig file for gold:Gp0119866", + "file_size_bytes": 264520, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.33.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16424" + }, + "id": "nmdc:ef96e454476dcacf50cd6a42b8bec307", + "name": "gold:Gp0119866.bins.51.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 816501, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.51.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16425" + }, + "id": "nmdc:5917b327d45ec30a67ec311179c2b3be", + "name": "gold:Gp0119866.bins.52.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 2337883, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.52.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16426" + }, + "id": "nmdc:bf384b3b8c994a5e927e0d6ba9cd4b75", + "name": "gold:Gp0119866.bins.78.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 232077, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.78.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16427" + }, + "id": "nmdc:1605b914b1f9ad1108f2ed4562b84f70", + "name": "gold:Gp0119866.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 259088, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16428" + }, + "id": "nmdc:149f978dd8d1958189954b1538a43a6e", + "name": 
"gold:Gp0119866.bins.27.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 341702, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.27.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16429" + }, + "id": "nmdc:25199144d0dec63c93e43800e1acd630", + "name": "gold:Gp0119866.bins.60.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 1333762, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.60.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1642a" + }, + "id": "nmdc:50c4d24eddf734d9de929d211dfcf10b", + "name": "gold:Gp0119866.bins.16.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 811583, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1642b" + }, + "id": "nmdc:c12bfb857ae917bc4b9d227f030e779f", + "name": "gold:Gp0119866.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 288011, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1642c" + }, + "id": "nmdc:cb856f04581a4b4a3a4805723f6b8db8", + "name": "gold:Gp0119866.bins.17.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 275712, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.17.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1642d" + }, + "id": 
"nmdc:22898562c9d45115b8188d906c465116", + "name": "gold:Gp0119866.bins.36.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 305159, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.36.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1642e" + }, + "id": "nmdc:620ed07dcdc0b051c38e9b09e1c8dd5a", + "name": "gold:Gp0119866.bins.15.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 220576, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1642f" + }, + "id": "nmdc:fdc2e6518269f654ae4d3185490984b3", + "name": "gold:Gp0119866.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 447185, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16430" + }, + "id": "nmdc:94678eea8d9883239cd1a8a2d44bde47", + "name": "gold:Gp0119866.bins.48.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 309387, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.48.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16431" + }, + "id": "nmdc:8d165a11650e2f478793244e349b35db", + "name": "gold:Gp0119866.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 609474, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16432" + }, + "id": "nmdc:1a75ced7fc40d7b547fd59cb9cdd2b73", + "name": "gold:Gp0119866.bins.43.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 278479, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.43.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16433" + }, + "id": "nmdc:5628d94e18d991f59eb7208e1264fd7e", + "name": "gold:Gp0119866.bins.31.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 208882, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.31.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16435" + }, + "id": "nmdc:5478bfead6edb734e90b78617f248bfb", + "name": "gold:Gp0119866.bins.40.fa", + "description": "metabat2 binned contig file for gold:Gp0119866", + "file_size_bytes": 429941, + "url": "https://data.microbiomedata.org/data/1777_95835/img_MAGs/metabat-bins/bins.40.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca0" + }, + "description": "EC TSV File for gold:Gp0119866", + "url": "https://data.microbiomedata.org/1777_95835/img_annotation/Ga0482149_ec.tsv", + "md5_checksum": "9101306c31f4b8c544f1671e66607a87", + "file_size_bytes": 3385, + "id": "nmdc:9101306c31f4b8c544f1671e66607a87", + "name": "gold:Gp0119866_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca1" + }, + "description": "Functional annotation GFF file for gold:Gp0119866", + "url": "https://data.microbiomedata.org/1777_95835/img_annotation/Ga0482149_functional_annotation.gff", + "md5_checksum": "e4f33c52b87b07d9f50cda6bf067d710", + "file_size_bytes": 3385, + "id": 
"nmdc:e4f33c52b87b07d9f50cda6bf067d710", + "name": "gold:Gp0119866_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca2" + }, + "description": "Protein FAA for gold:Gp0119866", + "url": "https://data.microbiomedata.org/1777_95835/img_annotation/Ga0482149_proteins.faa", + "md5_checksum": "2de8312087081f9aeac56c56195a5fc2", + "file_size_bytes": 3385, + "id": "nmdc:2de8312087081f9aeac56c56195a5fc2", + "name": "gold:Gp0119866_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca3" + }, + "description": "Structural annotation GFF file for gold:Gp0119866", + "url": "https://data.microbiomedata.org/1777_95835/img_annotation/Ga0482149_structural_annotation.gff", + "md5_checksum": "5d1bb402cf5170de632179f2515a224f", + "file_size_bytes": 3385, + "id": "nmdc:5d1bb402cf5170de632179f2515a224f", + "name": "gold:Gp0119866_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca6" + }, + "description": "KO TSV File for gold:Gp0119866", + "url": "https://data.microbiomedata.org/1777_95835/img_annotation/Ga0482149_ko.tsv", + "md5_checksum": "4ef712b7b38e384c25a31fc24c39aab9", + "file_size_bytes": 3385, + "id": "nmdc:4ef712b7b38e384c25a31fc24c39aab9", + "name": "gold:Gp0119866_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3470f" + }, + "has_input": [ + "nmdc:9c04a2492162eb6b6aef8c389f3e182f", + "nmdc:07a6287efeb2fe2278ce06243619cd5f", + "nmdc:7595ad06b43e74c1e4a1e3f707df911b" + ], + "too_short_contig_num": 427528, + "part_of": [ + "nmdc:mga09f1f50" + ], + "binned_contig_num": 9558, + "git_url": 
"https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:4e55be27fe6db3985ce888576f36ad4d", + "nmdc:4015203d1ba510e71cf1ef81d598a907", + "nmdc:d219f9f9abbab37e40d6151ee9280cda", + "nmdc:421a2892a62d614c29593153dd4862aa", + "nmdc:4473d2437f80006f43d623ccc918d9f0" + ], + "was_informed_by": "gold:Gp0119866", + "input_contig_num": 508287, + "id": "nmdc:7c72f6c62f76f92f4d0b22fdabd10281", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga09f1f50", + "mags_list": [ + { + "number_of_contig": 328, + "completeness": 25.86, + "bin_name": "bins.1", + "gene_count": 2109, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 6.03, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 96, + "completeness": 55.17, + "bin_name": "bins.10", + "gene_count": 877, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nanopelagicales", + "num_16s": 0, + "gtdbtk_family": "Nanopelagicaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Planktophila", + "num_t_rna": 30 + }, + { + "number_of_contig": 102, + "completeness": 27.92, + "bin_name": "bins.11", + "gene_count": 778, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.59, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 138, + "completeness": 43.97, + "bin_name": "bins.12", + "gene_count": 865, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + 
"gtdbtk_domain": "", + "contamination": 0.86, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 31, + "completeness": 14.66, + "bin_name": "bins.13", + "gene_count": 231, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 121, + "completeness": 46.98, + "bin_name": "bins.14", + "gene_count": 959, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.45, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 59, + "completeness": 25.55, + "bin_name": "bins.15", + "gene_count": 422, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 7.02, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 190, + "completeness": 61.0, + "bin_name": "bins.16", + "gene_count": 1389, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 21.08, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 36 + }, + { + "number_of_contig": 387, + "completeness": 68.24, + "bin_name": "bins.17", + "gene_count": 2342, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 54.94, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 
0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 53 + }, + { + "number_of_contig": 24, + "completeness": 0.0, + "bin_name": "bins.18", + "gene_count": 905, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 244, + "completeness": 33.33, + "bin_name": "bins.19", + "gene_count": 1426, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 20.83, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 42, + "completeness": 0.0, + "bin_name": "bins.2", + "gene_count": 279, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 254, + "completeness": 46.91, + "bin_name": "bins.20", + "gene_count": 1407, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + }, + { + "number_of_contig": 58, + "completeness": 20.99, + "bin_name": "bins.21", + "gene_count": 431, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 83, + "completeness": 
52.59, + "bin_name": "bins.22", + "gene_count": 1096, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Microtrichales", + "num_16s": 1, + "gtdbtk_family": "Ilumatobacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Acidimicrobiia", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA3006", + "num_t_rna": 12 + }, + { + "number_of_contig": 122, + "completeness": 37.01, + "bin_name": "bins.23", + "gene_count": 981, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 + }, + { + "number_of_contig": 165, + "completeness": 45.22, + "bin_name": "bins.24", + "gene_count": 1126, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 4.39, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 36 + }, + { + "number_of_contig": 245, + "completeness": 91.45, + "bin_name": "bins.25", + "gene_count": 3134, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "S15B-MN24", + "num_16s": 0, + "gtdbtk_family": "UBA4093", + "gtdbtk_domain": "Bacteria", + "contamination": 1.71, + "gtdbtk_class": "Sericytochromatia", + "gtdbtk_phylum": "Cyanobacteria", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 43 + }, + { + "number_of_contig": 167, + "completeness": 72.04, + "bin_name": "bins.26", + "gene_count": 1656, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.1, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + 
"num_23s": 1, + "gtdbtk_genus": "UBA2463", + "num_t_rna": 31 + }, + { + "number_of_contig": 125, + "completeness": 33.97, + "bin_name": "bins.27", + "gene_count": 765, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 142, + "completeness": 18.97, + "bin_name": "bins.28", + "gene_count": 643, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 165, + "completeness": 93.98, + "bin_name": "bins.29", + "gene_count": 2421, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Cytophagales", + "num_16s": 1, + "gtdbtk_family": "Cyclobacteriaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.78, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Algoriphagus", + "num_t_rna": 35 + }, + { + "number_of_contig": 402, + "completeness": 57.47, + "bin_name": "bins.3", + "gene_count": 2112, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Phycisphaerales", + "num_16s": 1, + "gtdbtk_family": "SM1A02", + "gtdbtk_domain": "Bacteria", + "contamination": 3.69, + "gtdbtk_class": "Phycisphaerae", + "gtdbtk_phylum": "Planctomycetota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + }, + { + "number_of_contig": 50, + "completeness": 4.17, + "bin_name": "bins.30", + "gene_count": 384, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + 
"gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 19 + }, + { + "number_of_contig": 78, + "completeness": 29.31, + "bin_name": "bins.31", + "gene_count": 500, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 655, + "completeness": 82.53, + "bin_name": "bins.32", + "gene_count": 4261, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 51.01, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 61 + }, + { + "number_of_contig": 24, + "completeness": 22.73, + "bin_name": "bins.33", + "gene_count": 276, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 212, + "completeness": 88.21, + "bin_name": "bins.34", + "gene_count": 1769, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Chitinophagales", + "num_16s": 0, + "gtdbtk_family": "Chitinophagaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.1, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA8137", + "num_t_rna": 24 + }, + { + "number_of_contig": 129, + "completeness": 25.86, + "bin_name": "bins.35", + "gene_count": 739, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.05, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + 
"num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 100, + "completeness": 34.81, + "bin_name": "bins.36", + "gene_count": 626, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 84, + "completeness": 46.55, + "bin_name": "bins.37", + "gene_count": 655, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 4.31, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 26 + }, + { + "number_of_contig": 42, + "completeness": 20.69, + "bin_name": "bins.38", + "gene_count": 307, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 103, + "completeness": 15.52, + "bin_name": "bins.39", + "gene_count": 712, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 56, + "completeness": 18.97, + "bin_name": "bins.4", + "gene_count": 479, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 158, + "completeness": 26.06, 
+ "bin_name": "bins.40", + "gene_count": 848, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 34, + "completeness": 20.69, + "bin_name": "bins.41", + "gene_count": 235, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 98, + "completeness": 19.3, + "bin_name": "bins.42", + "gene_count": 627, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + }, + { + "number_of_contig": 84, + "completeness": 25.86, + "bin_name": "bins.43", + "gene_count": 529, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 33, + "completeness": 0.0, + "bin_name": "bins.44", + "gene_count": 279, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 39, + "completeness": 7.47, + "bin_name": "bins.45", + "gene_count": 279, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + 
"num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 210, + "completeness": 58.25, + "bin_name": "bins.46", + "gene_count": 6029, + "bin_quality": "MQ", + "gtdbtk_species": "Pseudomonas_E sp001984065", + "gtdbtk_order": "Pseudomonadales", + "num_16s": 0, + "gtdbtk_family": "Pseudomonadaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "Pseudomonas_E", + "num_t_rna": 45 + }, + { + "number_of_contig": 54, + "completeness": 10.32, + "bin_name": "bins.47", + "gene_count": 305, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 116, + "completeness": 14.4, + "bin_name": "bins.48", + "gene_count": 600, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 190, + "completeness": 23.28, + "bin_name": "bins.49", + "gene_count": 679, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + "number_of_contig": 267, + "completeness": 72.2, + "bin_name": "bins.5", + "gene_count": 2115, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": 
"Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Rhodocyclaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.56, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 36 + }, + { + "number_of_contig": 190, + "completeness": 35.97, + "bin_name": "bins.50", + "gene_count": 1121, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 237, + "completeness": 65.47, + "bin_name": "bins.51", + "gene_count": 1630, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "Bacteria", + "contamination": 0.48, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 33, + "completeness": 61.11, + "bin_name": "bins.52", + "gene_count": 929, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nanopelagicales", + "num_16s": 1, + "gtdbtk_family": "AcAMD-5", + "gtdbtk_domain": "Bacteria", + "contamination": 0.05, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "ATZT02", + "num_t_rna": 18 + }, + { + "number_of_contig": 10, + "completeness": 0.0, + "bin_name": "bins.53", + "gene_count": 471, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 31 + }, + { + "number_of_contig": 93, + "completeness": 46.65, + "bin_name": "bins.54", + "gene_count": 931, + 
"bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.75, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 26 + }, + { + "number_of_contig": 54, + "completeness": 16.67, + "bin_name": "bins.55", + "gene_count": 382, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 98, + "completeness": 49.73, + "bin_name": "bins.56", + "gene_count": 876, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 164, + "completeness": 51.78, + "bin_name": "bins.57", + "gene_count": 1105, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.69, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 50, + "completeness": 8.95, + "bin_name": "bins.58", + "gene_count": 274, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 88, + "completeness": 32.05, + "bin_name": "bins.59", + "gene_count": 593, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + 
"gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 238, + "completeness": 19.55, + "bin_name": "bins.6", + "gene_count": 1153, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 56, + "completeness": 22.41, + "bin_name": "bins.60", + "gene_count": 324, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.45, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 270, + "completeness": 84.62, + "bin_name": "bins.61", + "gene_count": 3169, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "Bacteria", + "contamination": 1.28, + "gtdbtk_class": "Sericytochromatia", + "gtdbtk_phylum": "Cyanobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 43 + }, + { + "number_of_contig": 106, + "completeness": 28.95, + "bin_name": "bins.62", + "gene_count": 688, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.51, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 38, + "completeness": 48.59, + "bin_name": "bins.63", + "gene_count": 573, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + 
"gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 132, + "completeness": 95.87, + "bin_name": "bins.64", + "gene_count": 2451, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 2, + "gtdbtk_family": "Aquaspirillaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 3, + "num_23s": 0, + "gtdbtk_genus": "Aquaspirillum", + "num_t_rna": 50 + }, + { + "number_of_contig": 329, + "completeness": 76.34, + "bin_name": "bins.65", + "gene_count": 2609, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA1135", + "num_16s": 1, + "gtdbtk_family": "GCA-002686595", + "gtdbtk_domain": "Bacteria", + "contamination": 1.08, + "gtdbtk_class": "UBA1135", + "gtdbtk_phylum": "Planctomycetota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 36 + }, + { + "number_of_contig": 368, + "completeness": 71.76, + "bin_name": "bins.7", + "gene_count": 2097, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 26.82, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 25 + }, + { + "number_of_contig": 140, + "completeness": 28.87, + "bin_name": "bins.8", + "gene_count": 737, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 358, + "completeness": 76.61, + "bin_name": "bins.9", + "gene_count": 3078, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Acetobacterales", + "num_16s": 1, + "gtdbtk_family": "Acetobacteraceae", + 
"gtdbtk_domain": "Bacteria", + "contamination": 1.49, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Ga0074136", + "num_t_rna": 29 + } + ], + "unbinned_contig_num": 71201, + "started_at_time": "2021-10-11T02:22:55Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T05:33:48+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a0" + }, + "has_input": [ + "nmdc:9c04a2492162eb6b6aef8c389f3e182f" + ], + "part_of": [ + "nmdc:mga09f1f50" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:321c7bdfeb7bcc3b9398e468390c47ce", + "nmdc:7240223762a9eca1a3c373f2a08116ae", + "nmdc:7595ad06b43e74c1e4a1e3f707df911b", + "nmdc:22cca73dcdda0402e2702dc732f4d524", + "nmdc:b3caf3a48d011a563b4ff53f76bd31a3", + "nmdc:e2295c31130dd9c63d0a1328fbfaa3ac", + "nmdc:493f1e741f4322f9af1baa0cfc166834", + "nmdc:972d8a94081dab52d0ea43f31a420807", + "nmdc:2cce9cf7c0cce1bc86c3f16200f45e24", + "nmdc:ebe3dddf7e5a6af42d0cb5bc45b80655", + "nmdc:3e6b6157c4e5edf6ba07b1a771bb8a80", + "nmdc:77abc57adce2014a9dc77fd82c51caa2" + ], + "was_informed_by": "gold:Gp0119866", + "id": "nmdc:7c72f6c62f76f92f4d0b22fdabd10281", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga09f1f50", + "started_at_time": "2021-10-11T02:22:55Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T05:33:48+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f96" + }, + "has_input": [ + "nmdc:448625936760095c0efc271e1003510d" + ], + "part_of": [ + "nmdc:mga09f1f50" + ], + "ctg_logsum": 2411499, + "scaf_logsum": 2446472, + "gap_pct": 0.01426, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9c04a2492162eb6b6aef8c389f3e182f", + "nmdc:00ec3306f3e74333c27b7e82e38a1c66", + 
"nmdc:837fafb2ce56bc381abe7c6916b9bcab", + "nmdc:fba64d9fa6867f4ae1df6023309f0673", + "nmdc:07a6287efeb2fe2278ce06243619cd5f" + ], + "asm_score": 10.852, + "was_informed_by": "gold:Gp0119866", + "ctg_powsum": 294982, + "scaf_max": 288939, + "id": "nmdc:7c72f6c62f76f92f4d0b22fdabd10281", + "scaf_powsum": 301169, + "execution_resource": "NERSC-Cori", + "contigs": 508405, + "name": "Assembly Activity for nmdc:mga09f1f50", + "ctg_max": 178527, + "gc_std": 0.1059, + "contig_bp": 395633285, + "gc_avg": 0.52339, + "started_at_time": "2021-10-11T02:22:55Z", + "scaf_bp": 395689705, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 503438, + "ended_at_time": "2021-10-11T05:33:48+00:00", + "ctg_l50": 1042, + "ctg_l90": 327, + "ctg_n50": 75974, + "ctg_n90": 373168, + "scaf_l50": 1063, + "scaf_l90": 328, + "scaf_n50": 74068, + "scaf_n90": 367883, + "scaf_l_gt50k": 8118901, + "scaf_n_gt50k": 98, + "scaf_pct_gt50k": 2.0518353 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b78" + }, + "id": "nmdc:omprc-11-0sx8df80", + "name": "Fresh water mixed with fracking chemicals microbial communities from Ohio, USA - Utica-2 Time Series FC 2014_7_11", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-474rh027" + ], + "has_output": [ + "jgi:560df3720d878540fd6fe1cc" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2021-06-15", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series FC 2014_7_11", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119866" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c859" + }, + 
"has_input": [ + "nmdc:27c7491bb86bb84be09a68ad33cdd8e3" + ], + "part_of": [ + "nmdc:mga09f1f50" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:448625936760095c0efc271e1003510d", + "nmdc:28929f0addf71bbaafb2806253f7108e" + ], + "was_informed_by": "gold:Gp0119866", + "input_read_count": 63807614, + "output_read_bases": 9152227163, + "id": "nmdc:7c72f6c62f76f92f4d0b22fdabd10281", + "execution_resource": "NERSC-Cori", + "input_read_bases": 9634949714, + "name": "Read QC Activity for nmdc:mga09f1f50", + "output_read_count": 62665380, + "started_at_time": "2021-10-11T02:22:55Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T05:33:48+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf1e" + }, + "has_input": [ + "nmdc:448625936760095c0efc271e1003510d" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:57e8059128df37fe5763162f7c1b992e", + "nmdc:57f89e0f77210981a40cfb0e38014221", + "nmdc:af57c9f1974fe0d450382d57482b6c78", + "nmdc:20ce26b373108b6f80dbff7e96b5864b", + "nmdc:fd2a7a459a59296a18b2a464e2868214", + "nmdc:53c74feb3725c590a1db3486edc2efc3", + "nmdc:0449bf5f5699dd5c63c945682ac06f4e", + "nmdc:0f1a5387576931ff49370f8e786c92d5", + "nmdc:e3331958d361d72116018bbdb1a74d5d" + ], + "was_informed_by": "gold:Gp0119866", + "id": "nmdc:7c72f6c62f76f92f4d0b22fdabd10281", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga09f1f50", + "started_at_time": "2021-10-11T02:22:55Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T05:33:48+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 12611045743, + "type": "nmdc:DataObject", + "id": "jgi:57be41397ded5e0c87137bca", + "name": "10772.8.178397.AGTCAA.fastq.gz" + }, + { + "name": "Gp0153825_Filtered Reads", + "description": 
"Filtered Reads for Gp0153825", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/qa/nmdc_mga0t64728_filtered.fastq.gz", + "md5_checksum": "02f0bfce391292bffdc3fb0ede465fe9", + "id": "nmdc:02f0bfce391292bffdc3fb0ede465fe9", + "file_size_bytes": 6648951740 + }, + { + "name": "Gp0153825_Filtered Stats", + "description": "Filtered Stats for Gp0153825", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/qa/nmdc_mga0t64728_filterStats.txt", + "md5_checksum": "611e94aab854d6b45180b9b673412931", + "id": "nmdc:611e94aab854d6b45180b9b673412931", + "file_size_bytes": 286 + }, + { + "name": "Gp0153825_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_gottcha2_report.tsv", + "md5_checksum": "79fac15db82a5de044af7ab4f1b31a0e", + "id": "nmdc:79fac15db82a5de044af7ab4f1b31a0e", + "file_size_bytes": 3059 + }, + { + "name": "Gp0153825_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_gottcha2_report_full.tsv", + "md5_checksum": "e9ebfe8a61aa67e14c9018ed39f75e06", + "id": "nmdc:e9ebfe8a61aa67e14c9018ed39f75e06", + "file_size_bytes": 247485 + }, + { + "name": "Gp0153825_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0153825", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_gottcha2_krona.html", + "md5_checksum": "6d17eebb0556fee2fbeb3e44efc1c917", + "id": "nmdc:6d17eebb0556fee2fbeb3e44efc1c917", + "file_size_bytes": 234898 + }, + { + "name": "Gp0153825_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0153825", + "data_object_type": "Centrifuge 
Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_centrifuge_classification.tsv", + "md5_checksum": "96dddb7a2c71faf71b18d84e8bc85746", + "id": "nmdc:96dddb7a2c71faf71b18d84e8bc85746", + "file_size_bytes": 8973773238 + }, + { + "name": "Gp0153825_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0153825", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_centrifuge_report.tsv", + "md5_checksum": "0be6b57ed1f51413d251c49090442fdd", + "id": "nmdc:0be6b57ed1f51413d251c49090442fdd", + "file_size_bytes": 221400 + }, + { + "name": "Gp0153825_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0153825", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_centrifuge_krona.html", + "md5_checksum": "3efa21710c728332bd1d139190321703", + "id": "nmdc:3efa21710c728332bd1d139190321703", + "file_size_bytes": 2166532 + }, + { + "name": "Gp0153825_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0153825", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_kraken2_classification.tsv", + "md5_checksum": "e11972119e273e1e29803b7261511c37", + "id": "nmdc:e11972119e273e1e29803b7261511c37", + "file_size_bytes": 7640525991 + }, + { + "name": "Gp0153825_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0153825", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_kraken2_report.tsv", + "md5_checksum": "e99695e0b4b6a09ab54c16d1529f0a63", + "id": "nmdc:e99695e0b4b6a09ab54c16d1529f0a63", + "file_size_bytes": 515243 + }, 
+ { + "name": "Gp0153825_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0153825", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/ReadbasedAnalysis/nmdc_mga0t64728_kraken2_krona.html", + "md5_checksum": "fe4c1a2df3f76da9634250d52b18dc9a", + "id": "nmdc:fe4c1a2df3f76da9634250d52b18dc9a", + "file_size_bytes": 3327356 + }, + { + "name": "Gp0153825_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0153825", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/assembly/nmdc_mga0t64728_contigs.fna", + "md5_checksum": "b1a02f313d0deacb3698f8d72680e91c", + "id": "nmdc:b1a02f313d0deacb3698f8d72680e91c", + "file_size_bytes": 3271344 + }, + { + "name": "Gp0153825_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0153825", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/assembly/nmdc_mga0t64728_scaffolds.fna", + "md5_checksum": "8a6320f759b630b7e17c1d80ae30583f", + "id": "nmdc:8a6320f759b630b7e17c1d80ae30583f", + "file_size_bytes": 3267499 + }, + { + "name": "Gp0153825_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/assembly/nmdc_mga0t64728_covstats.txt", + "md5_checksum": "20ae7270d9f89e5eda4f5c346d7cd247", + "id": "nmdc:20ae7270d9f89e5eda4f5c346d7cd247", + "file_size_bytes": 115897 + }, + { + "name": "Gp0153825_Assembled AGP file", + "description": "Assembled AGP file for Gp0153825", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/assembly/nmdc_mga0t64728_assembly.agp", + "md5_checksum": "39dd1dd711567e66cc7aaaa755d7a248", + "id": "nmdc:39dd1dd711567e66cc7aaaa755d7a248", + "file_size_bytes": 101434 + }, + { + "name": "Gp0153825_Metagenome Alignment BAM file", + 
"description": "Metagenome Alignment BAM file for Gp0153825", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/assembly/nmdc_mga0t64728_pairedMapped_sorted.bam", + "md5_checksum": "8da8a1735e49708827f4b10a40bde0c2", + "id": "nmdc:8da8a1735e49708827f4b10a40bde0c2", + "file_size_bytes": 8726674924 + }, + { + "name": "Gp0153825_Protein FAA", + "description": "Protein FAA for Gp0153825", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_proteins.faa", + "md5_checksum": "a882f6c6fe3b1e5526465311b574ce63", + "id": "nmdc:a882f6c6fe3b1e5526465311b574ce63", + "file_size_bytes": 1434152 + }, + { + "name": "Gp0153825_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0153825", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_structural_annotation.gff", + "md5_checksum": "bd668c202635dda1fe70d9807f3e3c32", + "id": "nmdc:bd668c202635dda1fe70d9807f3e3c32", + "file_size_bytes": 2475 + }, + { + "name": "Gp0153825_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0153825", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_functional_annotation.gff", + "md5_checksum": "eb70ce45557a854bc46edd1eb0b36300", + "id": "nmdc:eb70ce45557a854bc46edd1eb0b36300", + "file_size_bytes": 1214534 + }, + { + "name": "Gp0153825_KO TSV file", + "description": "KO TSV file for Gp0153825", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_ko.tsv", + "md5_checksum": "962718857055cbd134b22274d4ea224f", + "id": "nmdc:962718857055cbd134b22274d4ea224f", + "file_size_bytes": 187941 + }, + { + "name": "Gp0153825_EC 
TSV file", + "description": "EC TSV file for Gp0153825", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_ec.tsv", + "md5_checksum": "a2f9d370c2395f31cc0bb99a5ec57eaa", + "id": "nmdc:a2f9d370c2395f31cc0bb99a5ec57eaa", + "file_size_bytes": 113925 + }, + { + "name": "Gp0153825_COG GFF file", + "description": "COG GFF file for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_cog.gff", + "md5_checksum": "1f8e76e5a46aafa7e17a34758e1c1715", + "id": "nmdc:1f8e76e5a46aafa7e17a34758e1c1715", + "file_size_bytes": 886087 + }, + { + "name": "Gp0153825_PFAM GFF file", + "description": "PFAM GFF file for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_pfam.gff", + "md5_checksum": "c6c9c61be127a0c0d4ca78f5eb56e93f", + "id": "nmdc:c6c9c61be127a0c0d4ca78f5eb56e93f", + "file_size_bytes": 903207 + }, + { + "name": "Gp0153825_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_tigrfam.gff", + "md5_checksum": "eb77752d55321979f5d9522fa3a313dd", + "id": "nmdc:eb77752d55321979f5d9522fa3a313dd", + "file_size_bytes": 213397 + }, + { + "name": "Gp0153825_SMART GFF file", + "description": "SMART GFF file for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_smart.gff", + "md5_checksum": "8509d9f39a145cec3b881caa3a0490f9", + "id": "nmdc:8509d9f39a145cec3b881caa3a0490f9", + "file_size_bytes": 307239 + }, + { + "name": "Gp0153825_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_supfam.gff", + "md5_checksum": "514f657faad64828b05a510474427cc2", + "id": "nmdc:514f657faad64828b05a510474427cc2", + "file_size_bytes": 1264605 + }, + { 
+ "name": "Gp0153825_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_cath_funfam.gff", + "md5_checksum": "eb71218dd2153978f0873f5e3f911c0c", + "id": "nmdc:eb71218dd2153978f0873f5e3f911c0c", + "file_size_bytes": 1228373 + }, + { + "name": "Gp0153825_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0153825", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/annotation/nmdc_mga0t64728_ko_ec.gff", + "md5_checksum": "931ce7602c4d2e405893584ec8cd2729", + "id": "nmdc:931ce7602c4d2e405893584ec8cd2729", + "file_size_bytes": 734963 + }, + { + "name": "Gp0153825_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0153825", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/MAGs/nmdc_mga0t64728_checkm_qa.out", + "md5_checksum": "eaee4528131cebe229ddf8da1eb2e50e", + "id": "nmdc:eaee4528131cebe229ddf8da1eb2e50e", + "file_size_bytes": 775 + }, + { + "name": "Gp0153825_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0153825", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0t64728/MAGs/nmdc_mga0t64728_hqmq_bin.zip", + "md5_checksum": "8906ae85f1184d4e212440d01080089b", + "id": "nmdc:8906ae85f1184d4e212440d01080089b", + "file_size_bytes": 650475 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14f25" + }, + "description": "Assembled AGP file for gold:Gp0153825", + "url": "https://data.microbiomedata.org/data/1777_118770/assembly/assembly.agp", + "file_size_bytes": 89794, + "type": "nmdc:DataObject", + "id": "nmdc:796afae2a8742bee3db2d7cfec348bbc", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14f26" + }, + "description": "Assembled contigs fasta for 
gold:Gp0153825", + "url": "https://data.microbiomedata.org/data/1777_118770/assembly/assembly_contigs.fna", + "file_size_bytes": 3265544, + "type": "nmdc:DataObject", + "id": "nmdc:b4b798cc9e7e9253ae8256a8237fd371", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14f27" + }, + "description": "Assembled scaffold fasta for gold:Gp0153825", + "url": "https://data.microbiomedata.org/data/1777_118770/assembly/assembly_scaffolds.fna", + "file_size_bytes": 3261711, + "type": "nmdc:DataObject", + "id": "nmdc:7c207e3fd3a1897160036cca2a2aae3a", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14f28" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0153825", + "url": "https://data.microbiomedata.org/data/1777_118770/assembly/mapping_stats.txt", + "file_size_bytes": 110097, + "type": "nmdc:DataObject", + "id": "nmdc:ab2947dd90a799b508e6620d97844033", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14f2e" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0153825", + "url": "https://data.microbiomedata.org/data/1777_118770/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 8665043442, + "type": "nmdc:DataObject", + "id": "nmdc:be822bf54f717c659de39a911a8f6746", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15c48" + }, + "id": "nmdc:70a77d1bd0c80688d146ba96459cc174", + "name": "1777_118770.krona.html", + "description": "Gold:Gp0153825 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_118770/ReadbasedAnalysis/centrifuge/1777_118770.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15c4f" + }, 
+ "id": "nmdc:bdf6b09af9e00d8e6d82ca1b17d01a64", + "name": "1777_118770.json", + "description": "Gold:Gp0153825 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_118770/ReadbasedAnalysis/1777_118770.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1664d" + }, + "id": "nmdc:128d500ab4d33d4966fad8fa91ba4b3f", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0153825", + "file_size_bytes": 442513, + "url": "https://data.microbiomedata.org/data/1777_118770/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16650" + }, + "id": "nmdc:c59c9bceababfe82830ddac7f9dd82ed", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0153825", + "file_size_bytes": 2280119, + "url": "https://data.microbiomedata.org/data/1777_118770/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16651" + }, + "id": "nmdc:2e022fe8f8aefc9c57966d509ffe9cf2", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0153825", + "file_size_bytes": 930, + "url": "https://data.microbiomedata.org/data/1777_118770/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16654" + }, + "id": "nmdc:fdb8246e82e96fa622f46ece87d4c2e6", + "name": "gold:Gp0153825.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0153825", + "file_size_bytes": 222891, + "url": "https://data.microbiomedata.org/data/1777_118770/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1665a" + }, + "id": "nmdc:782f8c80cc152fee437601f80a44e179", + "name": "gold:Gp0153825.bins.2.fa", + 
"description": "metabat2 binned contig file for gold:Gp0153825", + "file_size_bytes": 308085, + "url": "https://data.microbiomedata.org/data/1777_118770/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c42" + }, + "description": "EC TSV File for gold:Gp0153825", + "url": "https://data.microbiomedata.org/1777_118770/img_annotation/Ga0482144_ec.tsv", + "md5_checksum": "a1f2c190aa6d470f2eea681126e0470e", + "file_size_bytes": 3385, + "id": "nmdc:a1f2c190aa6d470f2eea681126e0470e", + "name": "gold:Gp0153825_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c47" + }, + "description": "Functional annotation GFF file for gold:Gp0153825", + "url": "https://data.microbiomedata.org/1777_118770/img_annotation/Ga0482144_functional_annotation.gff", + "md5_checksum": "c3ea4b3caf0c86e27118b3ffd51014b8", + "file_size_bytes": 3385, + "id": "nmdc:c3ea4b3caf0c86e27118b3ffd51014b8", + "name": "gold:Gp0153825_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c52" + }, + "description": "KO TSV File for gold:Gp0153825", + "url": "https://data.microbiomedata.org/1777_118770/img_annotation/Ga0482144_ko.tsv", + "md5_checksum": "7d69d28f4abec72a7ad66411312c37fb", + "file_size_bytes": 3385, + "id": "nmdc:7d69d28f4abec72a7ad66411312c37fb", + "name": "gold:Gp0153825_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c5b" + }, + "description": "Protein FAA for gold:Gp0153825", + "url": "https://data.microbiomedata.org/1777_118770/img_annotation/Ga0482144_proteins.faa", + "md5_checksum": "a79973ef9a0c96d13fa19b2725b21d17", + "file_size_bytes": 3385, + "id": 
"nmdc:a79973ef9a0c96d13fa19b2725b21d17", + "name": "gold:Gp0153825_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c68" + }, + "description": "Structural annotation GFF file for gold:Gp0153825", + "url": "https://data.microbiomedata.org/1777_118770/img_annotation/Ga0482144_structural_annotation.gff", + "md5_checksum": "1055b8fab0f63a1e56312813f47897ec", + "file_size_bytes": 3385, + "id": "nmdc:1055b8fab0f63a1e56312813f47897ec", + "name": "gold:Gp0153825_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab346eb" + }, + "has_input": [ + "nmdc:b1a02f313d0deacb3698f8d72680e91c", + "nmdc:8da8a1735e49708827f4b10a40bde0c2", + "nmdc:eb70ce45557a854bc46edd1eb0b36300" + ], + "too_short_contig_num": 1350, + "part_of": [ + "nmdc:mga0t64728" + ], + "binned_contig_num": 39, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:eaee4528131cebe229ddf8da1eb2e50e", + "nmdc:8906ae85f1184d4e212440d01080089b" + ], + "was_informed_by": "gold:Gp0153825", + "input_contig_num": 1450, + "id": "nmdc:2b6aa51bbeabec960c878b011e09836b", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0t64728", + "mags_list": [ + { + "number_of_contig": 39, + "completeness": 75.86, + "bin_name": "bins.1", + "gene_count": 2129, + "bin_quality": "MQ", + "gtdbtk_species": "Halanaerobium congolense", + "gtdbtk_order": "Halanaerobiales", + "num_16s": 0, + "gtdbtk_family": "Halanaerobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Halanaerobiia", + "gtdbtk_phylum": "Firmicutes_F", + "num_5s": 4, + "num_23s": 0, + "gtdbtk_genus": "Halanaerobium", + "num_t_rna": 50 + } + ], + "unbinned_contig_num": 61, + "started_at_time": "2021-12-01T21:21:54Z", + "type": 
"nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-12-02T21:03:45+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef98a" + }, + "has_input": [ + "nmdc:b1a02f313d0deacb3698f8d72680e91c" + ], + "part_of": [ + "nmdc:mga0t64728" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:a882f6c6fe3b1e5526465311b574ce63", + "nmdc:bd668c202635dda1fe70d9807f3e3c32", + "nmdc:eb70ce45557a854bc46edd1eb0b36300", + "nmdc:962718857055cbd134b22274d4ea224f", + "nmdc:a2f9d370c2395f31cc0bb99a5ec57eaa", + "nmdc:1f8e76e5a46aafa7e17a34758e1c1715", + "nmdc:c6c9c61be127a0c0d4ca78f5eb56e93f", + "nmdc:eb77752d55321979f5d9522fa3a313dd", + "nmdc:8509d9f39a145cec3b881caa3a0490f9", + "nmdc:514f657faad64828b05a510474427cc2", + "nmdc:eb71218dd2153978f0873f5e3f911c0c", + "nmdc:931ce7602c4d2e405893584ec8cd2729" + ], + "was_informed_by": "gold:Gp0153825", + "id": "nmdc:2b6aa51bbeabec960c878b011e09836b", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0t64728", + "started_at_time": "2021-12-01T21:21:54Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-12-02T21:03:45+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f7b" + }, + "has_input": [ + "nmdc:02f0bfce391292bffdc3fb0ede465fe9" + ], + "part_of": [ + "nmdc:mga0t64728" + ], + "ctg_logsum": 43626, + "scaf_logsum": 43800, + "gap_pct": 0.01259, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:b1a02f313d0deacb3698f8d72680e91c", + "nmdc:8a6320f759b630b7e17c1d80ae30583f", + "nmdc:20ae7270d9f89e5eda4f5c346d7cd247", + "nmdc:39dd1dd711567e66cc7aaaa755d7a248", + "nmdc:8da8a1735e49708827f4b10a40bde0c2" + ], + "asm_score": 42.42, + "was_informed_by": "gold:Gp0153825", + "ctg_powsum": 7770.154, + "scaf_max": 191993, + "id": "nmdc:2b6aa51bbeabec960c878b011e09836b", + "scaf_powsum": 7863.356, + 
"execution_resource": "NERSC-Cori", + "contigs": 1451, + "name": "Assembly Activity for nmdc:mga0t64728", + "ctg_max": 191993, + "gc_std": 0.08631, + "gc_avg": 0.3309, + "contig_bp": 3177017, + "started_at_time": "2021-12-01T21:21:54Z", + "scaf_bp": 3177417, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 1447, + "ended_at_time": "2021-12-02T21:03:45+00:00", + "ctg_l50": 55037, + "ctg_l90": 317, + "ctg_n50": 17, + "ctg_n90": 368, + "scaf_l50": 56195, + "scaf_l90": 317, + "scaf_n50": 16, + "scaf_n90": 365, + "scaf_l_gt50k": 1771181, + "scaf_n_gt50k": 19, + "scaf_pct_gt50k": 55.742794 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b7a" + }, + "id": "nmdc:omprc-11-fgdnjq59", + "name": "Lab enriched deep subsurface shale carbon reservoir microbial communities from Ohio, USA - WG8 viral induction", + "description": "Metagenome sequencing to improve our understanding of microbial diversity and metabolism in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-5hhxdt51" + ], + "has_output": [ + "jgi:57be41397ded5e0c87137bca" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2016-07-11", + "mod_date": "2021-06-15", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - WG8 viral induction", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0153825" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c849" + }, + "has_input": [ + "nmdc:b5cf29afb875f40bda3d6a3ce3c7ae3e" + ], + "part_of": [ + "nmdc:mga0t64728" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:02f0bfce391292bffdc3fb0ede465fe9", + "nmdc:611e94aab854d6b45180b9b673412931" + ], + "was_informed_by": 
"gold:Gp0153825", + "input_read_count": 130593486, + "output_read_bases": 19258590273, + "id": "nmdc:2b6aa51bbeabec960c878b011e09836b", + "execution_resource": "NERSC-Cori", + "input_read_bases": 19719616386, + "name": "Read QC Activity for nmdc:mga0t64728", + "output_read_count": 128807482, + "started_at_time": "2021-12-01T21:21:54Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-12-02T21:03:45+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf09" + }, + "has_input": [ + "nmdc:02f0bfce391292bffdc3fb0ede465fe9" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:79fac15db82a5de044af7ab4f1b31a0e", + "nmdc:e9ebfe8a61aa67e14c9018ed39f75e06", + "nmdc:6d17eebb0556fee2fbeb3e44efc1c917", + "nmdc:96dddb7a2c71faf71b18d84e8bc85746", + "nmdc:0be6b57ed1f51413d251c49090442fdd", + "nmdc:3efa21710c728332bd1d139190321703", + "nmdc:e11972119e273e1e29803b7261511c37", + "nmdc:e99695e0b4b6a09ab54c16d1529f0a63", + "nmdc:fe4c1a2df3f76da9634250d52b18dc9a" + ], + "was_informed_by": "gold:Gp0153825", + "id": "nmdc:2b6aa51bbeabec960c878b011e09836b", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0t64728", + "started_at_time": "2021-12-01T21:21:54Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-12-02T21:03:45+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 2939226135, + "type": "nmdc:DataObject", + "id": "jgi:560df5b70d878540fd6fe1ff", + "name": "9567.8.137569.TCCTGAG-CTCTCTA.fastq.gz" + }, + { + "name": "Gp0119867_Filtered Reads", + "description": "Filtered Reads for Gp0119867", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/qa/nmdc_mga0pdcf20_filtered.fastq.gz", + "md5_checksum": "9f76849dfee6201591d9e4f97b1e5601", + "id": "nmdc:9f76849dfee6201591d9e4f97b1e5601", + 
"file_size_bytes": 2039534129 + }, + { + "name": "Gp0119867_Filtered Stats", + "description": "Filtered Stats for Gp0119867", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/qa/nmdc_mga0pdcf20_filterStats.txt", + "md5_checksum": "1ca7ab14098e5484e7edced918100c15", + "id": "nmdc:1ca7ab14098e5484e7edced918100c15", + "file_size_bytes": 284 + }, + { + "name": "Gp0119867_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_gottcha2_report.tsv", + "md5_checksum": "efed1192940d7d794c348b2fb5254b5f", + "id": "nmdc:efed1192940d7d794c348b2fb5254b5f", + "file_size_bytes": 20306 + }, + { + "name": "Gp0119867_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_gottcha2_report_full.tsv", + "md5_checksum": "ccabc2c1f15f4cf457557695c16b62c9", + "id": "nmdc:ccabc2c1f15f4cf457557695c16b62c9", + "file_size_bytes": 789563 + }, + { + "name": "Gp0119867_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119867", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_gottcha2_krona.html", + "md5_checksum": "925f6bba420531e83285198bffd06edf", + "id": "nmdc:925f6bba420531e83285198bffd06edf", + "file_size_bytes": 285960 + }, + { + "name": "Gp0119867_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119867", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_centrifuge_classification.tsv", + "md5_checksum": "a6d5c4d0616134d6066eae0d90b93a50", + "id": "nmdc:a6d5c4d0616134d6066eae0d90b93a50", + "file_size_bytes": 2116776551 
+ }, + { + "name": "Gp0119867_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119867", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_centrifuge_report.tsv", + "md5_checksum": "479e9174f330c6016382ab18e9c9d644", + "id": "nmdc:479e9174f330c6016382ab18e9c9d644", + "file_size_bytes": 257245 + }, + { + "name": "Gp0119867_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119867", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_centrifuge_krona.html", + "md5_checksum": "f1a4bca4e16853feb276de24db77ecd4", + "id": "nmdc:f1a4bca4e16853feb276de24db77ecd4", + "file_size_bytes": 2327920 + }, + { + "name": "Gp0119867_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119867", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_kraken2_classification.tsv", + "md5_checksum": "cbea85cd59798a6d231fb8fffffa60fc", + "id": "nmdc:cbea85cd59798a6d231fb8fffffa60fc", + "file_size_bytes": 1851902063 + }, + { + "name": "Gp0119867_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119867", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_kraken2_report.tsv", + "md5_checksum": "75101ddd772a807aeca554363347b81b", + "id": "nmdc:75101ddd772a807aeca554363347b81b", + "file_size_bytes": 611878 + }, + { + "name": "Gp0119867_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119867", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/ReadbasedAnalysis/nmdc_mga0pdcf20_kraken2_krona.html", + 
"md5_checksum": "6667e15f9ef824b772f3f428b9f19388", + "id": "nmdc:6667e15f9ef824b772f3f428b9f19388", + "file_size_bytes": 3811046 + }, + { + "name": "Gp0119867_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119867", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/assembly/nmdc_mga0pdcf20_contigs.fna", + "md5_checksum": "2cb0c4b36cf19611d4eaef5f36318fe7", + "id": "nmdc:2cb0c4b36cf19611d4eaef5f36318fe7", + "file_size_bytes": 226765509 + }, + { + "name": "Gp0119867_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119867", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/assembly/nmdc_mga0pdcf20_scaffolds.fna", + "md5_checksum": "87f32d5751e7418852eaa0bdce88b520", + "id": "nmdc:87f32d5751e7418852eaa0bdce88b520", + "file_size_bytes": 225851967 + }, + { + "name": "Gp0119867_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/assembly/nmdc_mga0pdcf20_covstats.txt", + "md5_checksum": "3af4f79ba558f9e4740dca5ac82bfb73", + "id": "nmdc:3af4f79ba558f9e4740dca5ac82bfb73", + "file_size_bytes": 23262951 + }, + { + "name": "Gp0119867_Assembled AGP file", + "description": "Assembled AGP file for Gp0119867", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/assembly/nmdc_mga0pdcf20_assembly.agp", + "md5_checksum": "bd0e70488d70efefed25ae7603b513b5", + "id": "nmdc:bd0e70488d70efefed25ae7603b513b5", + "file_size_bytes": 21835632 + }, + { + "name": "Gp0119867_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119867", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/assembly/nmdc_mga0pdcf20_pairedMapped_sorted.bam", + "md5_checksum": 
"f8dc145afa51127bb13cc568855875b9", + "id": "nmdc:f8dc145afa51127bb13cc568855875b9", + "file_size_bytes": 2500360063 + }, + { + "name": "Gp0119867_Protein FAA", + "description": "Protein FAA for Gp0119867", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_proteins.faa", + "md5_checksum": "e77fcea6fad6513fa189cb8bfe513745", + "id": "nmdc:e77fcea6fad6513fa189cb8bfe513745", + "file_size_bytes": 121017424 + }, + { + "name": "Gp0119867_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119867", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_structural_annotation.gff", + "md5_checksum": "23b1167d33f4bc7d20f06b3f52f6d110", + "id": "nmdc:23b1167d33f4bc7d20f06b3f52f6d110", + "file_size_bytes": 2536 + }, + { + "name": "Gp0119867_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119867", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_functional_annotation.gff", + "md5_checksum": "291c2e5e3e4129c1d29a051e884b515b", + "id": "nmdc:291c2e5e3e4129c1d29a051e884b515b", + "file_size_bytes": 120429028 + }, + { + "name": "Gp0119867_KO TSV file", + "description": "KO TSV file for Gp0119867", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_ko.tsv", + "md5_checksum": "fa2e612c33c6eeb41bb8d2ce1cddbdd7", + "id": "nmdc:fa2e612c33c6eeb41bb8d2ce1cddbdd7", + "file_size_bytes": 17770886 + }, + { + "name": "Gp0119867_EC TSV file", + "description": "EC TSV file for Gp0119867", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_ec.tsv", + "md5_checksum": 
"602bc048f491ce8bc3b5ee2fd571df68", + "id": "nmdc:602bc048f491ce8bc3b5ee2fd571df68", + "file_size_bytes": 12704985 + }, + { + "name": "Gp0119867_COG GFF file", + "description": "COG GFF file for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_cog.gff", + "md5_checksum": "18db119a96eacbb8f877be4e41c44768", + "id": "nmdc:18db119a96eacbb8f877be4e41c44768", + "file_size_bytes": 72778015 + }, + { + "name": "Gp0119867_PFAM GFF file", + "description": "PFAM GFF file for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_pfam.gff", + "md5_checksum": "bb7a287b3075de92b96504518596e537", + "id": "nmdc:bb7a287b3075de92b96504518596e537", + "file_size_bytes": 63043647 + }, + { + "name": "Gp0119867_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_tigrfam.gff", + "md5_checksum": "b1a21a32d5c814bbb7281b4fb9c0012c", + "id": "nmdc:b1a21a32d5c814bbb7281b4fb9c0012c", + "file_size_bytes": 10974908 + }, + { + "name": "Gp0119867_SMART GFF file", + "description": "SMART GFF file for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_smart.gff", + "md5_checksum": "231a70ea7ab9fcb6d53db92f3da63ccd", + "id": "nmdc:231a70ea7ab9fcb6d53db92f3da63ccd", + "file_size_bytes": 16216746 + }, + { + "name": "Gp0119867_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_supfam.gff", + "md5_checksum": "d7cf03f03618b16bb353d2c7a00ac154", + "id": "nmdc:d7cf03f03618b16bb353d2c7a00ac154", + "file_size_bytes": 92273317 + }, + { + "name": "Gp0119867_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_cath_funfam.gff", + 
"md5_checksum": "79dcba2c535e44dc34b36e3f1e39a750", + "id": "nmdc:79dcba2c535e44dc34b36e3f1e39a750", + "file_size_bytes": 80750444 + }, + { + "name": "Gp0119867_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/annotation/nmdc_mga0pdcf20_ko_ec.gff", + "md5_checksum": "abb912ad792ec018d9af5576e36da8e9", + "id": "nmdc:abb912ad792ec018d9af5576e36da8e9", + "file_size_bytes": 56679831 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119867_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/MAGs/nmdc_mga0pdcf20_bins.tooShort.fa", + "md5_checksum": "5c39b38fc9a0898c8223142edcf534c3", + "id": "nmdc:5c39b38fc9a0898c8223142edcf534c3", + "file_size_bytes": 117838325 + }, + { + "name": "Gp0119867_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/MAGs/nmdc_mga0pdcf20_bins.unbinned.fa", + "md5_checksum": "4e9a010790fcf050219ad3382479d683", + "id": "nmdc:4e9a010790fcf050219ad3382479d683", + "file_size_bytes": 74813513 + }, + { + "name": "Gp0119867_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119867", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/MAGs/nmdc_mga0pdcf20_checkm_qa.out", + 
"md5_checksum": "b3a8fbbaaf4fc08627144ce88cd98d0d", + "id": "nmdc:b3a8fbbaaf4fc08627144ce88cd98d0d", + "file_size_bytes": 7353 + }, + { + "name": "Gp0119867_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119867", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/MAGs/nmdc_mga0pdcf20_hqmq_bin.zip", + "md5_checksum": "d6c5e04125ba6a6b9c78cd16d71aafdb", + "id": "nmdc:d6c5e04125ba6a6b9c78cd16d71aafdb", + "file_size_bytes": 3641395 + }, + { + "name": "Gp0119867_metabat2 bins", + "description": "metabat2 bins for Gp0119867", + "url": "https://data.microbiomedata.org/data/nmdc:mga0pdcf20/MAGs/nmdc_mga0pdcf20_metabat_bin.zip", + "md5_checksum": "08f765d3d2726deea2b9d42cbf1b7f7d", + "id": "nmdc:08f765d3d2726deea2b9d42cbf1b7f7d", + "file_size_bytes": 6742889 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e29" + }, + "description": "Assembled contigs fasta for gold:Gp0119867", + "url": "https://data.microbiomedata.org/data/1777_95836/assembly/assembly_contigs.fna", + "file_size_bytes": 225321304, + "type": "nmdc:DataObject", + "id": "nmdc:203d4e9e3d7999df5978a77217526d42", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e2b" + }, + "description": "Assembled scaffold fasta for gold:Gp0119867", + "url": "https://data.microbiomedata.org/data/1777_95836/assembly/assembly_scaffolds.fna", + "file_size_bytes": 224426447, + "type": "nmdc:DataObject", + "id": "nmdc:7a932747640c6c634ac543f0cddcc7b2", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e2d" + }, + "description": "Assembled AGP file for gold:Gp0119867", + "url": "https://data.microbiomedata.org/data/1777_95836/assembly/assembly.agp", + "file_size_bytes": 18908372, + "type": "nmdc:DataObject", + "id": "nmdc:98f05214a90525072f0adef89d1635f3", + "name": 
"assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e2f" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119867", + "url": "https://data.microbiomedata.org/data/1777_95836/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 2476051879, + "type": "nmdc:DataObject", + "id": "nmdc:3c9f7c4012779bf6b6f5908f635430c0", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e30" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119867", + "url": "https://data.microbiomedata.org/data/1777_95836/assembly/mapping_stats.txt", + "file_size_bytes": 21818746, + "type": "nmdc:DataObject", + "id": "nmdc:f4cb6f0f4d4430fca83a54cc43d7ec78", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ab7" + }, + "id": "nmdc:627cddc6b482af573cf356a902af5d64", + "name": "1777_95836.json", + "description": "Gold:Gp0119867 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95836/ReadbasedAnalysis/1777_95836.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15abf" + }, + "id": "nmdc:fe68a61cce4d78fc14ca93ff42ce1e6c", + "name": "1777_95836.krona.html", + "description": "Gold:Gp0119867 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95836/ReadbasedAnalysis/centrifuge/1777_95836.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16434" + }, + "id": "nmdc:ba4390b10ded1aed81acdd38a9c3b1b6", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119867", + "file_size_bytes": 114739827, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/bins.tooShort.fa", 
+ "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16436" + }, + "id": "nmdc:ecb69faeb1822abf8f7babba8e277424", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119867", + "file_size_bytes": 91714218, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16437" + }, + "id": "nmdc:03955dcd50c8c2a500801401ab053d94", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119867", + "file_size_bytes": 6630, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16438" + }, + "id": "nmdc:8a1e858053c11db9c0f5da63779c1564", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119867", + "file_size_bytes": 1304, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16439" + }, + "id": "nmdc:0f48e2d743585a0e287b1f8831f728cb", + "name": "gold:Gp0119867.bins.23.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 256723, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.23.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1643a" + }, + "id": "nmdc:11d2f03ceb103418eea393a045f442a1", + "name": "gold:Gp0119867.bins.27.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 2132515, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.27.fa", + "type": "nmdc:DataObject", + 
"data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1643b" + }, + "id": "nmdc:324a060a8404925875db0dfda0cbcc59", + "name": "gold:Gp0119867.bins.18.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 274724, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.18.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1643c" + }, + "id": "nmdc:6d922a5cc073356dc2268f59e58ddaac", + "name": "gold:Gp0119867.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 222043, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1643d" + }, + "id": "nmdc:0aba6d167620ce2060205eb829c6187e", + "name": "gold:Gp0119867.bins.35.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 285410, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.35.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1643e" + }, + "id": "nmdc:b9998e4c1557371828c9ef60b407b2f0", + "name": "gold:Gp0119867.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 249347, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1643f" + }, + "id": "nmdc:352b1e6fb3dfeb261f15e90393702f13", + "name": "gold:Gp0119867.bins.28.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 288117, + "url": 
"https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.28.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16440" + }, + "id": "nmdc:b3a81e353132434d98631ba3f61414d6", + "name": "gold:Gp0119867.bins.26.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 378877, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.26.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16441" + }, + "id": "nmdc:214bda1321dad9b57dd00112f89766a3", + "name": "gold:Gp0119867.bins.29.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 226638, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.29.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16442" + }, + "id": "nmdc:446f5989252f3513241a64effc321c5c", + "name": "gold:Gp0119867.bins.22.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 356680, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.22.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16443" + }, + "id": "nmdc:c38ccd2b71d72c20e9d9abe502c622d5", + "name": "gold:Gp0119867.bins.25.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 381200, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.25.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16444" + }, + "id": "nmdc:4b39474632409849ea766db2b4674393", + "name": "gold:Gp0119867.bins.32.fa", + "description": "metabat2 binned contig file for 
gold:Gp0119867", + "file_size_bytes": 315365, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.32.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16445" + }, + "id": "nmdc:980b03bf41bd71a7f6d6b573c7678632", + "name": "gold:Gp0119867.bins.19.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 257754, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.19.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16446" + }, + "id": "nmdc:08b987a2464b4cf73104800e1dca95a6", + "name": "gold:Gp0119867.bins.30.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 409787, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.30.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16447" + }, + "id": "nmdc:a7c080cb7f1b12e923603f3f49a6c3bd", + "name": "gold:Gp0119867.bins.21.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 382015, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.21.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16448" + }, + "id": "nmdc:6c93b54c0c557d97c2eec4a19e289d6b", + "name": "gold:Gp0119867.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 262844, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16449" + }, + "id": "nmdc:1538308c49853dcb9fcd2cffa92e11f7", + "name": "gold:Gp0119867.bins.11.fa", + 
"description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 255808, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1644a" + }, + "id": "nmdc:86527e63ffdb6ef5f5277d3ac805374f", + "name": "gold:Gp0119867.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 239544, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1644b" + }, + "id": "nmdc:7e9d6488ac78f06394ba5133cd00ff68", + "name": "gold:Gp0119867.bins.24.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 513634, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.24.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1644c" + }, + "id": "nmdc:33a7bbe3a576ce9be0caa8cf6cd1605b", + "name": "gold:Gp0119867.bins.34.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 369789, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.34.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1644d" + }, + "id": "nmdc:42ff901fe37d7faa78493837d9e02c15", + "name": "gold:Gp0119867.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 341493, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1644e" + }, + "id": "nmdc:483111a7039d81fc5903b29b2785bfb5", + 
"name": "gold:Gp0119867.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 227342, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1644f" + }, + "id": "nmdc:09f31a089cd4707753fef5b2cdaf387c", + "name": "gold:Gp0119867.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 287875, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16450" + }, + "id": "nmdc:7ba24b216ffbaced8891e687e2b78cbe", + "name": "gold:Gp0119867.bins.20.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 490845, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.20.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16451" + }, + "id": "nmdc:7a82670c8d653b51b658fbceb6df91a5", + "name": "gold:Gp0119867.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 290101, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16452" + }, + "id": "nmdc:7fba69edff917737b7ebfbb0335e2935", + "name": "gold:Gp0119867.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 292085, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16453" + }, + "id": 
"nmdc:f52508da499a084a30a2460391b0f2aa", + "name": "gold:Gp0119867.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 1541771, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16454" + }, + "id": "nmdc:81dfa85547b65e0265a562015d917259", + "name": "gold:Gp0119867.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 2125055, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16455" + }, + "id": "nmdc:a0b6bc6c0bed32c96ef97937b6e52034", + "name": "gold:Gp0119867.bins.16.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 476595, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16456" + }, + "id": "nmdc:15d1a16e79b26cbe293378825d7d34ef", + "name": "gold:Gp0119867.bins.33.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 336884, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.33.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16457" + }, + "id": "nmdc:43eea3f321a9b93ae094409379cab8db", + "name": "gold:Gp0119867.bins.17.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 599196, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.17.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b16458" + }, + "id": "nmdc:a76bb2d5a1faafe1dfb30762e602f526", + "name": "gold:Gp0119867.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 565391, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16459" + }, + "id": "nmdc:072506e6a317cb4425b54f8bf3a2d2a6", + "name": "gold:Gp0119867.bins.15.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 338314, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1645c" + }, + "id": "nmdc:cec885c1b67f3e0daac20d671583e613", + "name": "gold:Gp0119867.bins.31.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 302264, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.31.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1645f" + }, + "id": "nmdc:6d59d3e32690e01616e49f7c8f127da6", + "name": "gold:Gp0119867.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0119867", + "file_size_bytes": 398497, + "url": "https://data.microbiomedata.org/data/1777_95836/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca4" + }, + "description": "EC TSV File for gold:Gp0119867", + "url": "https://data.microbiomedata.org/1777_95836/img_annotation/Ga0482148_ec.tsv", + "md5_checksum": "e1b8a1bb51b7c8421ed748d3da31cab9", + "file_size_bytes": 3385, + "id": "nmdc:e1b8a1bb51b7c8421ed748d3da31cab9", + "name": "gold:Gp0119867_EC TSV File", + "data_object_type": 
"Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca5" + }, + "description": "KO TSV File for gold:Gp0119867", + "url": "https://data.microbiomedata.org/1777_95836/img_annotation/Ga0482148_ko.tsv", + "md5_checksum": "e5a23d1c8e78d044bb907acb7490c223", + "file_size_bytes": 3385, + "id": "nmdc:e5a23d1c8e78d044bb907acb7490c223", + "name": "gold:Gp0119867_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca7" + }, + "description": "Structural annotation GFF file for gold:Gp0119867", + "url": "https://data.microbiomedata.org/1777_95836/img_annotation/Ga0482148_structural_annotation.gff", + "md5_checksum": "fd11757a61a2400b5ed8077c62cc97b9", + "file_size_bytes": 3385, + "id": "nmdc:fd11757a61a2400b5ed8077c62cc97b9", + "name": "gold:Gp0119867_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16caa" + }, + "description": "Protein FAA for gold:Gp0119867", + "url": "https://data.microbiomedata.org/1777_95836/img_annotation/Ga0482148_proteins.faa", + "md5_checksum": "b20ef196169b86e1498e45194adeff8b", + "file_size_bytes": 3385, + "id": "nmdc:b20ef196169b86e1498e45194adeff8b", + "name": "gold:Gp0119867_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cab" + }, + "description": "Functional annotation GFF file for gold:Gp0119867", + "url": "https://data.microbiomedata.org/1777_95836/img_annotation/Ga0482148_functional_annotation.gff", + "md5_checksum": "a4403a73bfd1f57ce219127cd1ebed51", + "file_size_bytes": 3385, + "id": "nmdc:a4403a73bfd1f57ce219127cd1ebed51", + "name": "gold:Gp0119867_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + } + ], + 
"mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab3470a" + }, + "has_input": [ + "nmdc:2cb0c4b36cf19611d4eaef5f36318fe7", + "nmdc:f8dc145afa51127bb13cc568855875b9", + "nmdc:291c2e5e3e4129c1d29a051e884b515b" + ], + "too_short_contig_num": 242819, + "part_of": [ + "nmdc:mga0pdcf20" + ], + "binned_contig_num": 5678, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:5c39b38fc9a0898c8223142edcf534c3", + "nmdc:4e9a010790fcf050219ad3382479d683", + "nmdc:b3a8fbbaaf4fc08627144ce88cd98d0d", + "nmdc:d6c5e04125ba6a6b9c78cd16d71aafdb", + "nmdc:08f765d3d2726deea2b9d42cbf1b7f7d" + ], + "was_informed_by": "gold:Gp0119867", + "input_contig_num": 288841, + "id": "nmdc:2ccfca5db7d4b7a9791b679a8bebf109", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0pdcf20", + "mags_list": [ + { + "number_of_contig": 201, + "completeness": 39.97, + "bin_name": "bins.1", + "gene_count": 1213, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.01, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 79, + "completeness": 0.0, + "bin_name": "bins.10", + "gene_count": 987, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 226, + "completeness": 48.41, + "bin_name": "bins.11", + "gene_count": 1195, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.38, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 
0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 145, + "completeness": 14.66, + "bin_name": "bins.12", + "gene_count": 508, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 106, + "completeness": 50.0, + "bin_name": "bins.13", + "gene_count": 978, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nanopelagicales", + "num_16s": 0, + "gtdbtk_family": "Nanopelagicaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 3.45, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Planktophila", + "num_t_rna": 27 + }, + { + "number_of_contig": 32, + "completeness": 0.0, + "bin_name": "bins.14", + "gene_count": 348, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 118, + "completeness": 57.76, + "bin_name": "bins.15", + "gene_count": 1154, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Microtrichales", + "num_16s": 1, + "gtdbtk_family": "Ilumatobacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Acidimicrobiia", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA3006", + "num_t_rna": 15 + }, + { + "number_of_contig": 145, + "completeness": 23.43, + "bin_name": "bins.16", + "gene_count": 790, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 6.9, + "gtdbtk_class": "", + 
"gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 66, + "completeness": 12.07, + "bin_name": "bins.17", + "gene_count": 469, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 173, + "completeness": 80.34, + "bin_name": "bins.18", + "gene_count": 1785, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 0, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.98, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA2463", + "num_t_rna": 30 + }, + { + "number_of_contig": 31, + "completeness": 0.0, + "bin_name": "bins.19", + "gene_count": 242, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 149, + "completeness": 52.74, + "bin_name": "bins.2", + "gene_count": 968, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 13.36, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 15 + }, + { + "number_of_contig": 135, + "completeness": 65.02, + "bin_name": "bins.20", + "gene_count": 1028, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "CSP1-4", + "num_16s": 0, + "gtdbtk_family": "UBA10416", + "gtdbtk_domain": "Bacteria", + "contamination": 2.88, + "gtdbtk_class": "Ellin6529", + 
"gtdbtk_phylum": "Chloroflexota_A", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA10416", + "num_t_rna": 29 + }, + { + "number_of_contig": 59, + "completeness": 9.45, + "bin_name": "bins.21", + "gene_count": 228, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + "number_of_contig": 31, + "completeness": 17.67, + "bin_name": "bins.22", + "gene_count": 251, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 155, + "completeness": 40.69, + "bin_name": "bins.23", + "gene_count": 952, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 7.76, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 + }, + { + "number_of_contig": 72, + "completeness": 8.33, + "bin_name": "bins.24", + "gene_count": 439, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 37, + "completeness": 0.0, + "bin_name": "bins.25", + "gene_count": 259, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, 
+ { + "number_of_contig": 140, + "completeness": 12.5, + "bin_name": "bins.26", + "gene_count": 803, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 4.17, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 331, + "completeness": 52.01, + "bin_name": "bins.27", + "gene_count": 1577, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Phycisphaerales", + "num_16s": 0, + "gtdbtk_family": "SM1A02", + "gtdbtk_domain": "Bacteria", + "contamination": 0.57, + "gtdbtk_class": "Phycisphaerae", + "gtdbtk_phylum": "Planctomycetota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 154, + "completeness": 46.55, + "bin_name": "bins.28", + "gene_count": 1023, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 25 + }, + { + "number_of_contig": 126, + "completeness": 14.66, + "bin_name": "bins.29", + "gene_count": 672, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 79, + "completeness": 33.49, + "bin_name": "bins.3", + "gene_count": 522, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 8.77, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 312, + "completeness": 
62.24, + "bin_name": "bins.30", + "gene_count": 1995, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 34.48, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 52 + }, + { + "number_of_contig": 84, + "completeness": 12.5, + "bin_name": "bins.31", + "gene_count": 455, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 69, + "completeness": 12.93, + "bin_name": "bins.32", + "gene_count": 420, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 294, + "completeness": 68.06, + "bin_name": "bins.33", + "gene_count": 1754, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Chitinophagales", + "num_16s": 0, + "gtdbtk_family": "Chitinophagaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.49, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 149, + "completeness": 31.58, + "bin_name": "bins.34", + "gene_count": 864, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 9.65, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 26 + }, + { + "number_of_contig": 29, + "completeness": 55.21, + "bin_name": "bins.35", + 
"gene_count": 874, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nanopelagicales", + "num_16s": 1, + "gtdbtk_family": "AcAMD-5", + "gtdbtk_domain": "Bacteria", + "contamination": 0.15, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "ATZT02", + "num_t_rna": 18 + }, + { + "number_of_contig": 319, + "completeness": 79.28, + "bin_name": "bins.36", + "gene_count": 2682, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Synechococcales", + "num_16s": 0, + "gtdbtk_family": "Cyanobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 3.35, + "gtdbtk_class": "Cyanobacteriia", + "gtdbtk_phylum": "Cyanobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "PCC7001", + "num_t_rna": 38 + }, + { + "number_of_contig": 34, + "completeness": 8.62, + "bin_name": "bins.37", + "gene_count": 266, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 244, + "completeness": 36.79, + "bin_name": "bins.38", + "gene_count": 1231, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.21, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 18 + }, + { + "number_of_contig": 468, + "completeness": 4.17, + "bin_name": "bins.39", + "gene_count": 2617, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 87, + "completeness": 
19.91, + "bin_name": "bins.4", + "gene_count": 528, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 87, + "completeness": 56.47, + "bin_name": "bins.5", + "gene_count": 942, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nanopelagicales", + "num_16s": 0, + "gtdbtk_family": "Nanopelagicaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 3.88, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Planktophila", + "num_t_rna": 14 + }, + { + "number_of_contig": 96, + "completeness": 27.11, + "bin_name": "bins.6", + "gene_count": 696, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 197, + "completeness": 34.05, + "bin_name": "bins.7", + "gene_count": 1353, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.45, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 27 + }, + { + "number_of_contig": 173, + "completeness": 37.93, + "bin_name": "bins.8", + "gene_count": 784, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 246, + "completeness": 11.21, + "bin_name": "bins.9", + 
"gene_count": 1176, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + } + ], + "unbinned_contig_num": 40344, + "started_at_time": "2021-10-11T02:23:33Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T03:26:12+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef99c" + }, + "has_input": [ + "nmdc:2cb0c4b36cf19611d4eaef5f36318fe7" + ], + "part_of": [ + "nmdc:mga0pdcf20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:e77fcea6fad6513fa189cb8bfe513745", + "nmdc:23b1167d33f4bc7d20f06b3f52f6d110", + "nmdc:291c2e5e3e4129c1d29a051e884b515b", + "nmdc:fa2e612c33c6eeb41bb8d2ce1cddbdd7", + "nmdc:602bc048f491ce8bc3b5ee2fd571df68", + "nmdc:18db119a96eacbb8f877be4e41c44768", + "nmdc:bb7a287b3075de92b96504518596e537", + "nmdc:b1a21a32d5c814bbb7281b4fb9c0012c", + "nmdc:231a70ea7ab9fcb6d53db92f3da63ccd", + "nmdc:d7cf03f03618b16bb353d2c7a00ac154", + "nmdc:79dcba2c535e44dc34b36e3f1e39a750", + "nmdc:abb912ad792ec018d9af5576e36da8e9" + ], + "was_informed_by": "gold:Gp0119867", + "id": "nmdc:2ccfca5db7d4b7a9791b679a8bebf109", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0pdcf20", + "started_at_time": "2021-10-11T02:23:33Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T03:26:12+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f8e" + }, + "has_input": [ + "nmdc:9f76849dfee6201591d9e4f97b1e5601" + ], + "part_of": [ + "nmdc:mga0pdcf20" + ], + "ctg_logsum": 1224237, + "scaf_logsum": 1248224, + "gap_pct": 0.01867, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + 
"nmdc:2cb0c4b36cf19611d4eaef5f36318fe7", + "nmdc:87f32d5751e7418852eaa0bdce88b520", + "nmdc:3af4f79ba558f9e4740dca5ac82bfb73", + "nmdc:bd0e70488d70efefed25ae7603b513b5", + "nmdc:f8dc145afa51127bb13cc568855875b9" + ], + "asm_score": 7.07, + "was_informed_by": "gold:Gp0119867", + "ctg_powsum": 143439, + "scaf_max": 162178, + "id": "nmdc:2ccfca5db7d4b7a9791b679a8bebf109", + "scaf_powsum": 146663, + "execution_resource": "NERSC-Cori", + "contigs": 288915, + "name": "Assembly Activity for nmdc:mga0pdcf20", + "ctg_max": 162178, + "gc_std": 0.10005, + "contig_bp": 214234039, + "gc_avg": 0.48909, + "started_at_time": "2021-10-11T02:23:33Z", + "scaf_bp": 214274039, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 285104, + "ended_at_time": "2021-10-11T03:26:12+00:00", + "ctg_l50": 978, + "ctg_l90": 322, + "ctg_n50": 47437, + "ctg_n90": 214762, + "scaf_l50": 1006, + "scaf_l90": 322, + "scaf_n50": 45827, + "scaf_n90": 211872, + "scaf_l_gt50k": 1017514, + "scaf_n_gt50k": 11, + "scaf_pct_gt50k": 0.47486576 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b7c" + }, + "id": "nmdc:omprc-11-mskcbv41", + "name": "Fresh water mixed with fracking chemicals microbial communities from Ohio, USA - Utica-2 Time Series FC 2014_7_16", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-9wsknt94" + ], + "has_output": [ + "jgi:560df5b70d878540fd6fe1ff" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2021-06-15", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series FC 2014_7_16", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119867" + ] + } + ], + 
"read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85d" + }, + "has_input": [ + "nmdc:42495b9b0129e0a6694f8714df6ddadf" + ], + "part_of": [ + "nmdc:mga0pdcf20" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9f76849dfee6201591d9e4f97b1e5601", + "nmdc:1ca7ab14098e5484e7edced918100c15" + ], + "was_informed_by": "gold:Gp0119867", + "input_read_count": 29798478, + "output_read_bases": 4255244328, + "id": "nmdc:2ccfca5db7d4b7a9791b679a8bebf109", + "execution_resource": "NERSC-Cori", + "input_read_bases": 4499570178, + "name": "Read QC Activity for nmdc:mga0pdcf20", + "output_read_count": 29408192, + "started_at_time": "2021-10-11T02:23:33Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T03:26:12+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf19" + }, + "has_input": [ + "nmdc:9f76849dfee6201591d9e4f97b1e5601" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:efed1192940d7d794c348b2fb5254b5f", + "nmdc:ccabc2c1f15f4cf457557695c16b62c9", + "nmdc:925f6bba420531e83285198bffd06edf", + "nmdc:a6d5c4d0616134d6066eae0d90b93a50", + "nmdc:479e9174f330c6016382ab18e9c9d644", + "nmdc:f1a4bca4e16853feb276de24db77ecd4", + "nmdc:cbea85cd59798a6d231fb8fffffa60fc", + "nmdc:75101ddd772a807aeca554363347b81b", + "nmdc:6667e15f9ef824b772f3f428b9f19388" + ], + "was_informed_by": "gold:Gp0119867", + "id": "nmdc:2ccfca5db7d4b7a9791b679a8bebf109", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0pdcf20", + "started_at_time": "2021-10-11T02:23:33Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T03:26:12+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 8006972300, + "type": "nmdc:DataObject", + "id": "jgi:560df3750d878540fd6fe1d1", + "name": 
"9567.6.137555.TAAGGCG-AGAGTAG.fastq.gz" + }, + { + "name": "Gp0119865_Filtered Reads", + "description": "Filtered Reads for Gp0119865", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/qa/nmdc_mga0dqhf53_filtered.fastq.gz", + "md5_checksum": "bef640a10fcdf22c758158c572557903", + "id": "nmdc:bef640a10fcdf22c758158c572557903", + "file_size_bytes": 5969707264 + }, + { + "name": "Gp0119865_Filtered Stats", + "description": "Filtered Stats for Gp0119865", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/qa/nmdc_mga0dqhf53_filterStats.txt", + "md5_checksum": "c3c7612d03968cf94263bc4247e349c8", + "id": "nmdc:c3c7612d03968cf94263bc4247e349c8", + "file_size_bytes": 288 + }, + { + "name": "Gp0119865_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_gottcha2_report.tsv", + "md5_checksum": "c584ed2932bd4a7a2f9dd3ec6e276666", + "id": "nmdc:c584ed2932bd4a7a2f9dd3ec6e276666", + "file_size_bytes": 12528 + }, + { + "name": "Gp0119865_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_gottcha2_report_full.tsv", + "md5_checksum": "26ea3d0f6789a8b4c069d95b4683223a", + "id": "nmdc:26ea3d0f6789a8b4c069d95b4683223a", + "file_size_bytes": 762988 + }, + { + "name": "Gp0119865_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119865", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_gottcha2_krona.html", + "md5_checksum": "8aa4029ecd774b56ff0a609b88f58491", + "id": "nmdc:8aa4029ecd774b56ff0a609b88f58491", + "file_size_bytes": 260335 + }, + { + "name": "Gp0119865_Centrifuge classification TSV report", + 
"description": "Centrifuge classification TSV report for Gp0119865", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_centrifuge_classification.tsv", + "md5_checksum": "a1cb4436b54b8103c2023fff44c71605", + "id": "nmdc:a1cb4436b54b8103c2023fff44c71605", + "file_size_bytes": 5042522528 + }, + { + "name": "Gp0119865_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119865", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_centrifuge_report.tsv", + "md5_checksum": "e67bddc790aa53a4c80a19aa40b5ceee", + "id": "nmdc:e67bddc790aa53a4c80a19aa40b5ceee", + "file_size_bytes": 265349 + }, + { + "name": "Gp0119865_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119865", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_centrifuge_krona.html", + "md5_checksum": "20e6e923e0648ef56c5d48e0c3f407d5", + "id": "nmdc:20e6e923e0648ef56c5d48e0c3f407d5", + "file_size_bytes": 2353440 + }, + { + "name": "Gp0119865_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119865", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_kraken2_classification.tsv", + "md5_checksum": "3dc48571d8b3899fb7482dba34e66127", + "id": "nmdc:3dc48571d8b3899fb7482dba34e66127", + "file_size_bytes": 4306925654 + }, + { + "name": "Gp0119865_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119865", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_kraken2_report.tsv", + "md5_checksum": 
"7b62dd0dab192394469b049096d614c5", + "id": "nmdc:7b62dd0dab192394469b049096d614c5", + "file_size_bytes": 672327 + }, + { + "name": "Gp0119865_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119865", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/ReadbasedAnalysis/nmdc_mga0dqhf53_kraken2_krona.html", + "md5_checksum": "ec0dc7ee83062fcc43b212662be9b802", + "id": "nmdc:ec0dc7ee83062fcc43b212662be9b802", + "file_size_bytes": 4043126 + }, + { + "name": "Gp0119865_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119865", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/assembly/nmdc_mga0dqhf53_contigs.fna", + "md5_checksum": "55b3394e2c5819bcece9ee95ea7ed820", + "id": "nmdc:55b3394e2c5819bcece9ee95ea7ed820", + "file_size_bytes": 418594001 + }, + { + "name": "Gp0119865_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119865", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/assembly/nmdc_mga0dqhf53_scaffolds.fna", + "md5_checksum": "e3b7f40adfb21ec645f62d482f591f4c", + "id": "nmdc:e3b7f40adfb21ec645f62d482f591f4c", + "file_size_bytes": 416699181 + }, + { + "name": "Gp0119865_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/assembly/nmdc_mga0dqhf53_covstats.txt", + "md5_checksum": "6d699d3aa5732307706f2e55779e8b9a", + "id": "nmdc:6d699d3aa5732307706f2e55779e8b9a", + "file_size_bytes": 44808206 + }, + { + "name": "Gp0119865_Assembled AGP file", + "description": "Assembled AGP file for Gp0119865", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/assembly/nmdc_mga0dqhf53_assembly.agp", + "md5_checksum": "fb4363a7625400ef200cf4596c782682", + "id": 
"nmdc:fb4363a7625400ef200cf4596c782682", + "file_size_bytes": 43008632 + }, + { + "name": "Gp0119865_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119865", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/assembly/nmdc_mga0dqhf53_pairedMapped_sorted.bam", + "md5_checksum": "f4f66aba79bd6d06b56f1f7ca60c7a9a", + "id": "nmdc:f4f66aba79bd6d06b56f1f7ca60c7a9a", + "file_size_bytes": 6754467755 + }, + { + "name": "Gp0119865_Protein FAA", + "description": "Protein FAA for Gp0119865", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_proteins.faa", + "md5_checksum": "fc8bef54f2e80752641a2f316fea248c", + "id": "nmdc:fc8bef54f2e80752641a2f316fea248c", + "file_size_bytes": 218219310 + }, + { + "name": "Gp0119865_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119865", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_structural_annotation.gff", + "md5_checksum": "cbf87e8c596a5af525de057ff1762d0f", + "id": "nmdc:cbf87e8c596a5af525de057ff1762d0f", + "file_size_bytes": 2548 + }, + { + "name": "Gp0119865_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119865", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_functional_annotation.gff", + "md5_checksum": "7fac51eabb726e277d2c81155117551b", + "id": "nmdc:7fac51eabb726e277d2c81155117551b", + "file_size_bytes": 216810167 + }, + { + "name": "Gp0119865_KO TSV file", + "description": "KO TSV file for Gp0119865", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_ko.tsv", + "md5_checksum": 
"500246e7472eb2b51065a4d5e041d0b3", + "id": "nmdc:500246e7472eb2b51065a4d5e041d0b3", + "file_size_bytes": 28980084 + }, + { + "name": "Gp0119865_EC TSV file", + "description": "EC TSV file for Gp0119865", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_ec.tsv", + "md5_checksum": "d87802e97e09c9ec0670bf0ae93d6b55", + "id": "nmdc:d87802e97e09c9ec0670bf0ae93d6b55", + "file_size_bytes": 20720457 + }, + { + "name": "Gp0119865_COG GFF file", + "description": "COG GFF file for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_cog.gff", + "md5_checksum": "1cf22adcc77e40fa24bf1880ea4864a4", + "id": "nmdc:1cf22adcc77e40fa24bf1880ea4864a4", + "file_size_bytes": 123155022 + }, + { + "name": "Gp0119865_PFAM GFF file", + "description": "PFAM GFF file for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_pfam.gff", + "md5_checksum": "77ba77ffe195895a69ae422d97a8c630", + "id": "nmdc:77ba77ffe195895a69ae422d97a8c630", + "file_size_bytes": 107356158 + }, + { + "name": "Gp0119865_TigrFam GFF file", + "description": "TigrFam GFF file for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_tigrfam.gff", + "md5_checksum": "86866d379e9193d501e1f6b1f72d2f21", + "id": "nmdc:86866d379e9193d501e1f6b1f72d2f21", + "file_size_bytes": 18183553 + }, + { + "name": "Gp0119865_SMART GFF file", + "description": "SMART GFF file for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_smart.gff", + "md5_checksum": "ad0427ce52177c7c2ea4fe05baa9b280", + "id": "nmdc:ad0427ce52177c7c2ea4fe05baa9b280", + "file_size_bytes": 29136090 + }, + { + "name": "Gp0119865_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119865", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_supfam.gff", + "md5_checksum": "b1a70cb1e731b5245f008e703f4f32f4", + "id": "nmdc:b1a70cb1e731b5245f008e703f4f32f4", + "file_size_bytes": 154070805 + }, + { + "name": "Gp0119865_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_cath_funfam.gff", + "md5_checksum": "0c0d49ca5d4c004cbfe8f1b1617153f6", + "id": "nmdc:0c0d49ca5d4c004cbfe8f1b1617153f6", + "file_size_bytes": 141299883 + }, + { + "name": "Gp0119865_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/annotation/nmdc_mga0dqhf53_ko_ec.gff", + "md5_checksum": "e6262f38ee3407f7fd6e19b6c4a95454", + "id": "nmdc:e6262f38ee3407f7fd6e19b6c4a95454", + "file_size_bytes": 92350585 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119865_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/MAGs/nmdc_mga0dqhf53_bins.tooShort.fa", + "md5_checksum": "415c63515c68663b5c4ee5f69433d18d", + "id": "nmdc:415c63515c68663b5c4ee5f69433d18d", + "file_size_bytes": 218521870 + }, + { + "name": "Gp0119865_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/MAGs/nmdc_mga0dqhf53_bins.unbinned.fa", 
+ "md5_checksum": "46f77d71bd6fa8591feac662b2e2e777", + "id": "nmdc:46f77d71bd6fa8591feac662b2e2e777", + "file_size_bytes": 134166939 + }, + { + "name": "Gp0119865_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119865", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/MAGs/nmdc_mga0dqhf53_checkm_qa.out", + "md5_checksum": "6980ea491d86f3c78b19339e95a8a9ec", + "id": "nmdc:6980ea491d86f3c78b19339e95a8a9ec", + "file_size_bytes": 11457 + }, + { + "name": "Gp0119865_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119865", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/MAGs/nmdc_mga0dqhf53_hqmq_bin.zip", + "md5_checksum": "53ca5abe0b0fd48f88e1f4e98d291cee", + "id": "nmdc:53ca5abe0b0fd48f88e1f4e98d291cee", + "file_size_bytes": 7639037 + }, + { + "name": "Gp0119865_metabat2 bins", + "description": "metabat2 bins for Gp0119865", + "url": "https://data.microbiomedata.org/data/nmdc:mga0dqhf53/MAGs/nmdc_mga0dqhf53_metabat_bin.zip", + "md5_checksum": "811090ea1b1405947745eb47c2a30c1f", + "id": "nmdc:811090ea1b1405947745eb47c2a30c1f", + "file_size_bytes": 12410920 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e1f" + }, + "description": "Assembled scaffold fasta for gold:Gp0119865", + "url": "https://data.microbiomedata.org/data/1777_95834/assembly/assembly_scaffolds.fna", + "file_size_bytes": 414029461, + "type": "nmdc:DataObject", + "id": "nmdc:8e4f20d0dadb92f5964234843aea26da", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e20" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119865", + "url": "https://data.microbiomedata.org/data/1777_95834/assembly/mapping_stats.txt", + "file_size_bytes": 42042091, + "type": "nmdc:DataObject", + 
"id": "nmdc:efd3ed8707f5a98c522fb35375798cf4", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e23" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119865", + "url": "https://data.microbiomedata.org/data/1777_95834/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 6702043442, + "type": "nmdc:DataObject", + "id": "nmdc:f24f8f65b5c51d8f6663d581f8d783d2", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e25" + }, + "description": "Assembled contigs fasta for gold:Gp0119865", + "url": "https://data.microbiomedata.org/data/1777_95834/assembly/assembly_contigs.fna", + "file_size_bytes": 415827886, + "type": "nmdc:DataObject", + "id": "nmdc:cbb78531961f6eab3b0f8ef2fa488801", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e2a" + }, + "description": "Assembled AGP file for gold:Gp0119865", + "url": "https://data.microbiomedata.org/data/1777_95834/assembly/assembly.agp", + "file_size_bytes": 37274792, + "type": "nmdc:DataObject", + "id": "nmdc:7c1afaf60c66f3fdaa3cc6f02328e36e", + "name": "assembly.agp", + "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15aa2" + }, + "id": "nmdc:0b9101c016edb57a657aeab7fd02cb4a", + "name": "1777_95834.krona.html", + "description": "Gold:Gp0119865 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95834/ReadbasedAnalysis/centrifuge/1777_95834.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15aa7" + }, + "id": "nmdc:ffa8d1c6b7e9063d148d8e05075d30c7", + "name": "1777_95834.json", + "description": "Gold:Gp0119865 ReadbasedAnalysis result JSON file", + "url": 
"https://data.microbiomedata.org/1777_95834/ReadbasedAnalysis/1777_95834.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1639f" + }, + "id": "nmdc:a0024864c1035eb569a85ce958f88281", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119865", + "file_size_bytes": 212384686, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a1" + }, + "id": "nmdc:9039edcbc718d080105bd62872b6ddf3", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119865", + "file_size_bytes": 170112919, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a2" + }, + "id": "nmdc:30f1b9861da4a8addf3446a7a58c99be", + "name": "gold:Gp0119865.bins.28.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 1711626, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.28.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a3" + }, + "id": "nmdc:f89d957497be2d5e72f537b68940a67b", + "name": "gold:Gp0119865.bins.23.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 1041922, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.23.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a4" + }, + "id": "nmdc:395dfbfeb842c74cedb53987f6503cf1", + "name": "gold:Gp0119865.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 463796, + "url": 
"https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a5" + }, + "id": "nmdc:eae60ea4450a00417f5f5dadec62179f", + "name": "gold:Gp0119865.bins.58.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 257439, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.58.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a6" + }, + "id": "nmdc:e0d3ab2f6688b99eb5bbb27db414a1c5", + "name": "gold:Gp0119865.bins.41.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 224922, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.41.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a7" + }, + "id": "nmdc:4f9adc5389f4bd4c5eb480518539d137", + "name": "gold:Gp0119865.bins.35.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 339078, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.35.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a8" + }, + "id": "nmdc:c911341ad4739dffb68d37aac7563cc5", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119865", + "file_size_bytes": 11180, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163a9" + }, + "id": "nmdc:643f9caae8d6c11215a8679dd810d218", + "name": "gold:Gp0119865.bins.53.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + 
"file_size_bytes": 304340, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.53.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163aa" + }, + "id": "nmdc:a6bb1b45a95b4b9934a6c49a8cbcf2c4", + "name": "gold:Gp0119865.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 558097, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ab" + }, + "id": "nmdc:2f79fd2970c535aa150fa6fff2c4845d", + "name": "gold:Gp0119865.bins.18.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 215030, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.18.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ac" + }, + "id": "nmdc:6c23c9fd69fddcd76234b0ba4327cbac", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119865", + "file_size_bytes": 2498, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ad" + }, + "id": "nmdc:f431a61b95ebe5b7e0e77f856867b279", + "name": "gold:Gp0119865.bins.47.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 613183, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.47.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ae" + }, + "id": "nmdc:09269bf2cae21ecfd6890fd1c3014308", + "name": "gold:Gp0119865.bins.26.fa", + "description": "metabat2 
binned contig file for gold:Gp0119865", + "file_size_bytes": 301715, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.26.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163af" + }, + "id": "nmdc:337a42870110c6e0976c853e9dd07d3d", + "name": "gold:Gp0119865.bins.44.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 209337, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.44.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b0" + }, + "id": "nmdc:effc84ee1c25e0d71c2d662f14ec18f1", + "name": "gold:Gp0119865.bins.46.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 204126, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.46.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b1" + }, + "id": "nmdc:bcf5eb55e32b9f66c8f953a72d3a111d", + "name": "gold:Gp0119865.bins.22.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 697223, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.22.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b2" + }, + "id": "nmdc:42fa278e6a868b7f15a32ce5751f5357", + "name": "gold:Gp0119865.bins.29.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 720757, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.29.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b3" + }, + "id": "nmdc:1df46c78f61cb590063d4e16227d2613", + "name": 
"gold:Gp0119865.bins.25.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 240334, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.25.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b4" + }, + "id": "nmdc:a36c49aae66d1ba71b029e46d9c4f925", + "name": "gold:Gp0119865.bins.55.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 270616, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.55.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b5" + }, + "id": "nmdc:ecad99c846a7e76365db3955f008e35b", + "name": "gold:Gp0119865.bins.42.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 608415, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.42.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b6" + }, + "id": "nmdc:056ff4a47b2c73f0e5dd97d68e4326bc", + "name": "gold:Gp0119865.bins.38.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 617014, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.38.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b7" + }, + "id": "nmdc:faaafa41abfaba2aace6e5d67a588488", + "name": "gold:Gp0119865.bins.19.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 457059, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.19.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b8" + }, + "id": 
"nmdc:d6ea68c8f0b94b1d8d9cc64db76dab45", + "name": "gold:Gp0119865.bins.45.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 216144, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.45.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163b9" + }, + "id": "nmdc:1ab7e01e9633e652a563afdcae556daf", + "name": "gold:Gp0119865.bins.30.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 238851, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.30.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ba" + }, + "id": "nmdc:4a121bfedd55c978469b33f9558ab384", + "name": "gold:Gp0119865.bins.50.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 410025, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.50.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163bb" + }, + "id": "nmdc:c00b2d5fa849ef7ca7a464f8a2990ddf", + "name": "gold:Gp0119865.bins.32.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 331099, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.32.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163bc" + }, + "id": "nmdc:227f952037e1f97e674cc5f361e14a1b", + "name": "gold:Gp0119865.bins.21.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 241886, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.21.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": 
"649b003f1ae706d7b5b163bd" + }, + "id": "nmdc:412337c8784a291e14ec9be5538ee5c5", + "name": "gold:Gp0119865.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 222364, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163be" + }, + "id": "nmdc:6724ad1564d831c59c1ec7555429bba9", + "name": "gold:Gp0119865.bins.49.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 328213, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.49.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163bf" + }, + "id": "nmdc:dab14676a0b9e391219009825b17fafa", + "name": "gold:Gp0119865.bins.59.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 427968, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.59.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c0" + }, + "id": "nmdc:289381cf2f82263116fc36b80e742247", + "name": "gold:Gp0119865.bins.39.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 969526, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.39.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c1" + }, + "id": "nmdc:b113fa712d95c63e572619f5e3ef130c", + "name": "gold:Gp0119865.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 373449, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": 
"Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c2" + }, + "id": "nmdc:9bca7b276fe5dbe86fabb9a424777220", + "name": "gold:Gp0119865.bins.54.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 209342, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.54.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c3" + }, + "id": "nmdc:4d81fed2cc11d3e540fa4285fd74cb37", + "name": "gold:Gp0119865.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 282478, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c4" + }, + "id": "nmdc:e08f45f03980496773c6c9ad48ec3b0f", + "name": "gold:Gp0119865.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 220128, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c5" + }, + "id": "nmdc:fd652ecc9b1e73f942c2b6934c614dd0", + "name": "gold:Gp0119865.bins.20.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 374339, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.20.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c6" + }, + "id": "nmdc:76c10823b33c022c082612b82b621bd5", + "name": "gold:Gp0119865.bins.37.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 2358840, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.37.fa", + "type": 
"nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c7" + }, + "id": "nmdc:0e6b7d605b342b175cda33236913d2bf", + "name": "gold:Gp0119865.bins.51.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 266402, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.51.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c8" + }, + "id": "nmdc:c837026b49ecc889bd40c978af74b0a1", + "name": "gold:Gp0119865.bins.61.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 529420, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.61.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163c9" + }, + "id": "nmdc:0b3960210bbc7a4751b805d69cae7b5e", + "name": "gold:Gp0119865.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 594142, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ca" + }, + "id": "nmdc:3b04342e5a96e1ff0ffe0875a1a819af", + "name": "gold:Gp0119865.bins.56.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 248441, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.56.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163cb" + }, + "id": "nmdc:dd0808e480cd404572f95ccc5e3a02ba", + "name": "gold:Gp0119865.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 608216, + "url": 
"https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163cc" + }, + "id": "nmdc:8f5d968c464eaaa2b950a35daa402001", + "name": "gold:Gp0119865.bins.52.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 519444, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.52.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163cd" + }, + "id": "nmdc:cda2e22b064478afb2f75aa30df98fa4", + "name": "gold:Gp0119865.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 204571, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163ce" + }, + "id": "nmdc:11cc6cd2fded4205421850aeb5a70478", + "name": "gold:Gp0119865.bins.34.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 300221, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.34.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163cf" + }, + "id": "nmdc:680ebc4143e51a32d601ca75b70ed1b0", + "name": "gold:Gp0119865.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 704485, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d0" + }, + "id": "nmdc:7673d37ee3541910550c17b5355b4793", + "name": "gold:Gp0119865.bins.16.fa", + "description": "metabat2 binned contig file for 
gold:Gp0119865", + "file_size_bytes": 435848, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d1" + }, + "id": "nmdc:15b1b3ef4a5ccdb971e9db8bd2c323ef", + "name": "gold:Gp0119865.bins.60.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 526635, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.60.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d2" + }, + "id": "nmdc:03b9c11598742dde211d7f3dd561a3bd", + "name": "gold:Gp0119865.bins.33.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 237251, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.33.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d3" + }, + "id": "nmdc:11794bb19c6ae496e718f2746d58b207", + "name": "gold:Gp0119865.bins.48.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 275899, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.48.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d4" + }, + "id": "nmdc:d621dafa60936463b74e262189e3a3ef", + "name": "gold:Gp0119865.bins.17.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 273208, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.17.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d5" + }, + "id": "nmdc:779719fa5869c6e46102812e0df4699d", + "name": "gold:Gp0119865.bins.15.fa", + 
"description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 1798985, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d6" + }, + "id": "nmdc:eb23f6bf6868b1efd0aca5b5d9aab122", + "name": "gold:Gp0119865.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 382457, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d7" + }, + "id": "nmdc:d49332b7c5821b2012190b672121c07a", + "name": "gold:Gp0119865.bins.27.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 356357, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.27.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163d8" + }, + "id": "nmdc:b53c4854cf734572173aad4e7aeca501", + "name": "gold:Gp0119865.bins.36.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 464657, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.36.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163da" + }, + "id": "nmdc:c6d4fc2a8a87fae21acc7fd249ed7a70", + "name": "gold:Gp0119865.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 577250, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163dc" + }, + "id": "nmdc:f9c306decf83d949ee5d9db3b7753766", + 
"name": "gold:Gp0119865.bins.31.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 326292, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.31.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163dd" + }, + "id": "nmdc:8914b3b8f75cbb093837f9c84f2a1e27", + "name": "gold:Gp0119865.bins.40.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 239893, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.40.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163de" + }, + "id": "nmdc:1f81cf809048a24a09acfab21ddca562", + "name": "gold:Gp0119865.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 418053, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163df" + }, + "id": "nmdc:c0d43028c12bfd0c2b84e927a3162cab", + "name": "gold:Gp0119865.bins.43.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 213045, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.43.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b163f7" + }, + "id": "nmdc:713cd201b24ea3ae984d700cd7ee3584", + "name": "gold:Gp0119865.bins.24.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 480092, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.24.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16409" + }, + "id": 
"nmdc:e7d8e0671cf4806cd28a0a1c81caf7f9", + "name": "gold:Gp0119865.bins.57.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 478891, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.57.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1640f" + }, + "id": "nmdc:690172fed29715bdc79b04c459dbb554", + "name": "gold:Gp0119865.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119865", + "file_size_bytes": 315402, + "url": "https://data.microbiomedata.org/data/1777_95834/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c9a" + }, + "description": "EC TSV File for gold:Gp0119865", + "url": "https://data.microbiomedata.org/1777_95834/img_annotation/Ga0482150_ec.tsv", + "md5_checksum": "e34ef5954fd12e5fdab0bae08414b001", + "file_size_bytes": 3385, + "id": "nmdc:e34ef5954fd12e5fdab0bae08414b001", + "name": "gold:Gp0119865_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c9b" + }, + "description": "KO TSV File for gold:Gp0119865", + "url": "https://data.microbiomedata.org/1777_95834/img_annotation/Ga0482150_ko.tsv", + "md5_checksum": "a58d0ad82bcd82747443a05133e86d4a", + "file_size_bytes": 3385, + "id": "nmdc:a58d0ad82bcd82747443a05133e86d4a", + "name": "gold:Gp0119865_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c9d" + }, + "description": "Functional annotation GFF file for gold:Gp0119865", + "url": "https://data.microbiomedata.org/1777_95834/img_annotation/Ga0482150_functional_annotation.gff", + "md5_checksum": "1f5ea69be09b6f086c09af241d6b9df5", + "file_size_bytes": 3385, + "id": 
"nmdc:1f5ea69be09b6f086c09af241d6b9df5", + "name": "gold:Gp0119865_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c9e" + }, + "description": "Structural annotation GFF file for gold:Gp0119865", + "url": "https://data.microbiomedata.org/1777_95834/img_annotation/Ga0482150_structural_annotation.gff", + "md5_checksum": "9f23cbec97a89ce503564d05b061f37f", + "file_size_bytes": 3385, + "id": "nmdc:9f23cbec97a89ce503564d05b061f37f", + "name": "gold:Gp0119865_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16c9f" + }, + "description": "Protein FAA for gold:Gp0119865", + "url": "https://data.microbiomedata.org/1777_95834/img_annotation/Ga0482150_proteins.faa", + "md5_checksum": "73e37fa7c4626d47580d5705cb656383", + "file_size_bytes": 3385, + "id": "nmdc:73e37fa7c4626d47580d5705cb656383", + "name": "gold:Gp0119865_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34704" + }, + "has_input": [ + "nmdc:55b3394e2c5819bcece9ee95ea7ed820", + "nmdc:f4f66aba79bd6d06b56f1f7ca60c7a9a", + "nmdc:7fac51eabb726e277d2c81155117551b" + ], + "too_short_contig_num": 475163, + "part_of": [ + "nmdc:mga0dqhf53" + ], + "binned_contig_num": 9953, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:415c63515c68663b5c4ee5f69433d18d", + "nmdc:46f77d71bd6fa8591feac662b2e2e777", + "nmdc:6980ea491d86f3c78b19339e95a8a9ec", + "nmdc:53ca5abe0b0fd48f88e1f4e98d291cee", + "nmdc:811090ea1b1405947745eb47c2a30c1f" + ], + "was_informed_by": "gold:Gp0119865", + "input_contig_num": 553223, + "id": "nmdc:fb034382963da665da210554890d65a6", + "execution_resource": "NERSC-Cori", + 
"name": "MAGs Analysis Activity for nmdc:mga0dqhf53", + "mags_list": [ + { + "number_of_contig": 187, + "completeness": 37.07, + "bin_name": "bins.1", + "gene_count": 1000, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 82, + "completeness": 15.52, + "bin_name": "bins.10", + "gene_count": 380, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 238, + "completeness": 76.74, + "bin_name": "bins.11", + "gene_count": 1732, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Flavobacteriales", + "num_16s": 0, + "gtdbtk_family": "GCA-002722245", + "gtdbtk_domain": "Bacteria", + "contamination": 0.27, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 27 + }, + { + "number_of_contig": 238, + "completeness": 10.34, + "bin_name": "bins.12", + "gene_count": 1060, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 258, + "completeness": 53.35, + "bin_name": "bins.13", + "gene_count": 1611, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "NS11-12g", + "num_16s": 0, + "gtdbtk_family": "UBA955", + "gtdbtk_domain": "Bacteria", + "contamination": 0.69, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + 
"num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 + }, + { + "number_of_contig": 249, + "completeness": 23.42, + "bin_name": "bins.14", + "gene_count": 1229, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 133, + "completeness": 98.91, + "bin_name": "bins.15", + "gene_count": 3136, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Synechococcales", + "num_16s": 0, + "gtdbtk_family": "Cyanobiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.54, + "gtdbtk_class": "Cyanobacteriia", + "gtdbtk_phylum": "Cyanobacteria", + "num_5s": 3, + "num_23s": 0, + "gtdbtk_genus": "PCC7001", + "num_t_rna": 41 + }, + { + "number_of_contig": 498, + "completeness": 81.03, + "bin_name": "bins.16", + "gene_count": 3528, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 41.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 49 + }, + { + "number_of_contig": 68, + "completeness": 33.62, + "bin_name": "bins.17", + "gene_count": 580, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 105, + "completeness": 21.55, + "bin_name": "bins.18", + "gene_count": 663, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + 
"gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 46, + "completeness": 3.29, + "bin_name": "bins.19", + "gene_count": 383, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.58, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 50, + "completeness": 10.5, + "bin_name": "bins.2", + "gene_count": 329, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 135, + "completeness": 53.62, + "bin_name": "bins.20", + "gene_count": 860, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 13.79, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 550, + "completeness": 85.38, + "bin_name": "bins.21", + "gene_count": 3573, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 47.41, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 39 + }, + { + "number_of_contig": 112, + "completeness": 57.38, + "bin_name": "bins.22", + "gene_count": 935, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Actinomycetales", + "num_16s": 1, + "gtdbtk_family": "Microbacteriaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.56, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Rhodoluna", + 
"num_t_rna": 28 + }, + { + "number_of_contig": 46, + "completeness": 0.0, + "bin_name": "bins.23", + "gene_count": 294, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 3 + }, + { + "number_of_contig": 61, + "completeness": 46.87, + "bin_name": "bins.24", + "gene_count": 708, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 326, + "completeness": 55.66, + "bin_name": "bins.25", + "gene_count": 1808, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 17.06, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 23 + }, + { + "number_of_contig": 185, + "completeness": 38.95, + "bin_name": "bins.26", + "gene_count": 1300, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 10.69, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 24 + }, + { + "number_of_contig": 240, + "completeness": 26.72, + "bin_name": "bins.27", + "gene_count": 1147, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 3.45, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 6 + }, + { + "number_of_contig": 134, + "completeness": 0.0, + "bin_name": "bins.28", + 
"gene_count": 861, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 3, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 292, + "completeness": 54.47, + "bin_name": "bins.29", + "gene_count": 1942, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 18.97, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 41 + }, + { + "number_of_contig": 104, + "completeness": 23.28, + "bin_name": "bins.3", + "gene_count": 743, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 17 + }, + { + "number_of_contig": 91, + "completeness": 6.14, + "bin_name": "bins.30", + "gene_count": 602, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 167, + "completeness": 44.83, + "bin_name": "bins.31", + "gene_count": 1058, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 23 + }, + { + "number_of_contig": 53, + "completeness": 14.35, + "bin_name": "bins.32", + "gene_count": 463, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + 
"gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.81, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 116, + "completeness": 55.93, + "bin_name": "bins.33", + "gene_count": 945, + "bin_quality": "MQ", + "gtdbtk_species": "Pontimonas sp000754445", + "gtdbtk_order": "Actinomycetales", + "num_16s": 1, + "gtdbtk_family": "Microbacteriaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.0, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Pontimonas", + "num_t_rna": 17 + }, + { + "number_of_contig": 117, + "completeness": 59.66, + "bin_name": "bins.34", + "gene_count": 878, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "CSP1-4", + "num_16s": 0, + "gtdbtk_family": "UBA10416", + "gtdbtk_domain": "Bacteria", + "contamination": 4.31, + "gtdbtk_class": "Ellin6529", + "gtdbtk_phylum": "Chloroflexota_A", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA10416", + "num_t_rna": 31 + }, + { + "number_of_contig": 92, + "completeness": 39.04, + "bin_name": "bins.35", + "gene_count": 830, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.59, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 182, + "completeness": 88.68, + "bin_name": "bins.36", + "gene_count": 1802, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Chitinophagales", + "num_16s": 1, + "gtdbtk_family": "Chitinophagaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 3.45, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA8137", + "num_t_rna": 27 + }, + { + "number_of_contig": 265, + "completeness": 77.07, + "bin_name": 
"bins.37", + "gene_count": 2027, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Cytophagales", + "num_16s": 1, + "gtdbtk_family": "Cyclobacteriaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.94, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Algoriphagus", + "num_t_rna": 26 + }, + { + "number_of_contig": 90, + "completeness": 27.31, + "bin_name": "bins.38", + "gene_count": 616, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.49, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 198, + "completeness": 10.42, + "bin_name": "bins.39", + "gene_count": 1159, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 20.6, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 179, + "completeness": 73.64, + "bin_name": "bins.4", + "gene_count": 1612, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Burkholderiales", + "num_16s": 1, + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.3, + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "UBA2463", + "num_t_rna": 26 + }, + { + "number_of_contig": 47, + "completeness": 0.0, + "bin_name": "bins.40", + "gene_count": 308, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 77, 
+ "completeness": 0.0, + "bin_name": "bins.41", + "gene_count": 417, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 4, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 197, + "completeness": 31.99, + "bin_name": "bins.42", + "gene_count": 1179, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 360, + "completeness": 53.21, + "bin_name": "bins.43", + "gene_count": 1829, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Phycisphaerales", + "num_16s": 1, + "gtdbtk_family": "SM1A02", + "gtdbtk_domain": "Bacteria", + "contamination": 0.57, + "gtdbtk_class": "Phycisphaerae", + "gtdbtk_phylum": "Planctomycetota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 129, + "completeness": 66.9, + "bin_name": "bins.44", + "gene_count": 1232, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Microtrichales", + "num_16s": 1, + "gtdbtk_family": "Ilumatobacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Acidimicrobiia", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "UBA3006", + "num_t_rna": 20 + }, + { + "number_of_contig": 46, + "completeness": 0.0, + "bin_name": "bins.45", + "gene_count": 234, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + }, + { + 
"number_of_contig": 333, + "completeness": 38.6, + "bin_name": "bins.46", + "gene_count": 2093, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 20 + }, + { + "number_of_contig": 60, + "completeness": 22.55, + "bin_name": "bins.47", + "gene_count": 392, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 18 + }, + { + "number_of_contig": 96, + "completeness": 29.31, + "bin_name": "bins.48", + "gene_count": 788, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 18 + }, + { + "number_of_contig": 163, + "completeness": 18.76, + "bin_name": "bins.49", + "gene_count": 704, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 271, + "completeness": 20.3, + "bin_name": "bins.5", + "gene_count": 1571, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 14 + }, + { + "number_of_contig": 81, + "completeness": 20.86, + "bin_name": "bins.50", + "gene_count": 569, + "bin_quality": 
"LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.75, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 18 + }, + { + "number_of_contig": 241, + "completeness": 49.67, + "bin_name": "bins.51", + "gene_count": 1293, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.07, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 21 + }, + { + "number_of_contig": 40, + "completeness": 0.0, + "bin_name": "bins.52", + "gene_count": 469, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 13, + "completeness": 40.62, + "bin_name": "bins.53", + "gene_count": 640, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 139, + "completeness": 48.75, + "bin_name": "bins.54", + "gene_count": 910, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.18, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 265, + "completeness": 37.94, + "bin_name": "bins.55", + "gene_count": 1400, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": 
"", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 101, + "completeness": 49.28, + "bin_name": "bins.56", + "gene_count": 624, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 6.14, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 33 + }, + { + "number_of_contig": 78, + "completeness": 75.84, + "bin_name": "bins.57", + "gene_count": 809, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "RFN20", + "num_16s": 0, + "gtdbtk_family": "CAG-826", + "gtdbtk_domain": "Bacteria", + "contamination": 1.12, + "gtdbtk_class": "Bacilli", + "gtdbtk_phylum": "Firmicutes", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "UBA970", + "num_t_rna": 15 + }, + { + "number_of_contig": 215, + "completeness": 86.47, + "bin_name": "bins.58", + "gene_count": 2498, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Caulobacterales", + "num_16s": 1, + "gtdbtk_family": "Caulobacteraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 0.6, + "gtdbtk_class": "Alphaproteobacteria", + "gtdbtk_phylum": "Proteobacteria", + "num_5s": 1, + "num_23s": 1, + "gtdbtk_genus": "Phenylobacterium", + "num_t_rna": 34 + }, + { + "number_of_contig": 65, + "completeness": 50.86, + "bin_name": "bins.59", + "gene_count": 542, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Nanopelagicales", + "num_16s": 0, + "gtdbtk_family": "Nanopelagicaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.72, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Planktophila", + "num_t_rna": 15 + }, + { + "number_of_contig": 58, + "completeness": 18.5, + "bin_name": "bins.6", + "gene_count": 328, + "bin_quality": "LQ", + 
"gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 70, + "completeness": 25.73, + "bin_name": "bins.60", + "gene_count": 471, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 12, + "completeness": 19.72, + "bin_name": "bins.61", + "gene_count": 277, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 453, + "completeness": 74.76, + "bin_name": "bins.62", + "gene_count": 3186, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Chitinophagales", + "num_16s": 1, + "gtdbtk_family": "Saprospiraceae", + "gtdbtk_domain": "Bacteria", + "contamination": 4.32, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 1, + "gtdbtk_genus": "Haliscomenobacter", + "num_t_rna": 34 + }, + { + "number_of_contig": 226, + "completeness": 32.76, + "bin_name": "bins.63", + "gene_count": 1294, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 16 + }, + { + "number_of_contig": 95, + "completeness": 55.09, + "bin_name": "bins.7", + "gene_count": 979, + "bin_quality": "MQ", + "gtdbtk_species": "", + 
"gtdbtk_order": "Nanopelagicales", + "num_16s": 0, + "gtdbtk_family": "Nanopelagicaceae", + "gtdbtk_domain": "Bacteria", + "contamination": 1.75, + "gtdbtk_class": "Actinobacteria", + "gtdbtk_phylum": "Actinobacteriota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "Planktophila", + "num_t_rna": 28 + }, + { + "number_of_contig": 89, + "completeness": 6.9, + "bin_name": "bins.8", + "gene_count": 475, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 46, + "completeness": 5.17, + "bin_name": "bins.9", + "gene_count": 237, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 1 + } + ], + "unbinned_contig_num": 68107, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T06:17:45+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a9" + }, + "has_input": [ + "nmdc:55b3394e2c5819bcece9ee95ea7ed820" + ], + "part_of": [ + "nmdc:mga0dqhf53" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fc8bef54f2e80752641a2f316fea248c", + "nmdc:cbf87e8c596a5af525de057ff1762d0f", + "nmdc:7fac51eabb726e277d2c81155117551b", + "nmdc:500246e7472eb2b51065a4d5e041d0b3", + "nmdc:d87802e97e09c9ec0670bf0ae93d6b55", + "nmdc:1cf22adcc77e40fa24bf1880ea4864a4", + "nmdc:77ba77ffe195895a69ae422d97a8c630", + "nmdc:86866d379e9193d501e1f6b1f72d2f21", + "nmdc:ad0427ce52177c7c2ea4fe05baa9b280", + "nmdc:b1a70cb1e731b5245f008e703f4f32f4", + "nmdc:0c0d49ca5d4c004cbfe8f1b1617153f6", 
+ "nmdc:e6262f38ee3407f7fd6e19b6c4a95454" + ], + "was_informed_by": "gold:Gp0119865", + "id": "nmdc:fb034382963da665da210554890d65a6", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0dqhf53", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T06:17:45+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f8b" + }, + "has_input": [ + "nmdc:bef640a10fcdf22c758158c572557903" + ], + "part_of": [ + "nmdc:mga0dqhf53" + ], + "ctg_logsum": 2285365, + "scaf_logsum": 2410612, + "gap_pct": 0.05264, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:55b3394e2c5819bcece9ee95ea7ed820", + "nmdc:e3b7f40adfb21ec645f62d482f591f4c", + "nmdc:6d699d3aa5732307706f2e55779e8b9a", + "nmdc:fb4363a7625400ef200cf4596c782682", + "nmdc:f4f66aba79bd6d06b56f1f7ca60c7a9a" + ], + "asm_score": 8.229, + "was_informed_by": "gold:Gp0119865", + "ctg_powsum": 272951, + "scaf_max": 316993, + "id": "nmdc:fb034382963da665da210554890d65a6", + "scaf_powsum": 290106, + "execution_resource": "NERSC-Cori", + "contigs": 553664, + "name": "Assembly Activity for nmdc:mga0dqhf53", + "ctg_max": 316993, + "gc_std": 0.1172, + "contig_bp": 394822204, + "gc_avg": 0.47242, + "started_at_time": "2021-10-11T02:23:25Z", + "scaf_bp": 395030144, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 533944, + "ended_at_time": "2021-10-11T06:17:45+00:00", + "ctg_l50": 972, + "ctg_l90": 304, + "ctg_n50": 81110, + "ctg_n90": 413923, + "scaf_l50": 1066, + "scaf_l90": 305, + "scaf_n50": 72686, + "scaf_n90": 395204, + "scaf_l_gt50k": 2621293, + "scaf_n_gt50k": 35, + "scaf_pct_gt50k": 0.66356784 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349b99" + }, + "id": "nmdc:omprc-11-5sf7gh95", + "name": "Fresh water microbial communities from Ohio, USA - Utica-2 Time Series HT 2014_7_11", + "description": "Microbial 
controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-464tkc75" + ], + "has_output": [ + "jgi:560df3750d878540fd6fe1d1" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2021-06-15", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Utica-2 Time Series HT 2014_7_11", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0119865" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c853" + }, + "has_input": [ + "nmdc:dfad92b111bf26c4746cc7a9bda354ad" + ], + "part_of": [ + "nmdc:mga0dqhf53" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:bef640a10fcdf22c758158c572557903", + "nmdc:c3c7612d03968cf94263bc4247e349c8" + ], + "was_informed_by": "gold:Gp0119865", + "input_read_count": 71602402, + "output_read_bases": 10500628757, + "id": "nmdc:fb034382963da665da210554890d65a6", + "execution_resource": "NERSC-Cori", + "input_read_bases": 10811962702, + "name": "Read QC Activity for nmdc:mga0dqhf53", + "output_read_count": 70797856, + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T06:17:45+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf1a" + }, + "has_input": [ + "nmdc:bef640a10fcdf22c758158c572557903" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:c584ed2932bd4a7a2f9dd3ec6e276666", + "nmdc:26ea3d0f6789a8b4c069d95b4683223a", + "nmdc:8aa4029ecd774b56ff0a609b88f58491", + "nmdc:a1cb4436b54b8103c2023fff44c71605", + "nmdc:e67bddc790aa53a4c80a19aa40b5ceee", 
+ "nmdc:20e6e923e0648ef56c5d48e0c3f407d5", + "nmdc:3dc48571d8b3899fb7482dba34e66127", + "nmdc:7b62dd0dab192394469b049096d614c5", + "nmdc:ec0dc7ee83062fcc43b212662be9b802" + ], + "was_informed_by": "gold:Gp0119865", + "id": "nmdc:fb034382963da665da210554890d65a6", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0dqhf53", + "started_at_time": "2021-10-11T02:23:25Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T06:17:45+00:00" + } + ] + }, + { + "data_object_set": [ + { + "description": "Raw sequencer read data", + "file_size_bytes": 14205672089, + "type": "nmdc:DataObject", + "id": "jgi:560df3660d878540fd6fe1b9", + "name": "9567.5.137550.GGCTAC.fastq.gz" + }, + { + "name": "Gp0119868_Filtered Reads", + "description": "Filtered Reads for Gp0119868", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/qa/nmdc_mga0tc4144_filtered.fastq.gz", + "md5_checksum": "f593f3aee7fc5760af7c039bdf62ba57", + "id": "nmdc:f593f3aee7fc5760af7c039bdf62ba57", + "file_size_bytes": 13424641315 + }, + { + "name": "Gp0119868_Filtered Stats", + "description": "Filtered Stats for Gp0119868", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/qa/nmdc_mga0tc4144_filterStats.txt", + "md5_checksum": "8aec820c1193dad3bb21ba4084d43a20", + "id": "nmdc:8aec820c1193dad3bb21ba4084d43a20", + "file_size_bytes": 295 + }, + { + "name": "Gp0119868_Gottcha2 TSV report", + "description": "Gottcha2 TSV report for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_gottcha2_report.tsv", + "md5_checksum": "fe347ec8d4621fca2e07d8fe6cfcea55", + "id": "nmdc:fe347ec8d4621fca2e07d8fe6cfcea55", + "file_size_bytes": 14290 + }, + { + "name": "Gp0119868_Gottcha2 full TSV report", + "description": "Gottcha2 full TSV report for Gp0119868", + "url": 
"https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_gottcha2_report_full.tsv", + "md5_checksum": "73e3da946aae23b311d50688663ef847", + "id": "nmdc:73e3da946aae23b311d50688663ef847", + "file_size_bytes": 1419852 + }, + { + "name": "Gp0119868_Gottcha2 Krona HTML report", + "description": "Gottcha2 Krona HTML report for Gp0119868", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_gottcha2_krona.html", + "md5_checksum": "abf78291d78d1c78eafe96d6cfb062aa", + "id": "nmdc:abf78291d78d1c78eafe96d6cfb062aa", + "file_size_bytes": 271183 + }, + { + "name": "Gp0119868_Centrifuge classification TSV report", + "description": "Centrifuge classification TSV report for Gp0119868", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_centrifuge_classification.tsv", + "md5_checksum": "089f1a254b33552d2c8465e7e8de1186", + "id": "nmdc:089f1a254b33552d2c8465e7e8de1186", + "file_size_bytes": 9770944349 + }, + { + "name": "Gp0119868_Centrifuge TSV report", + "description": "Centrifuge TSV report for Gp0119868", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_centrifuge_report.tsv", + "md5_checksum": "c01688a45c35e13a9ebeea38e04fb8ef", + "id": "nmdc:c01688a45c35e13a9ebeea38e04fb8ef", + "file_size_bytes": 269215 + }, + { + "name": "Gp0119868_Centrifuge Krona HTML report", + "description": "Centrifuge Krona HTML report for Gp0119868", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_centrifuge_krona.html", + "md5_checksum": "d42c6a1ade379bfacea8a2abc762b45a", + "id": "nmdc:d42c6a1ade379bfacea8a2abc762b45a", + "file_size_bytes": 2371627 + }, + { + "name": 
"Gp0119868_Kraken classification TSV report", + "description": "Kraken classification TSV report for Gp0119868", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_kraken2_classification.tsv", + "md5_checksum": "606adc1b169bba287f1d231635fe756f", + "id": "nmdc:606adc1b169bba287f1d231635fe756f", + "file_size_bytes": 7874246629 + }, + { + "name": "Gp0119868_Kraken2 TSV report", + "description": "Kraken2 TSV report for Gp0119868", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_kraken2_report.tsv", + "md5_checksum": "9efccbd1e5bbb4d10ce94c2e8d2b33aa", + "id": "nmdc:9efccbd1e5bbb4d10ce94c2e8d2b33aa", + "file_size_bytes": 809866 + }, + { + "name": "Gp0119868_Kraken2 Krona HTML report", + "description": "Kraken2 Krona HTML report for Gp0119868", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/ReadbasedAnalysis/nmdc_mga0tc4144_kraken2_krona.html", + "md5_checksum": "0c7920c75acfbd3d3b2d654e3b2cd9ec", + "id": "nmdc:0c7920c75acfbd3d3b2d654e3b2cd9ec", + "file_size_bytes": 4803443 + }, + { + "name": "Gp0119868_Assembled contigs fasta", + "description": "Assembled contigs fasta for Gp0119868", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/assembly/nmdc_mga0tc4144_contigs.fna", + "md5_checksum": "9a56bc21f20b6e1bc5edce3a88c469c9", + "id": "nmdc:9a56bc21f20b6e1bc5edce3a88c469c9", + "file_size_bytes": 774444994 + }, + { + "name": "Gp0119868_Assembled scaffold fasta", + "description": "Assembled scaffold fasta for Gp0119868", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/assembly/nmdc_mga0tc4144_scaffolds.fna", + "md5_checksum": "9b82712bfcf3013e35482c9e68b9095b", + "id": 
"nmdc:9b82712bfcf3013e35482c9e68b9095b", + "file_size_bytes": 770398214 + }, + { + "name": "Gp0119868_Metagenome Contig Coverage Stats", + "description": "Metagenome Contig Coverage Stats for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/assembly/nmdc_mga0tc4144_covstats.txt", + "md5_checksum": "76f1b90c58a3672147718346c40e81c9", + "id": "nmdc:76f1b90c58a3672147718346c40e81c9", + "file_size_bytes": 108014654 + }, + { + "name": "Gp0119868_Assembled AGP file", + "description": "Assembled AGP file for Gp0119868", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/assembly/nmdc_mga0tc4144_assembly.agp", + "md5_checksum": "0fe116e4fc9c24bf39029cafe4d2256d", + "id": "nmdc:0fe116e4fc9c24bf39029cafe4d2256d", + "file_size_bytes": 101536366 + }, + { + "name": "Gp0119868_Metagenome Alignment BAM file", + "description": "Metagenome Alignment BAM file for Gp0119868", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/assembly/nmdc_mga0tc4144_pairedMapped_sorted.bam", + "md5_checksum": "bda4bbf191832c401a301364ceaa99b9", + "id": "nmdc:bda4bbf191832c401a301364ceaa99b9", + "file_size_bytes": 14466110638 + }, + { + "name": "Gp0119868_Protein FAA", + "description": "Protein FAA for Gp0119868", + "data_object_type": "Annotation Amino Acid FASTA", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_proteins.faa", + "md5_checksum": "7b83b2503caf6ef8448c50fc0a6c7199", + "id": "nmdc:7b83b2503caf6ef8448c50fc0a6c7199", + "file_size_bytes": 431262619 + }, + { + "name": "Gp0119868_Structural annotation GFF file", + "description": "Structural annotation GFF file for Gp0119868", + "data_object_type": "Structural Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_structural_annotation.gff", + "md5_checksum": "b6e6beea1a1e2bc5fd3defefb4329096", + "id": 
"nmdc:b6e6beea1a1e2bc5fd3defefb4329096", + "file_size_bytes": 2558 + }, + { + "name": "Gp0119868_Functional annotation GFF file", + "description": "Functional annotation GFF file for Gp0119868", + "data_object_type": "Functional Annotation GFF", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_functional_annotation.gff", + "md5_checksum": "e30b169c1a0529b20903cf960b162d84", + "id": "nmdc:e30b169c1a0529b20903cf960b162d84", + "file_size_bytes": 467362409 + }, + { + "name": "Gp0119868_KO TSV file", + "description": "KO TSV file for Gp0119868", + "data_object_type": "Annotation KEGG Orthology", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_ko.tsv", + "md5_checksum": "16ffd084e72cab2403866158e5e6a6cb", + "id": "nmdc:16ffd084e72cab2403866158e5e6a6cb", + "file_size_bytes": 59932484 + }, + { + "name": "Gp0119868_EC TSV file", + "description": "EC TSV file for Gp0119868", + "data_object_type": "Annotation Enzyme Commission", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_ec.tsv", + "md5_checksum": "dbc18fc26ae72645371911c8519549fa", + "id": "nmdc:dbc18fc26ae72645371911c8519549fa", + "file_size_bytes": 38628489 + }, + { + "name": "Gp0119868_COG GFF file", + "description": "COG GFF file for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_cog.gff", + "md5_checksum": "3d67230b15a3119104d0a96c47e3a51e", + "id": "nmdc:3d67230b15a3119104d0a96c47e3a51e", + "file_size_bytes": 282914278 + }, + { + "name": "Gp0119868_PFAM GFF file", + "description": "PFAM GFF file for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_pfam.gff", + "md5_checksum": "69e4ba430ae17ea5e82d2ec1a8595ffd", + "id": "nmdc:69e4ba430ae17ea5e82d2ec1a8595ffd", + "file_size_bytes": 225868620 + }, + { + "name": "Gp0119868_TigrFam GFF file", + "description": "TigrFam GFF file for 
Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_tigrfam.gff", + "md5_checksum": "cb48f118d9c407c955482ec145d1d705", + "id": "nmdc:cb48f118d9c407c955482ec145d1d705", + "file_size_bytes": 30514501 + }, + { + "name": "Gp0119868_SMART GFF file", + "description": "SMART GFF file for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_smart.gff", + "md5_checksum": "8c880ce9cade09ededdca6d52db621d1", + "id": "nmdc:8c880ce9cade09ededdca6d52db621d1", + "file_size_bytes": 65495127 + }, + { + "name": "Gp0119868_SuperFam GFF file", + "description": "SuperFam GFF file for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_supfam.gff", + "md5_checksum": "74b62c9e6ab9bd7d843b64b436361743", + "id": "nmdc:74b62c9e6ab9bd7d843b64b436361743", + "file_size_bytes": 312351237 + }, + { + "name": "Gp0119868_Cath FunFam GFF file", + "description": "Cath FunFam GFF file for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_cath_funfam.gff", + "md5_checksum": "0dd92986c02650f665649b79cd95c3ad", + "id": "nmdc:0dd92986c02650f665649b79cd95c3ad", + "file_size_bytes": 271944809 + }, + { + "name": "Gp0119868_KO_EC GFF file", + "description": "KO_EC GFF file for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/annotation/nmdc_mga0tc4144_ko_ec.gff", + "md5_checksum": "cc9ac778259f4a97efb179d199561126", + "id": "nmdc:cc9ac778259f4a97efb179d199561126", + "file_size_bytes": 189920850 + }, + { + "name": "gold:Gp0452679_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0452679", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0fsjy84/MAGs/nmdc_mga0fsjy84_checkm_qa.out", + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "id": 
"nmdc:d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 0 + }, + { + "name": "Gp0119868_tooShort (< 3kb) filtered contigs fasta file by metaBat2", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/MAGs/nmdc_mga0tc4144_bins.tooShort.fa", + "md5_checksum": "498ce8a4b78ff4c3df69922bf1be3bad", + "id": "nmdc:498ce8a4b78ff4c3df69922bf1be3bad", + "file_size_bytes": 576995325 + }, + { + "name": "Gp0119868_unbinned fasta file from metabat2", + "description": "unbinned fasta file from metabat2 for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/MAGs/nmdc_mga0tc4144_bins.unbinned.fa", + "md5_checksum": "7d43fe4cf26508913cf3787719268e19", + "id": "nmdc:7d43fe4cf26508913cf3787719268e19", + "file_size_bytes": 169030507 + }, + { + "name": "Gp0119868_metabat2 bin checkm quality assessment result", + "description": "metabat2 bin checkm quality assessment result for Gp0119868", + "data_object_type": "CheckM Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/MAGs/nmdc_mga0tc4144_checkm_qa.out", + "md5_checksum": "f3094463068795ea860272240e7d16fa", + "id": "nmdc:f3094463068795ea860272240e7d16fa", + "file_size_bytes": 4446 + }, + { + "name": "Gp0119868_high-quality and medium-quality bins", + "description": "high-quality and medium-quality bins for Gp0119868", + "data_object_type": "Metagenome Bins", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/MAGs/nmdc_mga0tc4144_hqmq_bin.zip", + "md5_checksum": "de95068612befe1bdf0e2dc4dee6f08c", + "id": "nmdc:de95068612befe1bdf0e2dc4dee6f08c", + "file_size_bytes": 3537823 + }, + { + "name": "Gp0119868_metabat2 bins", + "description": "metabat2 bins for Gp0119868", + "url": "https://data.microbiomedata.org/data/nmdc:mga0tc4144/MAGs/nmdc_mga0tc4144_metabat_bin.zip", + "md5_checksum": "b81a8b6ba415f515a85a7ea04f797c4f", + "id": "nmdc:b81a8b6ba415f515a85a7ea04f797c4f", + 
"file_size_bytes": 5092909 + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e2e" + }, + "description": "Assembled scaffold fasta for gold:Gp0119868", + "url": "https://data.microbiomedata.org/data/1777_95837/assembly/assembly_scaffolds.fna", + "file_size_bytes": 763685039, + "type": "nmdc:DataObject", + "id": "nmdc:9915d590bb2e432df4013627d3ff73f0", + "name": "assembly_scaffolds.fna", + "data_object_type": "Assembly Scaffolds" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e31" + }, + "description": "Assembled contigs fasta for gold:Gp0119868", + "url": "https://data.microbiomedata.org/data/1777_95837/assembly/assembly_contigs.fna", + "file_size_bytes": 767726834, + "type": "nmdc:DataObject", + "id": "nmdc:e30d13f5b218da7692120ac3351f9569", + "name": "assembly_contigs.fna", + "data_object_type": "Assembly Contigs" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e32" + }, + "description": "Metagenome Contig Coverage Stats for gold:Gp0119868", + "url": "https://data.microbiomedata.org/data/1777_95837/assembly/mapping_stats.txt", + "file_size_bytes": 101296494, + "type": "nmdc:DataObject", + "id": "nmdc:27a234e0d1b77e892b983efdc21dde66", + "name": "mapping_stats.txt", + "data_object_type": "Assembly Coverage Stats" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e35" + }, + "description": "Metagenome Alignment BAM file for gold:Gp0119868", + "url": "https://data.microbiomedata.org/data/1777_95837/assembly/pairedMapped_sorted.bam", + "file_size_bytes": 14326033080, + "type": "nmdc:DataObject", + "id": "nmdc:a46cbd68501ec2d9bfcb554f5dbd2750", + "name": "pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b14e38" + }, + "description": "Assembled AGP file for gold:Gp0119868", + "url": "https://data.microbiomedata.org/data/1777_95837/assembly/assembly.agp", + "file_size_bytes": 88089936, + "type": "nmdc:DataObject", + "id": "nmdc:694a426536347a0a53ee0de1d4b26011", + "name": "assembly.agp", 
+ "data_object_type": "Assembly AGP" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15abc" + }, + "id": "nmdc:f983a93833b133e11f59c068eb3f6802", + "name": "1777_95837.krona.html", + "description": "Gold:Gp0119868 KRONA plot HTML file", + "url": "https://data.microbiomedata.org/1777_95837/ReadbasedAnalysis/centrifuge/1777_95837.krona.html", + "file_size_bytes": 3385, + "data_object_type": "Centrifuge Krona Plot", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003d1ae706d7b5b15ac6" + }, + "id": "nmdc:2f8e21bd35b3295920684697ab9d9d99", + "name": "1777_95837.json", + "description": "Gold:Gp0119868 ReadbasedAnalysis result JSON file", + "url": "https://data.microbiomedata.org/1777_95837/ReadbasedAnalysis/1777_95837.json", + "file_size_bytes": 3385, + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1645a" + }, + "id": "nmdc:56c2a3473c823da7f93263db79d0a976", + "name": "gtdbtk.bac120.summary.tsv", + "description": "gtdbtk bacterial assignment result summary table for gold:Gp0119868", + "file_size_bytes": 1020, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/gtdbtk_output/classify/gtdbtk.bac120.summary.tsv", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1645b" + }, + "id": "nmdc:00ea11105e8835daa53d5d76da4d0ec3", + "name": "bins.tooShort.fa", + "description": "tooShort (< 3kb) filtered contigs fasta file by metaBat2 for gold:Gp0119868", + "file_size_bytes": 561853584, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/bins.tooShort.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1645d" + }, + "id": "nmdc:efe781987888e5e0d88a115b2398481f", + "name": "checkm_qa.out", + "description": "metabat2 bin checkm quality assessment result for gold:Gp0119868", + "file_size_bytes": 3933, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/checkm_qa.out", + "type": "nmdc:DataObject", + "data_object_type": "CheckM Statistics" 
+ }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1645e" + }, + "id": "nmdc:5b572d0e8c69fc955ed84513262fde5e", + "name": "bins.unbinned.fa", + "description": "unbinned fasta file from metabat2 for gold:Gp0119868", + "file_size_bytes": 177594798, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/bins.unbinned.fa", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16460" + }, + "id": "nmdc:fc18ecf3be9cf2ee5827e2b49688778b", + "name": "gold:Gp0119868.bins.1.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 3866355, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.1.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16461" + }, + "id": "nmdc:35ee5307b549a8b3aa306ff92b327e8a", + "name": "gold:Gp0119868.bins.18.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 606551, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.18.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16462" + }, + "id": "nmdc:3d9a3ff6301eb00d8136e7a54d1e3c05", + "name": "gold:Gp0119868.bins.12.fa", + "description": "hqmq binned contig file for gold:Gp0119868", + "file_size_bytes": 1728221, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/hqmq-metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16463" + }, + "id": "nmdc:8a4d2b08ebea91522a8c7476314485c8", + "name": "gold:Gp0119868.bins.9.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 239611, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.9.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { 
+ "_id": { + "$oid": "649b003f1ae706d7b5b16464" + }, + "id": "nmdc:8da865651bab286f7001103a4f19a774", + "name": "gold:Gp0119868.bins.19.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 522090, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.19.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16465" + }, + "id": "nmdc:df15576913acfa7534fccd25826fd268", + "name": "gold:Gp0119868.bins.3.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 218789, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.3.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16466" + }, + "id": "nmdc:bc50625fb2fc738dd5413851fd6824b6", + "name": "gold:Gp0119868.bins.5.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 224555, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.5.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16467" + }, + "id": "nmdc:1aeca6328784b46accd685576a8da7cf", + "name": "gold:Gp0119868.bins.7.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 3821686, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.7.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16468" + }, + "id": "nmdc:257113c7d570c4f2b27efcff3cc37e45", + "name": "gold:Gp0119868.bins.11.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 1642412, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.11.fa", + "type": "nmdc:DataObject", + 
"data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16469" + }, + "id": "nmdc:c6226e5c4592d4687f7f31c8afb1d844", + "name": "gold:Gp0119868.bins.4.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 247043, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.4.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1646a" + }, + "id": "nmdc:8773dc2585852b53d0da55e5d45c812d", + "name": "gold:Gp0119868.bins.12.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 1765442, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.12.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1646b" + }, + "id": "nmdc:9a0691f1a3a732fd830afa1af459a889", + "name": "gold:Gp0119868.bins.10.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 414633, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.10.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1646c" + }, + "id": "nmdc:06e697f51753f6f0845bba991c956983", + "name": "gold:Gp0119868.bins.8.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 357508, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.8.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1646d" + }, + "id": "nmdc:c6adbca08bb35ba5c5a9df0a1da8112c", + "name": "gold:Gp0119868.bins.13.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 443607, + "url": 
"https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.13.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1646e" + }, + "id": "nmdc:c52b44f7a2c63513d73185465df42abf", + "name": "gold:Gp0119868.bins.2.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 226810, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.2.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b1646f" + }, + "id": "nmdc:15713c75068ea297cc02730c966b4637", + "name": "gold:Gp0119868.bins.16.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 375100, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.16.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16470" + }, + "id": "nmdc:0295837cde5ff290c9ebadbe2866be2d", + "name": "gold:Gp0119868.bins.14.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 223663, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.14.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16471" + }, + "id": "nmdc:09cede22e2f354a3f943784fdfae7ea6", + "name": "gold:Gp0119868.bins.15.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 228672, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.15.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16473" + }, + "id": "nmdc:8c215b3e6db610fc8c4b3e7db5ef333e", + "name": "gold:Gp0119868.bins.6.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", 
+ "file_size_bytes": 1012884, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.6.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16476" + }, + "id": "nmdc:f847034a20df45684bc7fe93f96d90aa", + "name": "gold:Gp0119868.bins.17.fa", + "description": "metabat2 binned contig file for gold:Gp0119868", + "file_size_bytes": 2204984, + "url": "https://data.microbiomedata.org/data/1777_95837/img_MAGs/metabat-bins/bins.17.fa", + "type": "nmdc:DataObject", + "data_object_type": "Metagenome Bins" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca8" + }, + "description": "EC TSV File for gold:Gp0119868", + "url": "https://data.microbiomedata.org/1777_95837/img_annotation/Ga0482147_ec.tsv", + "md5_checksum": "0289e4fda11b48fb48c641c8e309b3ed", + "file_size_bytes": 3385, + "id": "nmdc:0289e4fda11b48fb48c641c8e309b3ed", + "name": "gold:Gp0119868_EC TSV File", + "data_object_type": "Annotation Enzyme Commission", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16ca9" + }, + "description": "KO TSV File for gold:Gp0119868", + "url": "https://data.microbiomedata.org/1777_95837/img_annotation/Ga0482147_ko.tsv", + "md5_checksum": "df46e32e073a15273bb1f709b15abcd3", + "file_size_bytes": 3385, + "id": "nmdc:df46e32e073a15273bb1f709b15abcd3", + "name": "gold:Gp0119868_KO TSV File", + "data_object_type": "Annotation KEGG Orthology", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cac" + }, + "description": "Functional annotation GFF file for gold:Gp0119868", + "url": "https://data.microbiomedata.org/1777_95837/img_annotation/Ga0482147_functional_annotation.gff", + "md5_checksum": "ff55ba482b074f7f3d56c64da1b3c9c8", + "file_size_bytes": 3385, + "id": "nmdc:ff55ba482b074f7f3d56c64da1b3c9c8", + "name": "gold:Gp0119868_Functional annotation GFF file", + "data_object_type": "Functional Annotation GFF", + "type": 
"nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16cae" + }, + "description": "Structural annotation GFF file for gold:Gp0119868", + "url": "https://data.microbiomedata.org/1777_95837/img_annotation/Ga0482147_structural_annotation.gff", + "md5_checksum": "804bc97cd8e16289290024ea3a7edbde", + "file_size_bytes": 3385, + "id": "nmdc:804bc97cd8e16289290024ea3a7edbde", + "name": "gold:Gp0119868_Structural annotation GFF file", + "data_object_type": "Structural Annotation GFF", + "type": "nmdc:DataObject" + }, + { + "_id": { + "$oid": "649b003f1ae706d7b5b16caf" + }, + "description": "Protein FAA for gold:Gp0119868", + "url": "https://data.microbiomedata.org/1777_95837/img_annotation/Ga0482147_proteins.faa", + "md5_checksum": "3c788da6e9004cd3ec276d5e44b19548", + "file_size_bytes": 3385, + "id": "nmdc:3c788da6e9004cd3ec276d5e44b19548", + "name": "gold:Gp0119868_Protein FAA", + "data_object_type": "Annotation Amino Acid FASTA", + "type": "nmdc:DataObject" + } + ], + "mags_activity_set": [ + { + "_id": { + "$oid": "649b0052ec087f6bbab34703" + }, + "has_input": [ + "nmdc:9a56bc21f20b6e1bc5edce3a88c469c9", + "nmdc:bda4bbf191832c401a301364ceaa99b9", + "nmdc:e30b169c1a0529b20903cf960b162d84" + ], + "too_short_contig_num": 1233331, + "part_of": [ + "nmdc:mga0tc4144" + ], + "binned_contig_num": 5316, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:d41d8cd98f00b204e9800998ecf8427e", + "nmdc:498ce8a4b78ff4c3df69922bf1be3bad", + "nmdc:7d43fe4cf26508913cf3787719268e19", + "nmdc:f3094463068795ea860272240e7d16fa", + "nmdc:de95068612befe1bdf0e2dc4dee6f08c", + "nmdc:b81a8b6ba415f515a85a7ea04f797c4f" + ], + "was_informed_by": "gold:Gp0119868", + "input_contig_num": 1343632, + "id": "nmdc:9238b3016b2ebfeb268f6e32a396fead", + "execution_resource": "NERSC-Cori", + "name": "MAGs Analysis Activity for nmdc:mga0tc4144", + "mags_list": [ + { + "number_of_contig": 139, + "completeness": 0.0, + "bin_name": "bins.1", + 
"gene_count": 634, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 2 + }, + { + "number_of_contig": 124, + "completeness": 14.55, + "bin_name": "bins.10", + "gene_count": 573, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 10 + }, + { + "number_of_contig": 260, + "completeness": 43.47, + "bin_name": "bins.11", + "gene_count": 1387, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 2.31, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 457, + "completeness": 35.45, + "bin_name": "bins.12", + "gene_count": 2076, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 8.18, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 1, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 10, + "completeness": 0.0, + "bin_name": "bins.13", + "gene_count": 640, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 27 + }, + { + "number_of_contig": 158, + "completeness": 36.56, + "bin_name": "bins.14", + "gene_count": 712, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + 
"gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.68, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 9 + }, + { + "number_of_contig": 820, + "completeness": 79.46, + "bin_name": "bins.15", + "gene_count": 5037, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 1, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 38.3, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 22 + }, + { + "number_of_contig": 27, + "completeness": 0.31, + "bin_name": "bins.16", + "gene_count": 310, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.16, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 8 + }, + { + "number_of_contig": 591, + "completeness": 72.67, + "bin_name": "bins.17", + "gene_count": 3964, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA9980", + "num_16s": 0, + "gtdbtk_family": "UBA9980", + "gtdbtk_domain": "Bacteria", + "contamination": 4.24, + "gtdbtk_class": "UBA9980", + "gtdbtk_phylum": "Wallbacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 33 + }, + { + "number_of_contig": 490, + "completeness": 80.55, + "bin_name": "bins.18", + "gene_count": 3685, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "UBA8953", + "num_16s": 1, + "gtdbtk_family": "UBA8953", + "gtdbtk_domain": "Bacteria", + "contamination": 2.12, + "gtdbtk_class": "UBA8953", + "gtdbtk_phylum": "Riflebacteria", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 40 + }, + { + "number_of_contig": 238, + "completeness": 34.12, + "bin_name": "bins.19", + "gene_count": 1179, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", 
+ "gtdbtk_domain": "", + "contamination": 1.52, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 326, + "completeness": 54.23, + "bin_name": "bins.2", + "gene_count": 1584, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 11.52, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 18 + }, + { + "number_of_contig": 147, + "completeness": 31.83, + "bin_name": "bins.20", + "gene_count": 678, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.71, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 11 + }, + { + "number_of_contig": 392, + "completeness": 31.9, + "bin_name": "bins.21", + "gene_count": 1724, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 1.72, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 12 + }, + { + "number_of_contig": 146, + "completeness": 21.99, + "bin_name": "bins.22", + "gene_count": 680, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.85, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + "number_of_contig": 359, + "completeness": 58.43, + "bin_name": "bins.3", + "gene_count": 1968, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "BM002", + "num_16s": 0, + "gtdbtk_family": "BM002", + "gtdbtk_domain": "Bacteria", + "contamination": 0.68, + "gtdbtk_class": "Syntrophobacteria", 
+ "gtdbtk_phylum": "Desulfobacterota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 23 + }, + { + "number_of_contig": 59, + "completeness": 13.27, + "bin_name": "bins.4", + "gene_count": 239, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 0 + }, + { + "number_of_contig": 50, + "completeness": 19.19, + "bin_name": "bins.5", + "gene_count": 283, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 5 + }, + { + "number_of_contig": 90, + "completeness": 23.33, + "bin_name": "bins.6", + "gene_count": 434, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.91, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 13 + }, + { + "number_of_contig": 94, + "completeness": 15.52, + "bin_name": "bins.7", + "gene_count": 465, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 7 + }, + { + "number_of_contig": 48, + "completeness": 0.0, + "bin_name": "bins.8", + "gene_count": 260, + "bin_quality": "LQ", + "gtdbtk_species": "", + "gtdbtk_order": "", + "num_16s": 0, + "gtdbtk_family": "", + "gtdbtk_domain": "", + "contamination": 0.0, + "gtdbtk_class": "", + "gtdbtk_phylum": "", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "", + "num_t_rna": 4 + }, + { + 
"number_of_contig": 291, + "completeness": 50.33, + "bin_name": "bins.9", + "gene_count": 1509, + "bin_quality": "MQ", + "gtdbtk_species": "", + "gtdbtk_order": "Bacteroidales", + "num_16s": 0, + "gtdbtk_family": "vadinHA17", + "gtdbtk_domain": "Bacteria", + "contamination": 3.59, + "gtdbtk_class": "Bacteroidia", + "gtdbtk_phylum": "Bacteroidota", + "num_5s": 0, + "num_23s": 0, + "gtdbtk_genus": "SR-FBR-E99", + "num_t_rna": 15 + } + ], + "unbinned_contig_num": 104985, + "started_at_time": "2021-10-11T02:23:28Z", + "type": "nmdc:MAGsAnalysisActivity", + "ended_at_time": "2021-10-11T09:39:23+00:00" + } + ], + "metagenome_annotation_activity_set": [ + { + "_id": { + "$oid": "649b005bbf2caae0415ef9a1" + }, + "has_input": [ + "nmdc:9a56bc21f20b6e1bc5edce3a88c469c9" + ], + "part_of": [ + "nmdc:mga0tc4144" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:7b83b2503caf6ef8448c50fc0a6c7199", + "nmdc:b6e6beea1a1e2bc5fd3defefb4329096", + "nmdc:e30b169c1a0529b20903cf960b162d84", + "nmdc:16ffd084e72cab2403866158e5e6a6cb", + "nmdc:dbc18fc26ae72645371911c8519549fa", + "nmdc:3d67230b15a3119104d0a96c47e3a51e", + "nmdc:69e4ba430ae17ea5e82d2ec1a8595ffd", + "nmdc:cb48f118d9c407c955482ec145d1d705", + "nmdc:8c880ce9cade09ededdca6d52db621d1", + "nmdc:74b62c9e6ab9bd7d843b64b436361743", + "nmdc:0dd92986c02650f665649b79cd95c3ad", + "nmdc:cc9ac778259f4a97efb179d199561126" + ], + "was_informed_by": "gold:Gp0119868", + "id": "nmdc:9238b3016b2ebfeb268f6e32a396fead", + "execution_resource": "NERSC-Cori", + "name": "Annotation Activity for nmdc:mga0tc4144", + "started_at_time": "2021-10-11T02:23:28Z", + "type": "nmdc:MetagenomeAnnotationActivity", + "ended_at_time": "2021-10-11T09:39:23+00:00" + } + ], + "metagenome_assembly_set": [ + { + "_id": { + "$oid": "649b005f2ca5ee4adb139f8f" + }, + "has_input": [ + "nmdc:f593f3aee7fc5760af7c039bdf62ba57" + ], + "part_of": [ + "nmdc:mga0tc4144" + ], + "ctg_logsum": 2099698, + "scaf_logsum": 2106188, + 
"gap_pct": 0.00141, + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:9a56bc21f20b6e1bc5edce3a88c469c9", + "nmdc:9b82712bfcf3013e35482c9e68b9095b", + "nmdc:76f1b90c58a3672147718346c40e81c9", + "nmdc:0fe116e4fc9c24bf39029cafe4d2256d", + "nmdc:bda4bbf191832c401a301364ceaa99b9" + ], + "asm_score": 5.69, + "was_informed_by": "gold:Gp0119868", + "ctg_powsum": 232346, + "scaf_max": 125304, + "id": "nmdc:9238b3016b2ebfeb268f6e32a396fead", + "scaf_powsum": 233179, + "execution_resource": "NERSC-Cori", + "contigs": 1343639, + "name": "Assembly Activity for nmdc:mga0tc4144", + "ctg_max": 125304, + "gc_std": 0.10463, + "contig_bp": 719936137, + "gc_avg": 0.56302, + "started_at_time": "2021-10-11T02:23:28Z", + "scaf_bp": 719946267, + "type": "nmdc:MetagenomeAssembly", + "scaffolds": 1342635, + "ended_at_time": "2021-10-11T09:39:23+00:00", + "ctg_l50": 550, + "ctg_l90": 300, + "ctg_n50": 346594, + "ctg_n90": 1096925, + "scaf_l50": 550, + "scaf_l90": 300, + "scaf_n50": 346521, + "scaf_n90": 1096070, + "scaf_l_gt50k": 1277940, + "scaf_n_gt50k": 18, + "scaf_pct_gt50k": 0.17750493 + } + ], + "omics_processing_set": [ + { + "_id": { + "$oid": "649b009773e8249959349ba3" + }, + "id": "nmdc:omprc-11-fy71mq09", + "name": "Enriched soil microbial communities from Old Woman Creek wetland in Ohio, USA - Methanogen_OWC", + "description": "Microbial controls on biogeochemical cycling in deep subsurface shale carbon reservoirs", + "has_input": [ + "nmdc:bsm-11-754aq706" + ], + "has_output": [ + "jgi:560df3660d878540fd6fe1b9" + ], + "part_of": [ + "nmdc:sty-11-8fb6t785" + ], + "add_date": "2015-08-15", + "mod_date": "2021-06-15", + "ncbi_project_name": "Deep subsurface shale carbon reservoir microbial communities from Ohio, USA - Methanogen_OWC", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "principal_investigator": { + "has_raw_value": "Kelly Wrighton" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing", + 
"gold_sequencing_project_identifiers": [ + "gold:Gp0119868" + ] + } + ], + "read_qc_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009d6bdd4fd20273c85e" + }, + "has_input": [ + "nmdc:fbcc8eac251f36bedf3497f34a05cd4a" + ], + "part_of": [ + "nmdc:mga0tc4144" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:f593f3aee7fc5760af7c039bdf62ba57", + "nmdc:8aec820c1193dad3bb21ba4084d43a20" + ], + "was_informed_by": "gold:Gp0119868", + "input_read_count": 138421166, + "output_read_bases": 20393388409, + "id": "nmdc:9238b3016b2ebfeb268f6e32a396fead", + "execution_resource": "NERSC-Cori", + "input_read_bases": 20901596066, + "name": "Read QC Activity for nmdc:mga0tc4144", + "output_read_count": 136125146, + "started_at_time": "2021-10-11T02:23:28Z", + "type": "nmdc:ReadQCAnalysisActivity", + "ended_at_time": "2021-10-11T09:39:23+00:00" + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "_id": { + "$oid": "649b009bff710ae353f8cf20" + }, + "has_input": [ + "nmdc:f593f3aee7fc5760af7c039bdf62ba57" + ], + "git_url": "https://github.com/microbiomedata/metaG/releases/tag/0.1", + "has_output": [ + "nmdc:fe347ec8d4621fca2e07d8fe6cfcea55", + "nmdc:73e3da946aae23b311d50688663ef847", + "nmdc:abf78291d78d1c78eafe96d6cfb062aa", + "nmdc:089f1a254b33552d2c8465e7e8de1186", + "nmdc:c01688a45c35e13a9ebeea38e04fb8ef", + "nmdc:d42c6a1ade379bfacea8a2abc762b45a", + "nmdc:606adc1b169bba287f1d231635fe756f", + "nmdc:9efccbd1e5bbb4d10ce94c2e8d2b33aa", + "nmdc:0c7920c75acfbd3d3b2d654e3b2cd9ec" + ], + "was_informed_by": "gold:Gp0119868", + "id": "nmdc:9238b3016b2ebfeb268f6e32a396fead", + "execution_resource": "NERSC-Cori", + "name": "ReadBased Analysis Activity for nmdc:mga0tc4144", + "started_at_time": "2021-10-11T02:23:28Z", + "type": "nmdc:ReadbasedAnalysis", + "ended_at_time": "2021-10-11T09:39:23+00:00" + } + ] + } +] \ No newline at end of file From 7fc1ef7da389d95c0e58c48c4fedc3ef6b053701 Mon Sep 17 00:00:00 2001 From: 
Michael Thornton Date: Thu, 16 Nov 2023 10:39:15 -0800 Subject: [PATCH 68/91] delete dry-run output data files --- .../nmdc_wfmgas-11-3epd0s35_scaffolds.fna | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna b/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna deleted file mode 100644 index 93ef5e29..00000000 --- a/nmdc_automation/re_iding/scripts/data/dryrun_data/results/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3epd0s35/nmdc_wfmgas-11-3epd0s35_scaffolds.fna +++ /dev/null @@ -1,2 +0,0 @@ -nmdc:wfmgas-11-3epd0s35>nmdc:wfmgas-11-3epd0s35snmdc:wfmgas-11-3epd0s35enmdc:wfmgas-11-3epd0s35qnmdc:wfmgas-11-3epd0s35unmdc:wfmgas-11-3epd0s35enmdc:wfmgas-11-3epd0s35nnmdc:wfmgas-11-3epd0s35cnmdc:wfmgas-11-3epd0s35enmdc:wfmgas-11-3epd0s35Inmdc:wfmgas-11-3epd0s35Dnmdc:wfmgas-11-3epd0s35-nmdc:wfmgas-11-3epd0s350nmdc:wfmgas-11-3epd0s350nmdc:wfmgas-11-3epd0s351nmdc:wfmgas-11-3epd0s35 nmdc:wfmgas-11-3epd0s35dnmdc:wfmgas-11-3epd0s35enmdc:wfmgas-11-3epd0s35snmdc:wfmgas-11-3epd0s35cnmdc:wfmgas-11-3epd0s35rnmdc:wfmgas-11-3epd0s35inmdc:wfmgas-11-3epd0s35pnmdc:wfmgas-11-3epd0s35tnmdc:wfmgas-11-3epd0s35inmdc:wfmgas-11-3epd0s35onmdc:wfmgas-11-3epd0s35nnmdc:wfmgas-11-3epd0s35 -nmdc:wfmgas-11-3epd0s35ACGT From 0426dd2b99e8800e13776bab9f7873891ad5b3a4 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 16 Nov 2023 14:19:11 -0800 Subject: [PATCH 69/91] added iteration to activity ids --- nmdc_automation/re_iding/base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index 5bf4aa58..b4876ab5 100644 --- 
a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -41,9 +41,10 @@ class ReIdTool: def __init__(self, api_client: NmdcRuntimeApi, data_dir: str, - template_file: str = None): + template_file: str = None, iteration: str = "1"): self.api_client = api_client self.data_dir = data_dir + self.workflow_iteration = iteration if template_file is None: template_file = NAPA_TEMPLATE with open(template_file, "r") as f: @@ -148,7 +149,7 @@ def update_reads_qc_analysis_activity_set(self, db_record: Dict, omics_processing_id = new_omics_processing.id has_input = new_omics_processing.has_output - new_activity_id = self.api_client.minter(activity_type) + new_activity_id = self.api_client.minter(activity_type) + "." + self.workflow_iteration logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") new_readsqc_base_dir = os.path.join(self.data_dir, omics_processing_id, @@ -206,7 +207,7 @@ def update_metagenome_assembly_set(self, db_record: Dict, updated_has_output = [] - new_activity_id = self.api_client.minter(activity_type) + new_activity_id = self.api_client.minter(activity_type) + "." + self.workflow_iteration logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") new_assembly_base_dir = os.path.join(self.data_dir, omics_processing_id, @@ -268,7 +269,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, omics_processing_id = new_omics_processing.id has_input = [self._get_input_do_id(new_db, "Filtered Sequencing Reads")] - new_activity_id = self.api_client.minter(activity_type) + new_activity_id = self.api_client.minter(activity_type) + "." 
+ self.workflow_iteration logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}") new_readbased_base_dir = os.path.join(self.data_dir, omics_processing_id, @@ -316,7 +317,6 @@ def _get_input_do_id(self, new_db, data_object_type: str): """Returns the string representation of a data object id given data object type""" for rec in new_db.data_object_set: - print(type(rec.data_object_type)) if str(rec.data_object_type) == data_object_type: return str(rec.id) From 6b06e681c0f45ccb6959bd2e66238e7ac3137c1e Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Fri, 17 Nov 2023 10:26:46 -0800 Subject: [PATCH 70/91] fix id iteration and name slot --- nmdc_automation/re_iding/base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nmdc_automation/re_iding/base.py b/nmdc_automation/re_iding/base.py index b4876ab5..ea9493f0 100644 --- a/nmdc_automation/re_iding/base.py +++ b/nmdc_automation/re_iding/base.py @@ -117,6 +117,7 @@ def update_omics_processing_has_output(self, db_record: Dict, # make new data objects with updated IDs for old_do_id in omics_record["has_output"]: old_do_rec = get_data_object_record_by_id(db_record, old_do_id) + old_do_rec["data_object_type"] = "Metagenome Raw Reads" old_do_id = old_do_rec.get("id") params = copy.deepcopy(old_do_rec) params.pop("id", None) @@ -287,6 +288,7 @@ def update_read_based_taxonomy_analysis_activity_set(self, db_record: Dict, old_do_rec["url"], new_readbased_base_dir, new_activity_id, self.data_dir ) logging.info(f"New file path computed for {data_object_type}: {new_file_path}") + new_do = self.make_new_data_object( omics_processing_id, activity_type, new_activity_id, old_do_rec, data_object_type ) @@ -343,7 +345,7 @@ def _make_new_activity_set_object(self, omics_processing_id: str, new_activity_i name=template["Activity"]["name"].replace("{id}", omics_processing_id), git_url=template["Git_repo"], version=template["Version"], 
part_of=[omics_processing_id], - execution_resource="NERSC - Perlmutter", + execution_resource="NERSC-Cori", started_at_time=activity_set_rec["started_at_time"], has_input=has_input, has_output=has_output, @@ -377,9 +379,9 @@ def make_new_data_object(self, omics_processing_id: str, data_object = NmdcDataObject( id=new_data_object_id, - name=template["name"].replace("{id}", omics_processing_id), + name=new_filename, description=new_description, - type="nmdc:Data_Object", + type="nmdc:DataObject", file_size_bytes=data_object_record["file_size_bytes"], md5_checksum=data_object_record["md5_checksum"], url=new_url, From 03bff679e63913cc30ccb8fc23329ee505cafa56 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Fri, 17 Nov 2023 10:33:00 -0800 Subject: [PATCH 71/91] change Dry run outdir for test with real files --- nmdc_automation/re_iding/scripts/re_id_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index e784244a..9950c94e 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -25,7 +25,7 @@ BASE_DATAFILE_DIR = "/global/cfs/cdirs/m3408/results" -DRYRUN_DATAFILE_DIR = "./data/dryrun_data/results" +DRYRUN_DATAFILE_DIR = "/global/cfs/cdirs/m3408/results" DATA_DIR = Path(__file__).parent.absolute().joinpath("data") From 6d9945e92bf61ab972acb6bc42e62f887a854501 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Fri, 17 Nov 2023 10:33:28 -0800 Subject: [PATCH 72/91] dry run reflecting files on nersc --- .../data/dryrun_re_ided_record_dump.json | 199 +++++++++--------- 1 file changed, 100 insertions(+), 99 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json index 4972881a..ee84252d 100644 --- a/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json +++ 
b/nmdc_automation/re_iding/scripts/data/dryrun_re_ided_record_dump.json @@ -2,191 +2,192 @@ { "data_object_set": [ { - "id": "nmdc:dobj-11-y134xn31", + "id": "nmdc:dobj-11-k7vny888", "name": "9422.8.132674.GTTTCG.fastq.gz", "description": "Raw sequencer read data", "file_size_bytes": 2861414297, + "data_object_type": "Metagenome Raw Reads", "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-dz3wvm61", - "name": "Reads QC result fastq (clean data)", + "id": "nmdc:dobj-11-019yes10", + "name": "nmdc_wfrqc-11-zma0ys31.1_filtered.fastq.gz", "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2571324879, "md5_checksum": "7bf778baef033d36f118f8591256d6ef", "data_object_type": "Filtered Sequencing Reads", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-8nssbh35/nmdc_wfrqc-11-8nssbh35_filtered.fastq.gz", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-zma0ys31.1/nmdc_wfrqc-11-zma0ys31.1_filtered.fastq.gz", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-1pp19646", - "name": "Reads QC summary statistics", + "id": "nmdc:dobj-11-hty12n62", + "name": "nmdc_wfrqc-11-zma0ys31.1_filterStats.txt", "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 290, "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", "data_object_type": "QC Statistics", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-8nssbh35/nmdc_wfrqc-11-8nssbh35_filterStats.txt", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-zma0ys31.1/nmdc_wfrqc-11-zma0ys31.1_filterStats.txt", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-vfk56697", - "name": "Final assembly contigs fasta", + "id": "nmdc:dobj-11-gast3j11", + "name": "nmdc_wfmgas-11-3jvymb63.1_contigs.fna", "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", - "file_size_bytes": 336, - 
"md5_checksum": "d4c4941391714c4d337bdf3444b2f108", + "file_size_bytes": 91134523, + "md5_checksum": "b96c8e7796616a8eefe473bff2c62e52", "data_object_type": "Assembly Contigs", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_contigs.fna", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3jvymb63.1/nmdc_wfmgas-11-3jvymb63.1_contigs.fna", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-pdqkv709", - "name": "Final assembly scaffolds fasta", + "id": "nmdc:dobj-11-bkza5366", + "name": "nmdc_wfmgas-11-3jvymb63.1_scaffolds.fna", "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", - "file_size_bytes": 700, - "md5_checksum": "a57129f6b1ac8b75a42f4ab610b4c20a", + "file_size_bytes": 90622585, + "md5_checksum": "6ca496a8b9b298278ad2b4010a7c8cb2", "data_object_type": "Assembly Scaffolds", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_scaffolds.fna", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3jvymb63.1/nmdc_wfmgas-11-3jvymb63.1_scaffolds.fna", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-ry36ec34", - "name": "Assembled contigs coverage information", + "id": "nmdc:dobj-11-v9xfxp70", + "name": "nmdc_wfmgas-11-3jvymb63.1_covstats.txt", "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", - "file_size_bytes": 0, - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 14431055, + "md5_checksum": "19782102f68575b03b7c12dd3d48e840", "data_object_type": "Assembly Coverage Stats", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_covstats.txt", - "type": "nmdc:Data_Object" + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3jvymb63.1/nmdc_wfmgas-11-3jvymb63.1_covstats.txt", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-v2dcgb32", - "name": "An AGP format file that describes the assembly", + "id": "nmdc:dobj-11-dz2mw103", + "name": "nmdc_wfmgas-11-3jvymb63.1_assembly.agp", "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", - "file_size_bytes": 0, - "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "file_size_bytes": 14581247, + "md5_checksum": "419b294106e3fca4a06d18fd3c8e9181", "data_object_type": "Assembly AGP", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_assembly.agp", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3jvymb63.1/nmdc_wfmgas-11-3jvymb63.1_assembly.agp", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-7f8z6a09", - "name": "Sorted bam file of reads mapping back to the final assembly", + "id": "nmdc:dobj-11-75skzn36", + "name": "nmdc_wfmgas-11-3jvymb63.1_pairedMapped_sorted.bam", "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 0, "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", "data_object_type": "Assembly Coverage BAM", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-ryns7t13/nmdc_wfmgas-11-ryns7t13_pairedMapped_sorted.bam", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-3jvymb63.1/nmdc_wfmgas-11-3jvymb63.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-7q03xt64", - "name": "GOTTCHA2 classification report file", + "id": "nmdc:dobj-11-ppa5pg23", + "name": "nmdc_wfrbt-11-e79d5x03.1_gottcha2_report.tsv", "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 13174, "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", 
"data_object_type": "GOTTCHA2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_gottcha2_report.tsv", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-qd17za87", - "name": "GOTTCHA2 report file", + "id": "nmdc:dobj-11-0yn4b055", + "name": "nmdc_wfrbt-11-e79d5x03.1_gottcha2_report_full.tsv", "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 1035818, "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", "data_object_type": "GOTTCHA2 Report Full", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_gottcha2_report_full.tsv", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-f0pzg444", - "name": "GOTTCHA2 krona plot HTML file", + "id": "nmdc:dobj-11-ty0z3p61", + "name": "nmdc_wfrbt-11-e79d5x03.1_gottcha2_krona.html", "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 262669, "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", "data_object_type": "GOTTCHA2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_gottcha2_krona.html", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_gottcha2_krona.html", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-kmysd345", - "name": "Centrifuge output read classification file", + "id": "nmdc:dobj-11-e6h68y35", + "name": "nmdc_wfrbt-11-e79d5x03.1_centrifuge_classification.tsv", 
"description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2189843623, "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", "data_object_type": "Centrifuge Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_centrifuge_classification.tsv", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-mgdfaj98", - "name": "Centrifuge output report file", + "id": "nmdc:dobj-11-chgp8k25", + "name": "nmdc_wfrbt-11-e79d5x03.1_centrifuge_report.tsv", "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 260134, "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", "data_object_type": "Centrifuge Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_centrifuge_report.tsv", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-g38md352", - "name": "Centrifug krona plot HTML file", + "id": "nmdc:dobj-11-0wbjqw24", + "name": "nmdc_wfrbt-11-e79d5x03.1_centrifuge_krona.html", "description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 2343980, "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", "data_object_type": "Centrifuge Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_centrifuge_krona.html", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_centrifuge_krona.html", + "type": 
"nmdc:DataObject" }, { - "id": "nmdc:dobj-11-d67rkh18", - "name": "Kraken2 output read classification file", + "id": "nmdc:dobj-11-xteq6n75", + "name": "nmdc_wfrbt-11-e79d5x03.1_kraken2_classification.tsv", "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 1785563917, "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", "data_object_type": "Kraken2 Taxonomic Classification", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_kraken2_classification.tsv", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-ptchwj55", - "name": "Kraken2 output report file", + "id": "nmdc:dobj-11-1n5y1278", + "name": "nmdc_wfrbt-11-e79d5x03.1_kraken2_report.tsv", "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 699896, "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", "data_object_type": "Kraken2 Classification Report", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_kraken2_report.tsv", - "type": "nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_kraken2_report.tsv", + "type": "nmdc:DataObject" }, { - "id": "nmdc:dobj-11-d0qt9513", - "name": "Kraken2 Krona plot HTML file", + "id": "nmdc:dobj-11-rtjb8n73", + "name": "nmdc_wfrbt-11-e79d5x03.1_kraken2_krona.html", "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", "file_size_bytes": 4221977, "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", "data_object_type": "Kraken2 Krona Plot", - "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-mxr19q91/nmdc_wfrbt-11-mxr19q91_kraken2_krona.html", - "type": 
"nmdc:Data_Object" + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-e79d5x03.1/nmdc_wfrbt-11-e79d5x03.1_kraken2_krona.html", + "type": "nmdc:DataObject" } ], "metagenome_assembly_set": [ { - "id": "nmdc:wfmgas-11-ryns7t13", + "id": "nmdc:wfmgas-11-3jvymb63.1", "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", "was_informed_by": "nmdc:omprc-11-bn8jcq58", - "execution_resource": "NERSC - Perlmutter", + "execution_resource": "NERSC-Cori", "git_url": "https://github.com/microbiomedata/metaAssembly", "has_input": [ - "nmdc:dobj-11-dz3wvm61" + "nmdc:dobj-11-019yes10" ], "has_output": [ - "nmdc:dobj-11-vfk56697", - "nmdc:dobj-11-pdqkv709", - "nmdc:dobj-11-ry36ec34", - "nmdc:dobj-11-v2dcgb32", - "nmdc:dobj-11-7f8z6a09" + "nmdc:dobj-11-gast3j11", + "nmdc:dobj-11-bkza5366", + "nmdc:dobj-11-v9xfxp70", + "nmdc:dobj-11-dz2mw103", + "nmdc:dobj-11-75skzn36" ], "type": "nmdc:MetagenomeAssembly", "part_of": [ @@ -233,7 +234,7 @@ "gold:Gp0115663" ], "has_output": [ - "nmdc:dobj-11-y134xn31" + "nmdc:dobj-11-k7vny888" ], "mod_date": "2021-06-15", "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", @@ -252,19 +253,19 @@ ], "read_qc_analysis_activity_set": [ { - "id": "nmdc:wfrqc-11-8nssbh35", + "id": "nmdc:wfrqc-11-zma0ys31.1", "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", "was_informed_by": "nmdc:omprc-11-bn8jcq58", - "execution_resource": "NERSC - Perlmutter", + "execution_resource": "NERSC-Cori", "git_url": "https://github.com/microbiomedata/ReadsQC", "has_input": [ - "nmdc:dobj-11-y134xn31" + "nmdc:dobj-11-k7vny888" ], "has_output": [ - "nmdc:dobj-11-dz3wvm61", - "nmdc:dobj-11-1pp19646" + "nmdc:dobj-11-019yes10", + "nmdc:dobj-11-hty12n62" ], "type": 
"nmdc:ReadQcAnalysisActivity", "part_of": [ @@ -279,26 +280,26 @@ ], "read_based_taxonomy_analysis_activity_set": [ { - "id": "nmdc:wfrbt-11-mxr19q91", + "id": "nmdc:wfrbt-11-e79d5x03.1", "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", "started_at_time": "2021-10-11T02:28:26Z", "ended_at_time": "2021-10-11T04:56:04+00:00", "was_informed_by": "nmdc:omprc-11-bn8jcq58", - "execution_resource": "NERSC - Perlmutter", + "execution_resource": "NERSC-Cori", "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", "has_input": [ - "nmdc:dobj-11-dz3wvm61" + "nmdc:dobj-11-019yes10" ], "has_output": [ - "nmdc:dobj-11-7q03xt64", - "nmdc:dobj-11-qd17za87", - "nmdc:dobj-11-f0pzg444", - "nmdc:dobj-11-kmysd345", - "nmdc:dobj-11-mgdfaj98", - "nmdc:dobj-11-g38md352", - "nmdc:dobj-11-d67rkh18", - "nmdc:dobj-11-ptchwj55", - "nmdc:dobj-11-d0qt9513" + "nmdc:dobj-11-ppa5pg23", + "nmdc:dobj-11-0yn4b055", + "nmdc:dobj-11-ty0z3p61", + "nmdc:dobj-11-e6h68y35", + "nmdc:dobj-11-chgp8k25", + "nmdc:dobj-11-0wbjqw24", + "nmdc:dobj-11-xteq6n75", + "nmdc:dobj-11-1n5y1278", + "nmdc:dobj-11-rtjb8n73" ], "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", "part_of": [ From 836a0ca322be61faaca69162ed76907edc6cafae Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Fri, 17 Nov 2023 12:25:56 -0800 Subject: [PATCH 73/91] added command to ingest records --- .../re_iding/scripts/re_id_tool.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 9950c94e..49e946f8 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -221,7 +221,29 @@ def process_records(ctx, dryrun, study_id, data_dir): inject_type=False)) with open(db_outfile, "w") as f: f.write(json.dumps(json_data, indent=4)) + +@cli.command() +@click.argument('reid_records_file', type=click.Path(exists=True)) +@click.pass_context +def 
ingest_records(ctx, reid_records_file): + """ + Read in json dump of re_id'd records and submit them to the /v1/workflows/activities endpoint + """ + start_time = time.time() + logging.info(f"Submitting re id'd records from : {reid_records_file}") + + config = Config(ctx.obj['site_config']) + api_client = NmdcRuntimeUserApi(username=config.napa_username, password=config.napa_password, + base_url=config.napa_base_url) + + with open(reid_records_file, "r") as f: + db_records = json.load(f) + + for record in db_records: + resp = api_client.post_objects(record) + + logger.info(f"{record} posted, got response: {resp}") def _get_data_dir(data_dir, dryrun): """ From ae75309564119c9461f2e010f0180397248bbf31 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 20 Nov 2023 13:43:52 -0800 Subject: [PATCH 74/91] update to reflect proper versions --- configs/re_iding_worklfows.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/re_iding_worklfows.yaml b/configs/re_iding_worklfows.yaml index 5f71a7da..3745401b 100644 --- a/configs/re_iding_worklfows.yaml +++ b/configs/re_iding_worklfows.yaml @@ -3,7 +3,7 @@ Workflows: Type: nmdc:ReadQcAnalysisActivity Enabled: True Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.8 + Version: v1.0.2 WDL: rqcfilter.wdl Collection: read_qc_analysis_activity_set ActivityRange: ReadQcAnalysisActivity @@ -44,7 +44,7 @@ Workflows: Type: nmdc:MetagenomeAssembly Enabled: True Git_repo: https://github.com/microbiomedata/metaAssembly - Version: v1.0.3 + Version: v1.0.2 WDL: jgi_assembly.wdl Collection: metagenome_assembly_set ActivityRange: MetagenomeAssembly @@ -120,7 +120,7 @@ Workflows: Type: nmdc:ReadBasedTaxonomyAnalysisActivity Enabled: True Git_repo: https://github.com/microbiomedata/ReadbasedAnalysis - Version: v1.0.5 + Version: v1.0.2 WDL: ReadbasedAnalysis.wdl Collection: read_based_taxonomy_analysis_activity_set ActivityRange: ReadBasedTaxonomyAnalysisActivity From 
da59e122fd11fb60c5c6f3b3986a0e2e6218b9dc Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 20 Nov 2023 14:07:13 -0800 Subject: [PATCH 75/91] added full stegen re-ided records --- ...c:sty-11-aygzgv51_re_ided_record_dump.json | 15395 ++++++++++++++++ 1 file changed, 15395 insertions(+) create mode 100644 nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_re_ided_record_dump.json diff --git a/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_re_ided_record_dump.json new file mode 100644 index 00000000..06d35da9 --- /dev/null +++ b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_re_ided_record_dump.json @@ -0,0 +1,15395 @@ +[ + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-xkx6jy64", + "name": "9422.8.132674.GTTTCG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2861414297, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ct3ee035", + "name": "nmdc_wfrqc-11-0ve6kv13.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 2571324879, + "md5_checksum": "7bf778baef033d36f118f8591256d6ef", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-0ve6kv13.1/nmdc_wfrqc-11-0ve6kv13.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8be0nr71", + "name": "nmdc_wfrqc-11-0ve6kv13.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 290, + "md5_checksum": "b99ce8adc125c95f0bfdadf36a3f6848", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrqc-11-0ve6kv13.1/nmdc_wfrqc-11-0ve6kv13.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-36rmmb88", + "name": 
"nmdc_wfmgas-11-mm1xhm21.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 91134523, + "md5_checksum": "058a25368fb61d9ae2ae0f9572cc7820", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-mm1xhm21.1/nmdc_wfmgas-11-mm1xhm21.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j5tv3r84", + "name": "nmdc_wfmgas-11-mm1xhm21.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 90622585, + "md5_checksum": "c981cbc33496f7e0cac7102d06f4e273", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-mm1xhm21.1/nmdc_wfmgas-11-mm1xhm21.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-htqksg07", + "name": "nmdc_wfmgas-11-mm1xhm21.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 14431055, + "md5_checksum": "cb9b732dc061c51bf791f70028d6a132", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-mm1xhm21.1/nmdc_wfmgas-11-mm1xhm21.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kykej776", + "name": "nmdc_wfmgas-11-mm1xhm21.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 14581247, + "md5_checksum": "1e531b7998e50f6b71fd913cb9dcb027", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-mm1xhm21.1/nmdc_wfmgas-11-mm1xhm21.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9153zz50", + "name": "nmdc_wfmgas-11-mm1xhm21.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 0, + 
"md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfmgas-11-mm1xhm21.1/nmdc_wfmgas-11-mm1xhm21.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8j315t40", + "name": "nmdc_wfrbt-11-ddtz8b82.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 13174, + "md5_checksum": "bc7c1bda004aab357c8f6cf5a42242f9", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qgrfgk34", + "name": "nmdc_wfrbt-11-ddtz8b82.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 1035818, + "md5_checksum": "9481434cadd0d6c154e2ec4c11ef0e04", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q439wj06", + "name": "nmdc_wfrbt-11-ddtz8b82.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 262669, + "md5_checksum": "6b5bc6ce7f11c1336a5f85a98fc18541", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9dfrtv83", + "name": "nmdc_wfrbt-11-ddtz8b82.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 2189843623, + "md5_checksum": "933c71bbc2f4a2e84d50f0d3864cf940", + 
"data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ydwz4r55", + "name": "nmdc_wfrbt-11-ddtz8b82.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 260134, + "md5_checksum": "1a208e2519770ef50740ac39f1b9ba9a", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nzqmzy34", + "name": "nmdc_wfrbt-11-ddtz8b82.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 2343980, + "md5_checksum": "f112a3840464ae7a9cf4a3bf295edd5c", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ke6wvj86", + "name": "nmdc_wfrbt-11-ddtz8b82.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 1785563917, + "md5_checksum": "7ca01ea379f0baed96f87d1435925f95", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-91jzv767", + "name": "nmdc_wfrbt-11-ddtz8b82.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 699896, + "md5_checksum": "c85f2f2b4a518c4adb23970448a5cb45", + "data_object_type": "Kraken2 
Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-snxdsz38", + "name": "nmdc_wfrbt-11-ddtz8b82.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-bn8jcq58", + "file_size_bytes": 4221977, + "md5_checksum": "94ee1bc2dc74830a21d5c3471d6cf223", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-bn8jcq58/nmdc:wfrbt-11-ddtz8b82.1/nmdc_wfrbt-11-ddtz8b82.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-mm1xhm21.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-ct3ee035" + ], + "has_output": [ + "nmdc:dobj-11-36rmmb88", + "nmdc:dobj-11-j5tv3r84", + "nmdc:dobj-11-htqksg07", + "nmdc:dobj-11-kykej776", + "nmdc:dobj-11-9153zz50" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.2", + "asm_score": 6.577, + "scaffolds": 169645, + "scaf_logsum": 215363, + "scaf_powsum": 24422, + "scaf_max": 68135, + "scaf_bp": 83496490, + "scaf_n50": 45550, + "scaf_n90": 141870, + "scaf_l50": 470, + "scaf_l90": 290, + "scaf_n_gt50k": 1, + "scaf_l_gt50k": 68135, + "scaf_pct_gt50k": 0.08160224, + "contigs": 169784, + "contig_bp": 83494920, + "ctg_n50": 45584, + "ctg_l50": 470, + "ctg_n90": 141996, + "ctg_l90": 290, + "ctg_logsum": 214373, + "ctg_powsum": 24284, + "ctg_max": 68135, + "gap_pct": 0.00188, + "gc_std": 0.11726, + "gc_avg": 0.46001 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-bn8jcq58", + "name": 
"Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qq8s6x03" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115663" + ], + "has_output": [ + "nmdc:dobj-11-xkx6jy64" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-0ve6kv13.1", + "name": "Read QC Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-xkx6jy64" + ], + "has_output": [ + "nmdc:dobj-11-ct3ee035", + "nmdc:dobj-11-8be0nr71" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.2", + "input_read_count": 32238374, + "output_read_count": 30774080, + "input_read_bases": 4867994474, + "output_read_bases": 4608772924 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-ddtz8b82.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-bn8jcq58", + "started_at_time": "2021-10-11T02:28:26Z", + "ended_at_time": "2021-10-11T04:56:04+00:00", + "was_informed_by": "nmdc:omprc-11-bn8jcq58", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + 
"has_input": [ + "nmdc:dobj-11-ct3ee035" + ], + "has_output": [ + "nmdc:dobj-11-8j315t40", + "nmdc:dobj-11-qgrfgk34", + "nmdc:dobj-11-q439wj06", + "nmdc:dobj-11-9dfrtv83", + "nmdc:dobj-11-ydwz4r55", + "nmdc:dobj-11-nzqmzy34", + "nmdc:dobj-11-ke6wvj86", + "nmdc:dobj-11-91jzv767", + "nmdc:dobj-11-snxdsz38" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-bn8jcq58" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-26xaf448", + "name": "9387.2.132031.CCGTCC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2080914094, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vgdrj975", + "name": "nmdc_wfrqc-11-y09tjs84.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 1806510860, + "md5_checksum": "0b301d2dd917c2be31422dd0e986dd5e", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrqc-11-y09tjs84.1/nmdc_wfrqc-11-y09tjs84.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-757e3g55", + "name": "nmdc_wfrqc-11-y09tjs84.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 289, + "md5_checksum": "0634e8261ce976d167457993d7f7a4ec", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrqc-11-y09tjs84.1/nmdc_wfrqc-11-y09tjs84.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f5praf79", + "name": "nmdc_wfmgas-11-7kypqk51.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 63969438, + "md5_checksum": "ea9ab8401db9da184a2865ce7441ed91", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfmgas-11-7kypqk51.1/nmdc_wfmgas-11-7kypqk51.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b7a42t48", + "name": "nmdc_wfmgas-11-7kypqk51.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 63617304, + "md5_checksum": "1b45790030d0cd0544abf19775000b88", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfmgas-11-7kypqk51.1/nmdc_wfmgas-11-7kypqk51.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0ppdgt80", + "name": "nmdc_wfmgas-11-7kypqk51.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 9879735, + "md5_checksum": "7e9750911549885c82f8576345cafac4", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfmgas-11-7kypqk51.1/nmdc_wfmgas-11-7kypqk51.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-veezfn59", + "name": "nmdc_wfmgas-11-7kypqk51.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 9951300, + "md5_checksum": "e3d2e171879b428a247409efee1d5185", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfmgas-11-7kypqk51.1/nmdc_wfmgas-11-7kypqk51.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jve9cp31", + "name": "nmdc_wfmgas-11-7kypqk51.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfmgas-11-7kypqk51.1/nmdc_wfmgas-11-7kypqk51.1_pairedMapped_sorted.bam", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vh1cpa93", + "name": "nmdc_wfrbt-11-1zrs2092.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 10721, + "md5_checksum": "17454627f873cc37e80700c4751c81d6", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hh4am062", + "name": "nmdc_wfrbt-11-1zrs2092.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 920924, + "md5_checksum": "e0479eb7fd3345aaf134640e0b9e11b0", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vfjyd480", + "name": "nmdc_wfrbt-11-1zrs2092.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 257441, + "md5_checksum": "a8433a0b17d7380fc836e4c9f85a7a54", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p3qkbd58", + "name": "nmdc_wfrbt-11-1zrs2092.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 1468295025, + "md5_checksum": "9e061ad19d4a6a3f209d1992d02df9f9", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": 
"nmdc:dobj-11-btep0y96", + "name": "nmdc_wfrbt-11-1zrs2092.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 257081, + "md5_checksum": "1d46eebd0f194f57dd9e92c9bc992891", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sb3f1596", + "name": "nmdc_wfrbt-11-1zrs2092.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 2331968, + "md5_checksum": "e5227b1cfdbc266c44d23028c92150a9", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-a3c87y52", + "name": "nmdc_wfrbt-11-1zrs2092.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 1204548180, + "md5_checksum": "05f7680c6646904cfb16fc146c0fed4a", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-taf9hs47", + "name": "nmdc_wfrbt-11-1zrs2092.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 653697, + "md5_checksum": "368cf81424348cdf46d17c13908280e7", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-41gf4f49", + "name": 
"nmdc_wfrbt-11-1zrs2092.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-zp2ar437", + "file_size_bytes": 3983935, + "md5_checksum": "b5091cfeed4fbea8316e50fbceea89bc", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-zp2ar437/nmdc:wfrbt-11-1zrs2092.1/nmdc_wfrbt-11-1zrs2092.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-7kypqk51.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-zp2ar437", + "started_at_time": "2021-10-11T02:28:09Z", + "ended_at_time": "2021-10-11T04:06:19+00:00", + "was_informed_by": "nmdc:omprc-11-zp2ar437", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-vgdrj975" + ], + "has_output": [ + "nmdc:dobj-11-f5praf79", + "nmdc:dobj-11-b7a42t48", + "nmdc:dobj-11-0ppdgt80", + "nmdc:dobj-11-veezfn59", + "nmdc:dobj-11-jve9cp31" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-zp2ar437" + ], + "version": "v1.0.2", + "asm_score": 5.224, + "scaffolds": 116565, + "scaf_logsum": 182081, + "scaf_powsum": 20721, + "scaf_max": 25973, + "scaf_bp": 58736060, + "scaf_n50": 27775, + "scaf_n90": 95875, + "scaf_l50": 493, + "scaf_l90": 286, + "contigs": 116661, + "contig_bp": 58735100, + "ctg_n50": 27791, + "ctg_l50": 493, + "ctg_n90": 95962, + "ctg_l90": 286, + "ctg_logsum": 181484, + "ctg_powsum": 20653, + "ctg_max": 25973, + "gap_pct": 0.00163, + "gc_std": 0.10759, + "gc_avg": 0.57262 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-zp2ar437", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-4qsqg549" + ], + "add_date": "2015-05-28", + 
"gold_sequencing_project_identifiers": [ + "gold:Gp0115666" + ], + "has_output": [ + "nmdc:dobj-11-26xaf448" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-y09tjs84.1", + "name": "Read QC Activity for nmdc:omprc-11-zp2ar437", + "started_at_time": "2021-10-11T02:28:09Z", + "ended_at_time": "2021-10-11T04:06:19+00:00", + "was_informed_by": "nmdc:omprc-11-zp2ar437", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-26xaf448" + ], + "has_output": [ + "nmdc:dobj-11-vgdrj975", + "nmdc:dobj-11-757e3g55" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-zp2ar437" + ], + "version": "v1.0.2", + "input_read_count": 22183982, + "output_read_count": 20195754, + "input_read_bases": 3349781282, + "output_read_bases": 3025260554 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-1zrs2092.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-zp2ar437", + "started_at_time": "2021-10-11T02:28:09Z", + "ended_at_time": "2021-10-11T04:06:19+00:00", + "was_informed_by": "nmdc:omprc-11-zp2ar437", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-vgdrj975" + ], + "has_output": [ + "nmdc:dobj-11-vh1cpa93", + "nmdc:dobj-11-hh4am062", + "nmdc:dobj-11-vfjyd480", + "nmdc:dobj-11-p3qkbd58", + "nmdc:dobj-11-btep0y96", + "nmdc:dobj-11-sb3f1596", + "nmdc:dobj-11-a3c87y52", + "nmdc:dobj-11-taf9hs47", + "nmdc:dobj-11-41gf4f49" + 
], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-zp2ar437" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-ee1cn292", + "name": "9422.8.132674.GTGGCC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 3050291373, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ffhnpy82", + "name": "nmdc_wfrqc-11-9etasc61.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-wepaa271", + "file_size_bytes": 2665008319, + "md5_checksum": "121b1c25e803f2a010ae5a2206a8d1d2", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrqc-11-9etasc61.1/nmdc_wfrqc-11-9etasc61.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-a8nerf23", + "name": "nmdc_wfrqc-11-9etasc61.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-wepaa271", + "file_size_bytes": 289, + "md5_checksum": "63fb5949ebafd1846ba60f2ce033191c", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrqc-11-9etasc61.1/nmdc_wfrqc-11-9etasc61.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7k2asm94", + "name": "nmdc_wfmgas-11-0dxjvv12.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-wepaa271", + "file_size_bytes": 184428159, + "md5_checksum": "fe39ba5de2380823788ce4003a47a31e", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfmgas-11-0dxjvv12.1/nmdc_wfmgas-11-0dxjvv12.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kqn65p93", + "name": "nmdc_wfmgas-11-0dxjvv12.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-wepaa271", + "file_size_bytes": 183452292, + "md5_checksum": 
"51d640650a9fa5b6aca374c35f5707a3", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfmgas-11-0dxjvv12.1/nmdc_wfmgas-11-0dxjvv12.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-s8bt5468", + "name": "nmdc_wfmgas-11-0dxjvv12.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-wepaa271", + "file_size_bytes": 27621864, + "md5_checksum": "38bdba5d5d76a8d163d14fd59fb1e9d6", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfmgas-11-0dxjvv12.1/nmdc_wfmgas-11-0dxjvv12.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-y35zbx52", + "name": "nmdc_wfmgas-11-0dxjvv12.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-wepaa271", + "file_size_bytes": 27986937, + "md5_checksum": "c96edd3767cedc24b74523797f00dcc9", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfmgas-11-0dxjvv12.1/nmdc_wfmgas-11-0dxjvv12.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6fjpw307", + "name": "nmdc_wfmgas-11-0dxjvv12.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-wepaa271", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfmgas-11-0dxjvv12.1/nmdc_wfmgas-11-0dxjvv12.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qb8vxx15", + "name": "nmdc_wfrbt-11-ar24ad30.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 13875, + "md5_checksum": "8bdf8bbee24242aaaee763c1d851c05e", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6gb30p82", + "name": "nmdc_wfrbt-11-ar24ad30.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 956974, + "md5_checksum": "2529ede10eb159148711d016ec022af3", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jes2n322", + "name": "nmdc_wfrbt-11-ar24ad30.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 265076, + "md5_checksum": "a0631ed87dc2e7c69355ef575dbe4e60", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2jzak926", + "name": "nmdc_wfrbt-11-ar24ad30.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 2377445510, + "md5_checksum": "93d26b69073bd4d6283aee3c7e5997d4", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2gzw1e23", + "name": "nmdc_wfrbt-11-ar24ad30.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 258291, + "md5_checksum": "d7a49bf0d9797a2b603643a2de896b5c", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kceg9012", + "name": "nmdc_wfrbt-11-ar24ad30.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 2333775, + "md5_checksum": "890f9f52d828e1ea8277b52566763069", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kkyjpr76", + "name": "nmdc_wfrbt-11-ar24ad30.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 1966520263, + "md5_checksum": "371b7fabbcbc2d22c3ca84b422a88863", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2aag1n47", + "name": "nmdc_wfrbt-11-ar24ad30.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 707661, + "md5_checksum": "8677985c5e8ad92dd6d051f85950a636", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jtrr8s86", + "name": "nmdc_wfrbt-11-ar24ad30.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-wepaa271", + "file_size_bytes": 4276256, + "md5_checksum": "9b2f355a4c2ff3651a3d1179212e2914", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-wepaa271/nmdc:wfrbt-11-ar24ad30.1/nmdc_wfrbt-11-ar24ad30.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-0dxjvv12.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-wepaa271", + "started_at_time": "2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T05:19:17+00:00", + "was_informed_by": "nmdc:omprc-11-wepaa271", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-ffhnpy82" + ], + "has_output": [ + "nmdc:dobj-11-7k2asm94", + "nmdc:dobj-11-kqn65p93", + "nmdc:dobj-11-s8bt5468", + "nmdc:dobj-11-y35zbx52", + "nmdc:dobj-11-6fjpw307" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-wepaa271" + ], + "version": "v1.0.2", + "asm_score": 4.087, + "scaffolds": 322890, + "scaf_logsum": 491574, + "scaf_powsum": 53839, + "scaf_max": 53286, + "scaf_bp": 169607136, + "scaf_n50": 83307, + "scaf_n90": 263381, + "scaf_l50": 526, + "scaf_l90": 299, + "scaf_n_gt50k": 1, + "scaf_l_gt50k": 53286, + "scaf_pct_gt50k": 0.03141731, + "contigs": 323269, + "contig_bp": 169601906, + "ctg_n50": 83667, + "ctg_l50": 525, + "ctg_n90": 263711, + "ctg_l90": 299, + "ctg_logsum": 489108, + "ctg_powsum": 53542, + "ctg_max": 53286, + "gap_pct": 0.00308, + "gc_std": 0.10793, + "gc_avg": 0.39548 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-wepaa271", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-srz83p34" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115668" + ], + "has_output": [ + "nmdc:dobj-11-ee1cn292" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial 
communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-9etasc61.1", + "name": "Read QC Activity for nmdc:omprc-11-wepaa271", + "started_at_time": "2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T05:19:17+00:00", + "was_informed_by": "nmdc:omprc-11-wepaa271", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-ee1cn292" + ], + "has_output": [ + "nmdc:dobj-11-ffhnpy82", + "nmdc:dobj-11-a8nerf23" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-wepaa271" + ], + "version": "v1.0.2", + "input_read_count": 35064492, + "output_read_count": 33873238, + "input_read_bases": 5294738292, + "output_read_bases": 5069132469 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-ar24ad30.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-wepaa271", + "started_at_time": "2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T05:19:17+00:00", + "was_informed_by": "nmdc:omprc-11-wepaa271", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-ffhnpy82" + ], + "has_output": [ + "nmdc:dobj-11-qb8vxx15", + "nmdc:dobj-11-6gb30p82", + "nmdc:dobj-11-jes2n322", + "nmdc:dobj-11-2jzak926", + "nmdc:dobj-11-2gzw1e23", + "nmdc:dobj-11-kceg9012", + "nmdc:dobj-11-kkyjpr76", + "nmdc:dobj-11-2aag1n47", + "nmdc:dobj-11-jtrr8s86" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-wepaa271" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": 
"nmdc:dobj-11-kt904y47", + "name": "9289.1.128215.GGACTCC-AGAGTAG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 6863035214, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sv0bh713", + "name": "nmdc_wfrqc-11-7phvda54.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-hymrq852", + "file_size_bytes": 5673282665, + "md5_checksum": "7e294ff66cb7ddf84edf9c8bed576bcd", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrqc-11-7phvda54.1/nmdc_wfrqc-11-7phvda54.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-cx1am139", + "name": "nmdc_wfrqc-11-7phvda54.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-hymrq852", + "file_size_bytes": 276, + "md5_checksum": "08e2a96f7aaaff5ff6f747cfe6f49e49", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrqc-11-7phvda54.1/nmdc_wfrqc-11-7phvda54.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rqawq123", + "name": "nmdc_wfmgas-11-r6fgf792.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-hymrq852", + "file_size_bytes": 278221490, + "md5_checksum": "4fb27b2822272ab6050fd881282093ce", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfmgas-11-r6fgf792.1/nmdc_wfmgas-11-r6fgf792.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kw3rda39", + "name": "nmdc_wfmgas-11-r6fgf792.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-hymrq852", + "file_size_bytes": 276478225, + "md5_checksum": "89487f952027b55fe8ac84645d59f901", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfmgas-11-r6fgf792.1/nmdc_wfmgas-11-r6fgf792.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4e0nnx88", + "name": "nmdc_wfmgas-11-r6fgf792.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-hymrq852", + "file_size_bytes": 45559440, + "md5_checksum": "e90d4b7c84b5ba0d2d0ea48f94d1169c", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfmgas-11-r6fgf792.1/nmdc_wfmgas-11-r6fgf792.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9r6q4b72", + "name": "nmdc_wfmgas-11-r6fgf792.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-hymrq852", + "file_size_bytes": 46693740, + "md5_checksum": "fb355165ac7c16a9c150bf1d2f3939c1", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfmgas-11-r6fgf792.1/nmdc_wfmgas-11-r6fgf792.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-scfac495", + "name": "nmdc_wfmgas-11-r6fgf792.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-hymrq852", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfmgas-11-r6fgf792.1/nmdc_wfmgas-11-r6fgf792.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4krx5r94", + "name": "nmdc_wfrbt-11-hsty2w92.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 18551, + "md5_checksum": "e20f8c00473472fa073adde871860801", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pj0afc11", + "name": "nmdc_wfrbt-11-hsty2w92.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 1200541, + "md5_checksum": "52f8c91d04e8d179af98e7fac35a8ff1", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2t97ac35", + "name": "nmdc_wfrbt-11-hsty2w92.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 278990, + "md5_checksum": "f721d9dd168b0dea080b191a4396167e", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c52pr431", + "name": "nmdc_wfrbt-11-hsty2w92.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 4742886512, + "md5_checksum": "ab77e396ec643b58b54da92848b88a96", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bfabx233", + "name": "nmdc_wfrbt-11-hsty2w92.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 266907, + "md5_checksum": "f2514844e47a9e3d268671f80f152bc1", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-n8frtb02", + "name": "nmdc_wfrbt-11-hsty2w92.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 2359747, + "md5_checksum": "a3e49f39f33c54bc8d9430a947cd4b16", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ytgfy117", + "name": "nmdc_wfrbt-11-hsty2w92.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 3859620862, + "md5_checksum": "17bc87145b0dcabbb8e3de0f393f4d4d", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ejaas840", + "name": "nmdc_wfrbt-11-hsty2w92.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 729541, + "md5_checksum": "aecb320fdfe4c4da35c0206dd34e0f40", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p4whzn48", + "name": "nmdc_wfrbt-11-hsty2w92.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-hymrq852", + "file_size_bytes": 4358324, + "md5_checksum": "77860ee043ae9738e7702a3f665b15fa", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hymrq852/nmdc:wfrbt-11-hsty2w92.1/nmdc_wfrbt-11-hsty2w92.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-r6fgf792.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-hymrq852", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T06:30:42+00:00", + "was_informed_by": "nmdc:omprc-11-hymrq852", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-sv0bh713" + ], + "has_output": [ + "nmdc:dobj-11-rqawq123", + "nmdc:dobj-11-kw3rda39", + "nmdc:dobj-11-4e0nnx88", + "nmdc:dobj-11-9r6q4b72", + "nmdc:dobj-11-scfac495" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-hymrq852" + ], + "version": "v1.0.2", + "asm_score": 12.582, + "scaffolds": 525116, + "scaf_logsum": 725191, + "scaf_powsum": 89882, + "scaf_max": 884972, + "scaf_bp": 254270837, + "scaf_n50": 133535, + "scaf_n90": 445430, + "scaf_l50": 455, + "scaf_l90": 285, + "scaf_n_gt50k": 34, + "scaf_l_gt50k": 3540548, + "scaf_pct_gt50k": 1.3924317, + "contigs": 531791, + "contig_bp": 254202396, + "ctg_n50": 139317, + "ctg_l50": 449, + "ctg_n90": 451813, + "ctg_l90": 285, + "ctg_logsum": 682158, + "ctg_powsum": 84136, + "ctg_max": 719201, + "gap_pct": 0.02692, + "gc_std": 0.09689, + "gc_avg": 0.48697 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-hymrq852", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-11219w54" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115679" + ], + "has_output": [ + "nmdc:dobj-11-kt904y47" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial 
communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_4-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-7phvda54.1", + "name": "Read QC Activity for nmdc:omprc-11-hymrq852", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T06:30:42+00:00", + "was_informed_by": "nmdc:omprc-11-hymrq852", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-kt904y47" + ], + "has_output": [ + "nmdc:dobj-11-sv0bh713", + "nmdc:dobj-11-cx1am139" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hymrq852" + ], + "version": "v1.0.2", + "input_read_count": 67696542, + "output_read_count": 67147510, + "input_read_bases": 10222177842, + "output_read_bases": 9825387057 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-hsty2w92.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-hymrq852", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T06:30:42+00:00", + "was_informed_by": "nmdc:omprc-11-hymrq852", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-sv0bh713" + ], + "has_output": [ + "nmdc:dobj-11-4krx5r94", + "nmdc:dobj-11-pj0afc11", + "nmdc:dobj-11-2t97ac35", + "nmdc:dobj-11-c52pr431", + "nmdc:dobj-11-bfabx233", + "nmdc:dobj-11-n8frtb02", + "nmdc:dobj-11-ytgfy117", + "nmdc:dobj-11-ejaas840", + "nmdc:dobj-11-p4whzn48" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hymrq852" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": 
"nmdc:dobj-11-1qqy8057", + "name": "9387.2.132031.GTCCGC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1840708400, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-a5sh8709", + "name": "nmdc_wfrqc-11-q99a8888.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-yt8css91", + "file_size_bytes": 1599931347, + "md5_checksum": "7d4057e3a44a05171c13fb0ed3e2294a", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrqc-11-q99a8888.1/nmdc_wfrqc-11-q99a8888.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gz34wj94", + "name": "nmdc_wfrqc-11-q99a8888.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-yt8css91", + "file_size_bytes": 286, + "md5_checksum": "dae7c6e067f69ef6db39b4240cc450ba", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrqc-11-q99a8888.1/nmdc_wfrqc-11-q99a8888.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-67radr13", + "name": "nmdc_wfmgas-11-yp64ym13.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-yt8css91", + "file_size_bytes": 63622816, + "md5_checksum": "595a0fa08f1eaedae3845e39f34db2de", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfmgas-11-yp64ym13.1/nmdc_wfmgas-11-yp64ym13.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vbsqm228", + "name": "nmdc_wfmgas-11-yp64ym13.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-yt8css91", + "file_size_bytes": 63273688, + "md5_checksum": "1e8fcb92d5cd50dc0fcbf7ce9a37d3d1", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfmgas-11-yp64ym13.1/nmdc_wfmgas-11-yp64ym13.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nsfsh625", + "name": "nmdc_wfmgas-11-yp64ym13.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-yt8css91", + "file_size_bytes": 9885905, + "md5_checksum": "54d36268565df4f9fb13996eef09ba1f", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfmgas-11-yp64ym13.1/nmdc_wfmgas-11-yp64ym13.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xdmf8m91", + "name": "nmdc_wfmgas-11-yp64ym13.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-yt8css91", + "file_size_bytes": 9903675, + "md5_checksum": "e086cc0222e1d06261b94060fdeb3336", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfmgas-11-yp64ym13.1/nmdc_wfmgas-11-yp64ym13.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pfwep293", + "name": "nmdc_wfmgas-11-yp64ym13.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-yt8css91", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfmgas-11-yp64ym13.1/nmdc_wfmgas-11-yp64ym13.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rfstat49", + "name": "nmdc_wfrbt-11-a2kbnp97.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 10576, + "md5_checksum": "56edf81e5f5102edf7e416bc9430fbb6", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-m2m4ys84", + "name": "nmdc_wfrbt-11-a2kbnp97.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 792905, + "md5_checksum": "c3d0f03afb44520ef5f2ea14e6daf705", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xsk3z684", + "name": "nmdc_wfrbt-11-a2kbnp97.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 254763, + "md5_checksum": "2afff209a40ca4895307f3a47080c534", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-cv7cx378", + "name": "nmdc_wfrbt-11-a2kbnp97.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 1336111813, + "md5_checksum": "d76c80bf15c4fd84f28c7150f24a8143", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3760bf86", + "name": "nmdc_wfrbt-11-a2kbnp97.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 254506, + "md5_checksum": "b9d6d8a8297f9a604ac85a334a3412de", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tm6c6b90", + "name": "nmdc_wfrbt-11-a2kbnp97.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 2323153, + "md5_checksum": "fe4bd9f63c32f50676792e3c4adced08", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ghm6c687", + "name": "nmdc_wfrbt-11-a2kbnp97.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 1097852664, + "md5_checksum": "eb189cbf0543203d2521397b73d4d34b", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9x3znn63", + "name": "nmdc_wfrbt-11-a2kbnp97.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 639213, + "md5_checksum": "ce3f002a824efde4a7134e6cd2e6306b", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1shxbf11", + "name": "nmdc_wfrbt-11-a2kbnp97.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-yt8css91", + "file_size_bytes": 3979807, + "md5_checksum": "ac90bf3384ce44d097f7897ac5ff8134", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-yt8css91/nmdc:wfrbt-11-a2kbnp97.1/nmdc_wfrbt-11-a2kbnp97.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-yp64ym13.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-yt8css91", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T03:58:24+00:00", + "was_informed_by": "nmdc:omprc-11-yt8css91", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-a5sh8709" + ], + "has_output": [ + "nmdc:dobj-11-67radr13", + "nmdc:dobj-11-vbsqm228", + "nmdc:dobj-11-nsfsh625", + "nmdc:dobj-11-xdmf8m91", + "nmdc:dobj-11-pfwep293" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-yt8css91" + ], + "version": "v1.0.2", + "asm_score": 17.061, + "scaffolds": 116033, + "scaf_logsum": 196103, + "scaf_powsum": 25552, + "scaf_max": 245816, + "scaf_bp": 58415492, + "scaf_n50": 26889, + "scaf_n90": 95057, + "scaf_l50": 479, + "scaf_l90": 286, + "scaf_n_gt50k": 17, + "scaf_l_gt50k": 1865703, + "scaf_pct_gt50k": 3.1938498, + "contigs": 116132, + "contig_bp": 58413782, + "ctg_n50": 26909, + "ctg_l50": 479, + "ctg_n90": 95138, + "ctg_l90": 286, + "ctg_logsum": 195440, + "ctg_powsum": 25448, + "ctg_max": 245816, + "gap_pct": 0.00293, + "gc_std": 0.12277, + "gc_avg": 0.47644 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-yt8css91", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T3.", + "has_input": [ + "nmdc:bsm-11-ynevd369" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115667" + ], + "has_output": [ + "nmdc:dobj-11-1qqy8057" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial 
communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T3_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-q99a8888.1", + "name": "Read QC Activity for nmdc:omprc-11-yt8css91", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T03:58:24+00:00", + "was_informed_by": "nmdc:omprc-11-yt8css91", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-1qqy8057" + ], + "has_output": [ + "nmdc:dobj-11-a5sh8709", + "nmdc:dobj-11-gz34wj94" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-yt8css91" + ], + "version": "v1.0.2", + "input_read_count": 19416222, + "output_read_count": 18855352, + "input_read_bases": 2931849522, + "output_read_bases": 2825090769 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-a2kbnp97.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-yt8css91", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T03:58:24+00:00", + "was_informed_by": "nmdc:omprc-11-yt8css91", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-a5sh8709" + ], + "has_output": [ + "nmdc:dobj-11-rfstat49", + "nmdc:dobj-11-m2m4ys84", + "nmdc:dobj-11-xsk3z684", + "nmdc:dobj-11-cv7cx378", + "nmdc:dobj-11-3760bf86", + "nmdc:dobj-11-tm6c6b90", + "nmdc:dobj-11-ghm6c687", + "nmdc:dobj-11-9x3znn63", + "nmdc:dobj-11-1shxbf11" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-yt8css91" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": 
"nmdc:dobj-11-d8vd2744", + "name": "9387.2.132031.TAGCTT.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1792111281, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0g6qq893", + "name": "nmdc_wfrqc-11-n7qc2h26.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 1566732675, + "md5_checksum": "232e31505b6a0251df2303c0563d64c1", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrqc-11-n7qc2h26.1/nmdc_wfrqc-11-n7qc2h26.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vtp48631", + "name": "nmdc_wfrqc-11-n7qc2h26.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 289, + "md5_checksum": "f3f4f75f19c92af6e98d2b45cccaacd5", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrqc-11-n7qc2h26.1/nmdc_wfrqc-11-n7qc2h26.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e7d6gc57", + "name": "nmdc_wfmgas-11-vrcn7q19.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 36482853, + "md5_checksum": "f2fe28f4b81f168151fb034da71ef017", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfmgas-11-vrcn7q19.1/nmdc_wfmgas-11-vrcn7q19.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1zmpbx90", + "name": "nmdc_wfmgas-11-vrcn7q19.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 36246294, + "md5_checksum": "044e757f7143b8a31d5cde9d862022ae", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfmgas-11-vrcn7q19.1/nmdc_wfmgas-11-vrcn7q19.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8bm57n65", + "name": "nmdc_wfmgas-11-vrcn7q19.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 6613533, + "md5_checksum": "e10d0752ff5cbdbe74759f7f90a16bab", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfmgas-11-vrcn7q19.1/nmdc_wfmgas-11-vrcn7q19.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gdgrra34", + "name": "nmdc_wfmgas-11-vrcn7q19.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 6651506, + "md5_checksum": "677f4c859655fb71e26a4d42777fee54", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfmgas-11-vrcn7q19.1/nmdc_wfmgas-11-vrcn7q19.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wxfjb753", + "name": "nmdc_wfmgas-11-vrcn7q19.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfmgas-11-vrcn7q19.1/nmdc_wfmgas-11-vrcn7q19.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nttd8r57", + "name": "nmdc_wfrbt-11-92e80740.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 9591, + "md5_checksum": "9d61d9f0c31a98f88ad8cde86254148d", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tsb8g573", + "name": "nmdc_wfrbt-11-92e80740.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 885985, + "md5_checksum": "7f93f97242aed036019f13492f5af35c", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f36r4q94", + "name": "nmdc_wfrbt-11-92e80740.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 251303, + "md5_checksum": "b4d0179bcc68b5186a3544d9ee0c6941", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w9e7pa09", + "name": "nmdc_wfrbt-11-92e80740.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 1268144933, + "md5_checksum": "a4243f71a0288f489c566ae85d85891d", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8me7eh50", + "name": "nmdc_wfrbt-11-92e80740.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 254575, + "md5_checksum": "f8b6ef830b94c6470056a3cd0a0eafc1", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-anbazd07", + "name": "nmdc_wfrbt-11-92e80740.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 2327293, + "md5_checksum": "a80779b32415ef001d0403f0b618b612", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6wdrxm02", + "name": "nmdc_wfrbt-11-92e80740.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 1037932028, + "md5_checksum": "01581429336a43d7dc2f85b8d49d6c6e", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-03x8y926", + "name": "nmdc_wfrbt-11-92e80740.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 641242, + "md5_checksum": "ce47d6686edb7b3472102d5883229c45", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-61v3wa19", + "name": "nmdc_wfrbt-11-92e80740.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-hgehsc37", + "file_size_bytes": 3995680, + "md5_checksum": "29b75e78b0b7fd8115614d8e9d341d46", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hgehsc37/nmdc:wfrbt-11-92e80740.1/nmdc_wfrbt-11-92e80740.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-vrcn7q19.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-hgehsc37", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T03:33:34+00:00", + "was_informed_by": "nmdc:omprc-11-hgehsc37", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-0g6qq893" + ], + "has_output": [ + "nmdc:dobj-11-e7d6gc57", + "nmdc:dobj-11-1zmpbx90", + "nmdc:dobj-11-8bm57n65", + "nmdc:dobj-11-gdgrra34", + "nmdc:dobj-11-wxfjb753" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-hgehsc37" + ], + "version": "v1.0.2", + "asm_score": 4.21, + "scaffolds": 78311, + "scaf_logsum": 60806, + "scaf_powsum": 6720.964, + "scaf_max": 15348, + "scaf_bp": 33089402, + "scaf_n50": 23850, + "scaf_n90": 67169, + "scaf_l50": 377, + "scaf_l90": 283, + "contigs": 78376, + "contig_bp": 33088752, + "ctg_n50": 23883, + "ctg_l50": 377, + "ctg_n90": 67231, + "ctg_l90": 283, + "ctg_logsum": 60365, + "ctg_powsum": 6668.288, + "ctg_max": 15348, + "gap_pct": 0.00196, + "gc_std": 0.11459, + "gc_avg": 0.5432 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-hgehsc37", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qxntpg05" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115664" + ], + "has_output": [ + "nmdc:dobj-11-d8vd2744" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW 
T2_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-n7qc2h26.1", + "name": "Read QC Activity for nmdc:omprc-11-hgehsc37", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T03:33:34+00:00", + "was_informed_by": "nmdc:omprc-11-hgehsc37", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-d8vd2744" + ], + "has_output": [ + "nmdc:dobj-11-0g6qq893", + "nmdc:dobj-11-vtp48631" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hgehsc37" + ], + "version": "v1.0.2", + "input_read_count": 19058974, + "output_read_count": 17338778, + "input_read_bases": 2877905074, + "output_read_bases": 2597325375 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-92e80740.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-hgehsc37", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T03:33:34+00:00", + "was_informed_by": "nmdc:omprc-11-hgehsc37", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-0g6qq893" + ], + "has_output": [ + "nmdc:dobj-11-nttd8r57", + "nmdc:dobj-11-tsb8g573", + "nmdc:dobj-11-f36r4q94", + "nmdc:dobj-11-w9e7pa09", + "nmdc:dobj-11-8me7eh50", + "nmdc:dobj-11-anbazd07", + "nmdc:dobj-11-6wdrxm02", + "nmdc:dobj-11-03x8y926", + "nmdc:dobj-11-61v3wa19" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hgehsc37" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-xhvk5873", + "name": "9491.1.134352.AGTCAA.fastq.gz", + "description": "Raw 
sequencer read data", + "file_size_bytes": 4674996922, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-swen8p47", + "name": "nmdc_wfrqc-11-fzd1m093.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 4090026888, + "md5_checksum": "e0ce93b88419f87568ff206e0efe3a24", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrqc-11-fzd1m093.1/nmdc_wfrqc-11-fzd1m093.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w5ta8647", + "name": "nmdc_wfrqc-11-fzd1m093.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 292, + "md5_checksum": "7bf8ff4cf0d98cccd8e1c20f77dd1690", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrqc-11-fzd1m093.1/nmdc_wfrqc-11-fzd1m093.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hr4km224", + "name": "nmdc_wfmgas-11-gae05w12.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 207743861, + "md5_checksum": "62ba3f7a070e68edfbd5d773d9158738", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfmgas-11-gae05w12.1/nmdc_wfmgas-11-gae05w12.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-33zrdw08", + "name": "nmdc_wfmgas-11-gae05w12.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 206587359, + "md5_checksum": "e802efb26c7692bd9d9c39bc22da5e03", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfmgas-11-gae05w12.1/nmdc_wfmgas-11-gae05w12.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5mfsnb08", 
+ "name": "nmdc_wfmgas-11-gae05w12.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 32772333, + "md5_checksum": "279c9f2f48cbd77ed47b91bb882b58c5", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfmgas-11-gae05w12.1/nmdc_wfmgas-11-gae05w12.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tzg3qj43", + "name": "nmdc_wfmgas-11-gae05w12.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 33226994, + "md5_checksum": "20e86d5edf8df1b745462274ad7bee59", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfmgas-11-gae05w12.1/nmdc_wfmgas-11-gae05w12.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6xc8ky57", + "name": "nmdc_wfmgas-11-gae05w12.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfmgas-11-gae05w12.1/nmdc_wfmgas-11-gae05w12.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gt3wk415", + "name": "nmdc_wfrbt-11-82fvcx60.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 19085, + "md5_checksum": "05bab80e2ff02d160b8e808f056ee2b5", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8j6c8x63", + "name": "nmdc_wfrbt-11-82fvcx60.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for 
nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 1243929, + "md5_checksum": "12b2d6afc355bce76249d750a9fab534", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-09943068", + "name": "nmdc_wfrbt-11-82fvcx60.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 281148, + "md5_checksum": "18214017d56658a48723c9c998dcba7e", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-skg59163", + "name": "nmdc_wfrbt-11-82fvcx60.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 3491726958, + "md5_checksum": "99ef009c73c128e561a4b9dcb70d7ff2", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-smr55c52", + "name": "nmdc_wfrbt-11-82fvcx60.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 264123, + "md5_checksum": "78dab6988b57c654462ef3dbeb64d8d6", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-61wr5n10", + "name": "nmdc_wfrbt-11-82fvcx60.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-7vsv7h78", + 
"file_size_bytes": 2352347, + "md5_checksum": "f9c01985f057825149d35de0650095a8", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nwqdf714", + "name": "nmdc_wfrbt-11-82fvcx60.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 2880889483, + "md5_checksum": "bcea8bbe63625ad0f3142abe69a4a11d", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-97afk968", + "name": "nmdc_wfrbt-11-82fvcx60.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 735519, + "md5_checksum": "054c3097c9682bc9a6e07f88fdecc0ee", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5rrq2q12", + "name": "nmdc_wfrbt-11-82fvcx60.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-7vsv7h78", + "file_size_bytes": 4410156, + "md5_checksum": "38d41d4299141abe28bf0405af80cdfc", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7vsv7h78/nmdc:wfrbt-11-82fvcx60.1/nmdc_wfrbt-11-82fvcx60.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-gae05w12.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-7vsv7h78", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T06:18:17+00:00", + 
"was_informed_by": "nmdc:omprc-11-7vsv7h78", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-swen8p47" + ], + "has_output": [ + "nmdc:dobj-11-hr4km224", + "nmdc:dobj-11-33zrdw08", + "nmdc:dobj-11-5mfsnb08", + "nmdc:dobj-11-tzg3qj43", + "nmdc:dobj-11-6xc8ky57" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-7vsv7h78" + ], + "version": "v1.0.2", + "asm_score": 7.785, + "scaffolds": 383447, + "scaf_logsum": 496628, + "scaf_powsum": 57689, + "scaf_max": 116556, + "scaf_bp": 190313553, + "scaf_n50": 102177, + "scaf_n90": 321076, + "scaf_l50": 474, + "scaf_l90": 290, + "scaf_n_gt50k": 6, + "scaf_l_gt50k": 453691, + "scaf_pct_gt50k": 0.23839132, + "contigs": 383712, + "contig_bp": 190310453, + "ctg_n50": 102228, + "ctg_l50": 474, + "ctg_n90": 321321, + "ctg_l90": 290, + "ctg_logsum": 494917, + "ctg_powsum": 57423, + "ctg_max": 116556, + "gap_pct": 0.00163, + "gc_std": 0.13426, + "gc_avg": 0.48844 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-7vsv7h78", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-j0wbx741" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115678" + ], + "has_output": [ + "nmdc:dobj-11-xhvk5873" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": 
"nmdc:wfrqc-11-fzd1m093.1", + "name": "Read QC Activity for nmdc:omprc-11-7vsv7h78", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T06:18:17+00:00", + "was_informed_by": "nmdc:omprc-11-7vsv7h78", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-xhvk5873" + ], + "has_output": [ + "nmdc:dobj-11-swen8p47", + "nmdc:dobj-11-w5ta8647" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-7vsv7h78" + ], + "version": "v1.0.2", + "input_read_count": 51286688, + "output_read_count": 48276864, + "input_read_bases": 7744289888, + "output_read_bases": 7231449575 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-82fvcx60.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-7vsv7h78", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T06:18:17+00:00", + "was_informed_by": "nmdc:omprc-11-7vsv7h78", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-swen8p47" + ], + "has_output": [ + "nmdc:dobj-11-gt3wk415", + "nmdc:dobj-11-8j6c8x63", + "nmdc:dobj-11-09943068", + "nmdc:dobj-11-skg59163", + "nmdc:dobj-11-smr55c52", + "nmdc:dobj-11-61wr5n10", + "nmdc:dobj-11-nwqdf714", + "nmdc:dobj-11-97afk968", + "nmdc:dobj-11-5rrq2q12" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-7vsv7h78" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-ym2gpg27", + "name": "10533.1.165310.GAGCTCA-TTGAGCT.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2106076506, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nfx9kp15", + "name": "nmdc_wfrqc-11-a98q5s27.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-5r54nt37", + 
"file_size_bytes": 1917552858, + "md5_checksum": "6a8409b21c45ba9feba873ec269c8ff7", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrqc-11-a98q5s27.1/nmdc_wfrqc-11-a98q5s27.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-78pqap21", + "name": "nmdc_wfrqc-11-a98q5s27.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 283, + "md5_checksum": "61fb06de10fe3a0c49c5afe14ab7fb32", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrqc-11-a98q5s27.1/nmdc_wfrqc-11-a98q5s27.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-70kap344", + "name": "nmdc_wfmgas-11-j3444209.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 55930696, + "md5_checksum": "3396cb15a0b511228bbfd5fdb640ec5f", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfmgas-11-j3444209.1/nmdc_wfmgas-11-j3444209.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-x1j3sf41", + "name": "nmdc_wfmgas-11-j3444209.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 55574732, + "md5_checksum": "4355038d3062f3362b811a03f7e07b7c", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfmgas-11-j3444209.1/nmdc_wfmgas-11-j3444209.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vq52jw17", + "name": "nmdc_wfmgas-11-j3444209.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 10032413, + "md5_checksum": "d9ce31b08bd418711df2bb00e95108f5", + "data_object_type": "Assembly Coverage Stats", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfmgas-11-j3444209.1/nmdc_wfmgas-11-j3444209.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1kt09302", + "name": "nmdc_wfmgas-11-j3444209.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 10091751, + "md5_checksum": "bc88787a903f10d5d1ded2591a66de9c", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfmgas-11-j3444209.1/nmdc_wfmgas-11-j3444209.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4sdqw362", + "name": "nmdc_wfmgas-11-j3444209.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfmgas-11-j3444209.1/nmdc_wfmgas-11-j3444209.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5r3y7f04", + "name": "nmdc_wfrbt-11-gwm7pc10.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 1553, + "md5_checksum": "ac39e916e17e08a845bb40d97519d8be", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jt1vsq30", + "name": "nmdc_wfrbt-11-gwm7pc10.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 836575, + "md5_checksum": "c6fd5c573ef8605d9b43ff9c698af423", + "data_object_type": "GOTTCHA2 Report Full", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-42yt4311", + "name": "nmdc_wfrbt-11-gwm7pc10.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 231097, + "md5_checksum": "eda0c04d692ecf137585676c15924626", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jtkkw042", + "name": "nmdc_wfrbt-11-gwm7pc10.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 1669254765, + "md5_checksum": "d9ea063be9ab8ea102c1e2ec2fa9f177", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-s6hnm974", + "name": "nmdc_wfrbt-11-gwm7pc10.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 255784, + "md5_checksum": "e1f164c534830cd628d67c564ace863b", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-564b6r37", + "name": "nmdc_wfrbt-11-gwm7pc10.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 2333760, + "md5_checksum": "a1062576d998b7b82e39b8d8520fa37e", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-17g43003", + "name": "nmdc_wfrbt-11-gwm7pc10.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 1335651191, + "md5_checksum": "040e6ca695283a12711c16344acd1e76", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-epx26m04", + "name": "nmdc_wfrbt-11-gwm7pc10.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 647609, + "md5_checksum": "ed4ced0ccbe3f6b34c35bd842e882cad", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p9c3wm04", + "name": "nmdc_wfrbt-11-gwm7pc10.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-5r54nt37", + "file_size_bytes": 3949449, + "md5_checksum": "f2eed9669268f69dbc31f0c4f839fccf", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-5r54nt37/nmdc:wfrbt-11-gwm7pc10.1/nmdc_wfrbt-11-gwm7pc10.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-j3444209.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-5r54nt37", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T02:42:25+00:00", + "was_informed_by": "nmdc:omprc-11-5r54nt37", + "execution_resource": "NERSC-Cori", + "git_url": 
"https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-nfx9kp15" + ], + "has_output": [ + "nmdc:dobj-11-70kap344", + "nmdc:dobj-11-x1j3sf41", + "nmdc:dobj-11-vq52jw17", + "nmdc:dobj-11-1kt09302", + "nmdc:dobj-11-4sdqw362" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-5r54nt37" + ], + "version": "v1.0.2", + "asm_score": 3.626, + "scaffolds": 118391, + "scaf_logsum": 70885, + "scaf_powsum": 7618.086, + "scaf_max": 12785, + "scaf_bp": 50762716, + "scaf_n50": 37659, + "scaf_n90": 100956, + "scaf_l50": 402, + "scaf_l90": 285, + "contigs": 118423, + "contig_bp": 50762396, + "ctg_n50": 37682, + "ctg_l50": 402, + "ctg_n90": 100987, + "ctg_l90": 285, + "ctg_logsum": 70596, + "ctg_powsum": 7584.611, + "ctg_max": 11834, + "gap_pct": 0.00063, + "gc_std": 0.12108, + "gc_avg": 0.59992 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-5r54nt37", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-r7ggfc16" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127623" + ], + "has_output": [ + "nmdc:dobj-11-ym2gpg27" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-a98q5s27.1", + "name": "Read QC Activity for nmdc:omprc-11-5r54nt37", + 
"started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T02:42:25+00:00", + "was_informed_by": "nmdc:omprc-11-5r54nt37", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-ym2gpg27" + ], + "has_output": [ + "nmdc:dobj-11-nfx9kp15", + "nmdc:dobj-11-78pqap21" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-5r54nt37" + ], + "version": "v1.0.2", + "input_read_count": 23705118, + "output_read_count": 22801896, + "input_read_bases": 3579472818, + "output_read_bases": 3409425046 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-gwm7pc10.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-5r54nt37", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T02:42:25+00:00", + "was_informed_by": "nmdc:omprc-11-5r54nt37", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-nfx9kp15" + ], + "has_output": [ + "nmdc:dobj-11-5r3y7f04", + "nmdc:dobj-11-jt1vsq30", + "nmdc:dobj-11-42yt4311", + "nmdc:dobj-11-jtkkw042", + "nmdc:dobj-11-s6hnm974", + "nmdc:dobj-11-564b6r37", + "nmdc:dobj-11-17g43003", + "nmdc:dobj-11-epx26m04", + "nmdc:dobj-11-p9c3wm04" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-5r54nt37" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-mgpw8m22", + "name": "10533.2.165322.CGGTTGT-AACAACC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2351763069, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xgacqj04", + "name": "nmdc_wfrqc-11-bnzc1778.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 2037866145, + "md5_checksum": "2d13b3a30339b9c5b4fba099f9d4b10f", + 
"data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrqc-11-bnzc1778.1/nmdc_wfrqc-11-bnzc1778.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-awxkxq42", + "name": "nmdc_wfrqc-11-bnzc1778.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 284, + "md5_checksum": "42be49edad69619e550ddd69d150490f", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrqc-11-bnzc1778.1/nmdc_wfrqc-11-bnzc1778.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-h1cam976", + "name": "nmdc_wfmgas-11-anseqn83.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 173503832, + "md5_checksum": "7525e2b427b29215f8f922ecb240867d", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfmgas-11-anseqn83.1/nmdc_wfmgas-11-anseqn83.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gb3vp850", + "name": "nmdc_wfmgas-11-anseqn83.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 172599209, + "md5_checksum": "9d1d80e399c5ddb3306b23262d9edbc2", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfmgas-11-anseqn83.1/nmdc_wfmgas-11-anseqn83.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t2vy7a91", + "name": "nmdc_wfmgas-11-anseqn83.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 25676445, + "md5_checksum": "ce2822363ea20de071e010f50e11058d", + "data_object_type": "Assembly Coverage Stats", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfmgas-11-anseqn83.1/nmdc_wfmgas-11-anseqn83.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qzpc0b53", + "name": "nmdc_wfmgas-11-anseqn83.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 25954905, + "md5_checksum": "45f67aaf5aa7d551ed404adcb6e774d5", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfmgas-11-anseqn83.1/nmdc_wfmgas-11-anseqn83.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-aaa0cj88", + "name": "nmdc_wfmgas-11-anseqn83.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfmgas-11-anseqn83.1/nmdc_wfmgas-11-anseqn83.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nn1pq627", + "name": "nmdc_wfrbt-11-1vrr4b04.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 754, + "md5_checksum": "550b631e1de3e01392154e54493d47ef", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c8dc8a23", + "name": "nmdc_wfrbt-11-1vrr4b04.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 641658, + "md5_checksum": "3f14ff51550d9d78dae3a7ec08514907", + "data_object_type": "GOTTCHA2 Report Full", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1qefv468", + "name": "nmdc_wfrbt-11-1vrr4b04.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 228494, + "md5_checksum": "1a7b8f8968f451b5d5ccb97a10a56d89", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-x9y3k778", + "name": "nmdc_wfrbt-11-1vrr4b04.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 1849982678, + "md5_checksum": "b09795fc768257d881e8ce547be0ce68", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-s1h0dz98", + "name": "nmdc_wfrbt-11-1vrr4b04.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 253852, + "md5_checksum": "064ba18473eb80ff0b484311565d2894", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5bk7y434", + "name": "nmdc_wfrbt-11-1vrr4b04.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 2331556, + "md5_checksum": "a7b6cc370371668be2e3bb90f5ca0fd1", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ttehq139", + "name": "nmdc_wfrbt-11-1vrr4b04.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 1471976767, + "md5_checksum": "60c663a34b79db2ee71edf1afe4c14e3", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f88jdj41", + "name": "nmdc_wfrbt-11-1vrr4b04.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 627498, + "md5_checksum": "bc8acb862c8942616ef07302667c334f", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ajjtne65", + "name": "nmdc_wfrbt-11-1vrr4b04.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-76ebsj44", + "file_size_bytes": 3921941, + "md5_checksum": "b797ed6cb135c993b582cac368b2a93c", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-76ebsj44/nmdc:wfrbt-11-1vrr4b04.1/nmdc_wfrbt-11-1vrr4b04.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-anseqn83.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-76ebsj44", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T03:29:50+00:00", + "was_informed_by": "nmdc:omprc-11-76ebsj44", + "execution_resource": "NERSC-Cori", + "git_url": 
"https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-xgacqj04" + ], + "has_output": [ + "nmdc:dobj-11-h1cam976", + "nmdc:dobj-11-gb3vp850", + "nmdc:dobj-11-t2vy7a91", + "nmdc:dobj-11-qzpc0b53", + "nmdc:dobj-11-aaa0cj88" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-76ebsj44" + ], + "version": "v1.0.2", + "asm_score": 3.923, + "scaffolds": 299890, + "scaf_logsum": 453436, + "scaf_powsum": 49370, + "scaf_max": 29400, + "scaf_bp": 159711824, + "scaf_n50": 78517, + "scaf_n90": 244244, + "scaf_l50": 546, + "scaf_l90": 301, + "contigs": 300102, + "contig_bp": 159709614, + "ctg_n50": 78532, + "ctg_l50": 546, + "ctg_n90": 244428, + "ctg_l90": 301, + "ctg_logsum": 452076, + "ctg_powsum": 49204, + "ctg_max": 29400, + "gap_pct": 0.00138, + "gc_std": 0.0955, + "gc_avg": 0.6367 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-76ebsj44", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-k3t2wk45" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127625" + ], + "has_output": [ + "nmdc:dobj-11-mgpw8m22" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-bnzc1778.1", + "name": "Read QC Activity for nmdc:omprc-11-76ebsj44", + "started_at_time": 
"2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T03:29:50+00:00", + "was_informed_by": "nmdc:omprc-11-76ebsj44", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-mgpw8m22" + ], + "has_output": [ + "nmdc:dobj-11-xgacqj04", + "nmdc:dobj-11-awxkxq42" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-76ebsj44" + ], + "version": "v1.0.2", + "input_read_count": 26227312, + "output_read_count": 25182244, + "input_read_bases": 3960324112, + "output_read_bases": 3764845015 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-1vrr4b04.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-76ebsj44", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T03:29:50+00:00", + "was_informed_by": "nmdc:omprc-11-76ebsj44", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-xgacqj04" + ], + "has_output": [ + "nmdc:dobj-11-nn1pq627", + "nmdc:dobj-11-c8dc8a23", + "nmdc:dobj-11-1qefv468", + "nmdc:dobj-11-x9y3k778", + "nmdc:dobj-11-s1h0dz98", + "nmdc:dobj-11-5bk7y434", + "nmdc:dobj-11-ttehq139", + "nmdc:dobj-11-f88jdj41", + "nmdc:dobj-11-ajjtne65" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-76ebsj44" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-vkxna711", + "name": "10533.2.165322.TACCAAC-GGTTGGT.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2167583658, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7bfm0w47", + "name": "nmdc_wfrqc-11-41xbp492.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 1944721961, + "md5_checksum": "07499ad2f2b80f42bd7109732b1eef90", + "data_object_type": "Filtered 
Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrqc-11-41xbp492.1/nmdc_wfrqc-11-41xbp492.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dtm7d928", + "name": "nmdc_wfrqc-11-41xbp492.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 287, + "md5_checksum": "9089d07fdee5ed03e901c1656206af02", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrqc-11-41xbp492.1/nmdc_wfrqc-11-41xbp492.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-byqvjv18", + "name": "nmdc_wfmgas-11-2gazt604.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 47947718, + "md5_checksum": "d8e9059b8c40422b02f00bb8a6e2aff4", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfmgas-11-2gazt604.1/nmdc_wfmgas-11-2gazt604.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-k4w8qs13", + "name": "nmdc_wfmgas-11-2gazt604.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 47630939, + "md5_checksum": "bcafa28ee8b69c82a3789d95fd3102f6", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfmgas-11-2gazt604.1/nmdc_wfmgas-11-2gazt604.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4jngmn43", + "name": "nmdc_wfmgas-11-2gazt604.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 8902615, + "md5_checksum": "d0eecd1c60d25aaca4539029934ebd4a", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfmgas-11-2gazt604.1/nmdc_wfmgas-11-2gazt604.1_covstats.txt", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-51vnr518", + "name": "nmdc_wfmgas-11-2gazt604.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 8955469, + "md5_checksum": "fbeaa000b952dc90e79ac689227ea6d6", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfmgas-11-2gazt604.1/nmdc_wfmgas-11-2gazt604.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1fgwc151", + "name": "nmdc_wfmgas-11-2gazt604.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfmgas-11-2gazt604.1/nmdc_wfmgas-11-2gazt604.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p62fmk91", + "name": "nmdc_wfrbt-11-heh0td77.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 2399, + "md5_checksum": "a91f8dccb2baa53550216f5bdfbf1473", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f2nawf37", + "name": "nmdc_wfrbt-11-heh0td77.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 743066, + "md5_checksum": "a81ddf4e3bc044e8601554117cd887aa", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-axc60h37", + "name": 
"nmdc_wfrbt-11-heh0td77.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 233970, + "md5_checksum": "a012dc3a7b44774019c313fd8ee88efc", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-z8m52121", + "name": "nmdc_wfrbt-11-heh0td77.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 1673697764, + "md5_checksum": "dd4023a1488bdfc73b12c422b62b274a", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8hwg7f80", + "name": "nmdc_wfrbt-11-heh0td77.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 253730, + "md5_checksum": "2f9b1c55d52cc61affbe99f5163b48c8", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3dfp3q69", + "name": "nmdc_wfrbt-11-heh0td77.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 2327521, + "md5_checksum": "ccf7f447a25ebf354ce44b3f1f90f223", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-50efv906", + "name": 
"nmdc_wfrbt-11-heh0td77.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 1343921825, + "md5_checksum": "2c8efdb77cbcd1276c4fb386fd37bd6d", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vfn3tv98", + "name": "nmdc_wfrbt-11-heh0td77.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 638478, + "md5_checksum": "806b27f1fa5a423100b113bb56edc708", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jxejae14", + "name": "nmdc_wfrbt-11-heh0td77.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-s6wqag22", + "file_size_bytes": 3987411, + "md5_checksum": "bb3e6793c4f036b9756f075d41846964", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-s6wqag22/nmdc:wfrbt-11-heh0td77.1/nmdc_wfrbt-11-heh0td77.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-2gazt604.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-s6wqag22", + "started_at_time": "2021-12-01T21:31:29Z", + "ended_at_time": "2021-12-02T20:54:56+00:00", + "was_informed_by": "nmdc:omprc-11-s6wqag22", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-7bfm0w47" + ], + "has_output": [ + "nmdc:dobj-11-byqvjv18", + "nmdc:dobj-11-k4w8qs13", + "nmdc:dobj-11-4jngmn43", + "nmdc:dobj-11-51vnr518", + "nmdc:dobj-11-1fgwc151" + ], + 
"type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-s6wqag22" + ], + "version": "v1.0.2", + "asm_score": 7.629, + "scaffolds": 105366, + "scaf_logsum": 63657, + "scaf_powsum": 7386.413, + "scaf_max": 30685, + "scaf_bp": 43390661, + "scaf_n50": 34749, + "scaf_n90": 91567, + "scaf_l50": 368, + "scaf_l90": 284, + "contigs": 105397, + "contig_bp": 43390261, + "ctg_n50": 34766, + "ctg_l50": 368, + "ctg_n90": 91597, + "ctg_l90": 284, + "ctg_logsum": 63429, + "ctg_powsum": 7359.443, + "ctg_max": 30685, + "gap_pct": 0.00092, + "gc_std": 0.09232, + "gc_avg": 0.60819 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-s6wqag22", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-mxdygh62" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127626" + ], + "has_output": [ + "nmdc:dobj-11-vkxna711" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-41xbp492.1", + "name": "Read QC Activity for nmdc:omprc-11-s6wqag22", + "started_at_time": "2021-12-01T21:31:29Z", + "ended_at_time": "2021-12-02T20:54:56+00:00", + "was_informed_by": "nmdc:omprc-11-s6wqag22", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-vkxna711" + ], + 
"has_output": [ + "nmdc:dobj-11-7bfm0w47", + "nmdc:dobj-11-dtm7d928" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-s6wqag22" + ], + "version": "v1.0.2", + "input_read_count": 24223170, + "output_read_count": 22768968, + "input_read_bases": 3657698670, + "output_read_bases": 3405205631 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-heh0td77.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-s6wqag22", + "started_at_time": "2021-12-01T21:31:29Z", + "ended_at_time": "2021-12-02T20:54:56+00:00", + "was_informed_by": "nmdc:omprc-11-s6wqag22", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-7bfm0w47" + ], + "has_output": [ + "nmdc:dobj-11-p62fmk91", + "nmdc:dobj-11-f2nawf37", + "nmdc:dobj-11-axc60h37", + "nmdc:dobj-11-z8m52121", + "nmdc:dobj-11-8hwg7f80", + "nmdc:dobj-11-3dfp3q69", + "nmdc:dobj-11-50efv906", + "nmdc:dobj-11-vfn3tv98", + "nmdc:dobj-11-jxejae14" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-s6wqag22" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-38txm578", + "name": "10533.1.165310.ATAGCGG-ACCGCTA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2150489977, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kc9meh18", + "name": "nmdc_wfrqc-11-wv6ahz57.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 1795382596, + "md5_checksum": "8585f6896702bddf64b02191be5921f4", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrqc-11-wv6ahz57.1/nmdc_wfrqc-11-wv6ahz57.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-22zdtg08", + "name": 
"nmdc_wfrqc-11-wv6ahz57.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 289, + "md5_checksum": "b9b6464ecc746a4cc39b549696c5fe9c", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrqc-11-wv6ahz57.1/nmdc_wfrqc-11-wv6ahz57.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-46n06572", + "name": "nmdc_wfmgas-11-4fp3ec47.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 96614071, + "md5_checksum": "ee9d5f5e5f8e307dc59d15026b78dfa6", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfmgas-11-4fp3ec47.1/nmdc_wfmgas-11-4fp3ec47.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wzxr3e63", + "name": "nmdc_wfmgas-11-4fp3ec47.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 96039561, + "md5_checksum": "181604f824b03dbf5a07df8c39e31573", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfmgas-11-4fp3ec47.1/nmdc_wfmgas-11-4fp3ec47.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-g7491476", + "name": "nmdc_wfmgas-11-4fp3ec47.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 16258702, + "md5_checksum": "56e037e03040393965de81cbbcbee84d", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfmgas-11-4fp3ec47.1/nmdc_wfmgas-11-4fp3ec47.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ccq8w122", + "name": "nmdc_wfmgas-11-4fp3ec47.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 16419809, + "md5_checksum": 
"70db4b5de54a88eb0ba60e8dd924c266", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfmgas-11-4fp3ec47.1/nmdc_wfmgas-11-4fp3ec47.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kp7x3p89", + "name": "nmdc_wfmgas-11-4fp3ec47.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfmgas-11-4fp3ec47.1/nmdc_wfmgas-11-4fp3ec47.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9snyj165", + "name": "nmdc_wfrbt-11-byfsnm31.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 1500, + "md5_checksum": "fef871a81032dd1f3e57dc1c7d5aa3db", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6z8rxk69", + "name": "nmdc_wfrbt-11-byfsnm31.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 692993, + "md5_checksum": "6c7fec765f2a225f168ebb1f69961013", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ye09ze12", + "name": "nmdc_wfrbt-11-byfsnm31.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 230779, + "md5_checksum": "6e660d5a062f9c3ad7b49d8d438453d7", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7he50g23", + "name": "nmdc_wfrbt-11-byfsnm31.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 1645928829, + "md5_checksum": "77db34862804280185d3b1ce961e5338", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-de1j7f31", + "name": "nmdc_wfrbt-11-byfsnm31.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 254646, + "md5_checksum": "84e3efb84d961d189ece310911ccf475", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fg5qsr83", + "name": "nmdc_wfrbt-11-byfsnm31.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 2332082, + "md5_checksum": "b8fd31679921f8b68c80917e14caa260", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hc12pc88", + "name": "nmdc_wfrbt-11-byfsnm31.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 1316771556, + "md5_checksum": "715c66c69b621478da7d48481f3cbd1d", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rek7xf60", + "name": "nmdc_wfrbt-11-byfsnm31.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 626940, + "md5_checksum": "0781e8042688219035efafe7d75858d0", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gvcy1m93", + "name": "nmdc_wfrbt-11-byfsnm31.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-x0es2p18", + "file_size_bytes": 3921891, + "md5_checksum": "85547ab860ef9d6877ba7abc8881740a", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-x0es2p18/nmdc:wfrbt-11-byfsnm31.1/nmdc_wfrbt-11-byfsnm31.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-4fp3ec47.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-x0es2p18", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T03:30:59+00:00", + "was_informed_by": "nmdc:omprc-11-x0es2p18", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-kc9meh18" + ], + "has_output": [ + "nmdc:dobj-11-46n06572", + "nmdc:dobj-11-wzxr3e63", + "nmdc:dobj-11-g7491476", + "nmdc:dobj-11-ccq8w122", + "nmdc:dobj-11-kp7x3p89" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-x0es2p18" + ], + "version": "v1.0.2", + "asm_score": 5.95, + "scaffolds": 190940, + "scaf_logsum": 174680, + "scaf_powsum": 19462, + "scaf_max": 33408, + "scaf_bp": 88103488, + "scaf_n50": 56334, + "scaf_n90": 162481, + 
"scaf_l50": 434, + "scaf_l90": 288, + "contigs": 191010, + "contig_bp": 88102698, + "ctg_n50": 56361, + "ctg_l50": 434, + "ctg_n90": 162547, + "ctg_l90": 288, + "ctg_logsum": 174168, + "ctg_powsum": 19404, + "ctg_max": 33408, + "gap_pct": 0.0009, + "gc_std": 0.09154, + "gc_avg": 0.62452 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-x0es2p18", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-msqbhe76" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127624" + ], + "has_output": [ + "nmdc:dobj-11-38txm578" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-wv6ahz57.1", + "name": "Read QC Activity for nmdc:omprc-11-x0es2p18", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T03:30:59+00:00", + "was_informed_by": "nmdc:omprc-11-x0es2p18", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-38txm578" + ], + "has_output": [ + "nmdc:dobj-11-kc9meh18", + "nmdc:dobj-11-22zdtg08" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-x0es2p18" + ], + "version": "v1.0.2", + "input_read_count": 25674112, + "output_read_count": 22503352, + "input_read_bases": 3876790912, + 
"output_read_bases": 3361311014 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-byfsnm31.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-x0es2p18", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T03:30:59+00:00", + "was_informed_by": "nmdc:omprc-11-x0es2p18", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-kc9meh18" + ], + "has_output": [ + "nmdc:dobj-11-9snyj165", + "nmdc:dobj-11-6z8rxk69", + "nmdc:dobj-11-ye09ze12", + "nmdc:dobj-11-7he50g23", + "nmdc:dobj-11-de1j7f31", + "nmdc:dobj-11-fg5qsr83", + "nmdc:dobj-11-hc12pc88", + "nmdc:dobj-11-rek7xf60", + "nmdc:dobj-11-gvcy1m93" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-x0es2p18" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-h7t4fx79", + "name": "10533.1.165310.CCAGTGT-AACACTG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2057112594, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gjsj3m90", + "name": "nmdc_wfrqc-11-6sggph61.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 1807840952, + "md5_checksum": "0db98173ae3395106e24d250b2655f06", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrqc-11-6sggph61.1/nmdc_wfrqc-11-6sggph61.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-skf18g29", + "name": "nmdc_wfrqc-11-6sggph61.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 284, + "md5_checksum": "bc0874c01bbd31c644cd598e2fdad3c4", + "data_object_type": "QC Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrqc-11-6sggph61.1/nmdc_wfrqc-11-6sggph61.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jgsjr510", + "name": "nmdc_wfmgas-11-sxp33a13.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 110395396, + "md5_checksum": "97659967fb7d9c239631419ecf9d64ed", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfmgas-11-sxp33a13.1/nmdc_wfmgas-11-sxp33a13.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hd2xr638", + "name": "nmdc_wfmgas-11-sxp33a13.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 109767585, + "md5_checksum": "29d75f24b4a9f1c1174f5a3a2b15e504", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfmgas-11-sxp33a13.1/nmdc_wfmgas-11-sxp33a13.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7t8t4a37", + "name": "nmdc_wfmgas-11-sxp33a13.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 17788231, + "md5_checksum": "bcbf67bd134bffbbb9115f782fda437c", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfmgas-11-sxp33a13.1/nmdc_wfmgas-11-sxp33a13.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bm125q52", + "name": "nmdc_wfmgas-11-sxp33a13.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 17958193, + "md5_checksum": "689c61ee8f8684daf22e388d99195d0b", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfmgas-11-sxp33a13.1/nmdc_wfmgas-11-sxp33a13.1_assembly.agp", + "type": "nmdc:DataObject" + 
}, + { + "id": "nmdc:dobj-11-24pa3094", + "name": "nmdc_wfmgas-11-sxp33a13.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfmgas-11-sxp33a13.1/nmdc_wfmgas-11-sxp33a13.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e6pwn045", + "name": "nmdc_wfrbt-11-g5xg0w61.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 1206, + "md5_checksum": "f4f810491708ff25956cddd005cc9944", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8wn50b30", + "name": "nmdc_wfrbt-11-g5xg0w61.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 662074, + "md5_checksum": "67e3c200d3765733af33d1db1f4bf968", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w7w79k82", + "name": "nmdc_wfrbt-11-g5xg0w61.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 229307, + "md5_checksum": "26cd6390e8362da2ee1d7691360d2dfb", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-y1n6sf14", + "name": 
"nmdc_wfrbt-11-g5xg0w61.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 1667543500, + "md5_checksum": "80fe705d97ef4a0701b1320e9ba19a82", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f31f3v08", + "name": "nmdc_wfrbt-11-g5xg0w61.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 253079, + "md5_checksum": "6a216ec913587e26ddc036b703126d76", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-x0ggrr48", + "name": "nmdc_wfrbt-11-g5xg0w61.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 2326900, + "md5_checksum": "ebed7286f886596764a66a0d1dac3e43", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j7v79m62", + "name": "nmdc_wfrbt-11-g5xg0w61.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 1328025421, + "md5_checksum": "80dd3584d257e8f84b59118ffd0d5e21", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8a5c1c03", + "name": 
"nmdc_wfrbt-11-g5xg0w61.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 628969, + "md5_checksum": "61b5fe5664ca99f6354c7a5a0222678c", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e1qmmw42", + "name": "nmdc_wfrbt-11-g5xg0w61.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-1nvcer55", + "file_size_bytes": 3933712, + "md5_checksum": "81108175d5ef2ca158f516bfc75d3cd9", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1nvcer55/nmdc:wfrbt-11-g5xg0w61.1/nmdc_wfrbt-11-g5xg0w61.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-sxp33a13.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-1nvcer55", + "started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-10-11T03:33:33+00:00", + "was_informed_by": "nmdc:omprc-11-1nvcer55", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-gjsj3m90" + ], + "has_output": [ + "nmdc:dobj-11-jgsjr510", + "nmdc:dobj-11-hd2xr638", + "nmdc:dobj-11-7t8t4a37", + "nmdc:dobj-11-bm125q52", + "nmdc:dobj-11-24pa3094" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-1nvcer55" + ], + "version": "v1.0.2", + "asm_score": 3.305, + "scaffolds": 208427, + "scaf_logsum": 212917, + "scaf_powsum": 22826, + "scaf_max": 23996, + "scaf_bp": 101013301, + "scaf_n50": 59864, + "scaf_n90": 174416, + "scaf_l50": 478, + "scaf_l90": 290, + "contigs": 208553, + "contig_bp": 101011771, + "ctg_n50": 59884, + "ctg_l50": 478, + "ctg_n90": 174522, + "ctg_l90": 290, + "ctg_logsum": 212258, + "ctg_powsum": 22751, + "ctg_max": 
23996, + "gap_pct": 0.00151, + "gc_std": 0.1053, + "gc_avg": 0.62056 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-1nvcer55", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-3sfanv57" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127629" + ], + "has_output": [ + "nmdc:dobj-11-h7t4fx79" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-6sggph61.1", + "name": "Read QC Activity for nmdc:omprc-11-1nvcer55", + "started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-10-11T03:33:33+00:00", + "was_informed_by": "nmdc:omprc-11-1nvcer55", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-h7t4fx79" + ], + "has_output": [ + "nmdc:dobj-11-gjsj3m90", + "nmdc:dobj-11-skf18g29" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-1nvcer55" + ], + "version": "v1.0.2", + "input_read_count": 23886420, + "output_read_count": 22738452, + "input_read_bases": 3606849420, + "output_read_bases": 3395256515 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-g5xg0w61.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-1nvcer55", + 
"started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-10-11T03:33:33+00:00", + "was_informed_by": "nmdc:omprc-11-1nvcer55", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-gjsj3m90" + ], + "has_output": [ + "nmdc:dobj-11-e6pwn045", + "nmdc:dobj-11-8wn50b30", + "nmdc:dobj-11-w7w79k82", + "nmdc:dobj-11-y1n6sf14", + "nmdc:dobj-11-f31f3v08", + "nmdc:dobj-11-x0ggrr48", + "nmdc:dobj-11-j7v79m62", + "nmdc:dobj-11-8a5c1c03", + "nmdc:dobj-11-e1qmmw42" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-1nvcer55" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-zc62cx90", + "name": "10533.3.165334.CTGACAC-TGTGTCA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2825784199, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-he827a33", + "name": "nmdc_wfrqc-11-bdy57e26.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-b051xn44", + "file_size_bytes": 2548975208, + "md5_checksum": "f6f1760721d73fc57919b2115a1d47ec", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrqc-11-bdy57e26.1/nmdc_wfrqc-11-bdy57e26.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0vr7yw11", + "name": "nmdc_wfrqc-11-bdy57e26.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-b051xn44", + "file_size_bytes": 291, + "md5_checksum": "2225f9d41343590d818186fa2d66852d", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrqc-11-bdy57e26.1/nmdc_wfrqc-11-bdy57e26.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mqf9qv68", + "name": "nmdc_wfmgas-11-zrkyqh64.1_contigs.fna", + "description": "Assembled contigs 
fasta for nmdc:omprc-11-b051xn44", + "file_size_bytes": 75224885, + "md5_checksum": "d5372d6bdb0055e5ce5a88aa38c7cc02", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfmgas-11-zrkyqh64.1/nmdc_wfmgas-11-zrkyqh64.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mwxez110", + "name": "nmdc_wfmgas-11-zrkyqh64.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-b051xn44", + "file_size_bytes": 74749633, + "md5_checksum": "d77476b8b2c37282580fe7f505d0a244", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfmgas-11-zrkyqh64.1/nmdc_wfmgas-11-zrkyqh64.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9k1gjq34", + "name": "nmdc_wfmgas-11-zrkyqh64.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-b051xn44", + "file_size_bytes": 13409273, + "md5_checksum": "36d48edf220d1fedbc1c076d6f164833", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfmgas-11-zrkyqh64.1/nmdc_wfmgas-11-zrkyqh64.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-58k5q708", + "name": "nmdc_wfmgas-11-zrkyqh64.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-b051xn44", + "file_size_bytes": 13531680, + "md5_checksum": "e7e761f33edca6b8c515660ce5852118", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfmgas-11-zrkyqh64.1/nmdc_wfmgas-11-zrkyqh64.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w0ng9078", + "name": "nmdc_wfmgas-11-zrkyqh64.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-b051xn44", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": 
"Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfmgas-11-zrkyqh64.1/nmdc_wfmgas-11-zrkyqh64.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dg168b12", + "name": "nmdc_wfrbt-11-5asxk887.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 3472, + "md5_checksum": "a6ed9af48a9ad473ab66721829a5c226", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-yegq8s07", + "name": "nmdc_wfrbt-11-5asxk887.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 863867, + "md5_checksum": "335dbf6f1055de0950988a002f432c0b", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rcbfjf06", + "name": "nmdc_wfrbt-11-5asxk887.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 234974, + "md5_checksum": "35da19bc0e50db1f9a02fe1550d1df0e", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f9nkga08", + "name": "nmdc_wfrbt-11-5asxk887.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 2220789142, + "md5_checksum": "224085164a389c6f207967ed03b3e6af", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hnz0zr97", + "name": "nmdc_wfrbt-11-5asxk887.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 257030, + "md5_checksum": "39ba17263c144761a8bdcc1645c034f5", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-k1rpfx85", + "name": "nmdc_wfrbt-11-5asxk887.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 2337568, + "md5_checksum": "84debc9bd1c09328d60f073d7fc2db4f", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0a0e2950", + "name": "nmdc_wfrbt-11-5asxk887.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 1776487262, + "md5_checksum": "8f75800abbcf5a94043ad677d7cb975c", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3daxd587", + "name": "nmdc_wfrbt-11-5asxk887.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 664011, + "md5_checksum": "aae9e961d8ed716457616c8a8841037b", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-py945794", + "name": "nmdc_wfrbt-11-5asxk887.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-b051xn44", + "file_size_bytes": 4035375, + "md5_checksum": "ba83d6ab837403f4bcbc9400a0460457", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-b051xn44/nmdc:wfrbt-11-5asxk887.1/nmdc_wfrbt-11-5asxk887.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-zrkyqh64.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-b051xn44", + "started_at_time": "2021-10-11T02:25:13Z", + "ended_at_time": "2021-10-11T04:45:59+00:00", + "was_informed_by": "nmdc:omprc-11-b051xn44", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-he827a33" + ], + "has_output": [ + "nmdc:dobj-11-mqf9qv68", + "nmdc:dobj-11-mwxez110", + "nmdc:dobj-11-9k1gjq34", + "nmdc:dobj-11-58k5q708", + "nmdc:dobj-11-w0ng9078" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-b051xn44" + ], + "version": "v1.0.2", + "asm_score": 4.319, + "scaffolds": 157774, + "scaf_logsum": 111226, + "scaf_powsum": 12026, + "scaf_max": 45540, + "scaf_bp": 68289129, + "scaf_n50": 49230, + "scaf_n90": 135095, + "scaf_l50": 400, + "scaf_l90": 285, + "contigs": 157859, + "contig_bp": 68288279, + "ctg_n50": 49248, + "ctg_l50": 400, + "ctg_n90": 135173, + "ctg_l90": 285, + "ctg_logsum": 110768, + "ctg_powsum": 11962, + "ctg_max": 40273, + "gap_pct": 0.00124, + "gc_std": 0.10673, + "gc_avg": 0.61453 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-b051xn44", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA 
- GW-RW S3_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jdsasr43" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127628" + ], + "has_output": [ + "nmdc:dobj-11-zc62cx90" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-bdy57e26.1", + "name": "Read QC Activity for nmdc:omprc-11-b051xn44", + "started_at_time": "2021-10-11T02:25:13Z", + "ended_at_time": "2021-10-11T04:45:59+00:00", + "was_informed_by": "nmdc:omprc-11-b051xn44", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-zc62cx90" + ], + "has_output": [ + "nmdc:dobj-11-he827a33", + "nmdc:dobj-11-0vr7yw11" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-b051xn44" + ], + "version": "v1.0.2", + "input_read_count": 31715882, + "output_read_count": 30212248, + "input_read_bases": 4789098182, + "output_read_bases": 4516265181 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-5asxk887.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-b051xn44", + "started_at_time": "2021-10-11T02:25:13Z", + "ended_at_time": "2021-10-11T04:45:59+00:00", + "was_informed_by": "nmdc:omprc-11-b051xn44", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + 
"nmdc:dobj-11-he827a33" + ], + "has_output": [ + "nmdc:dobj-11-dg168b12", + "nmdc:dobj-11-yegq8s07", + "nmdc:dobj-11-rcbfjf06", + "nmdc:dobj-11-f9nkga08", + "nmdc:dobj-11-hnz0zr97", + "nmdc:dobj-11-k1rpfx85", + "nmdc:dobj-11-0a0e2950", + "nmdc:dobj-11-3daxd587", + "nmdc:dobj-11-py945794" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-b051xn44" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-ba1s7a39", + "name": "10533.1.165310.TCGCTGT-AACAGCG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2291612962, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1f1x8v64", + "name": "nmdc_wfrqc-11-bxb8yc98.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 2030538721, + "md5_checksum": "6969fd7f4b1a5a34fb30d31b92cd6bf8", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrqc-11-bxb8yc98.1/nmdc_wfrqc-11-bxb8yc98.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jgw63170", + "name": "nmdc_wfrqc-11-bxb8yc98.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 284, + "md5_checksum": "b280141d234edf10cde8794539700654", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrqc-11-bxb8yc98.1/nmdc_wfrqc-11-bxb8yc98.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-x08q6m28", + "name": "nmdc_wfmgas-11-1b4kdt83.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 130138492, + "md5_checksum": "f54f2dfc5ac4e8fe8f3898493e3c07e4", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfmgas-11-1b4kdt83.1/nmdc_wfmgas-11-1b4kdt83.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kk9ket27", + "name": "nmdc_wfmgas-11-1b4kdt83.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 129421594, + "md5_checksum": "6d08fa76d188c5b89ff80316a8105900", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfmgas-11-1b4kdt83.1/nmdc_wfmgas-11-1b4kdt83.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-eg8xz412", + "name": "nmdc_wfmgas-11-1b4kdt83.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 20255856, + "md5_checksum": "986b3420d2b3a84ef02e293e7aa473ef", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfmgas-11-1b4kdt83.1/nmdc_wfmgas-11-1b4kdt83.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-v42m0p94", + "name": "nmdc_wfmgas-11-1b4kdt83.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 20485652, + "md5_checksum": "111c41bda8481474414f54d6ead9c5eb", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfmgas-11-1b4kdt83.1/nmdc_wfmgas-11-1b4kdt83.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-re9pkh81", + "name": "nmdc_wfmgas-11-1b4kdt83.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfmgas-11-1b4kdt83.1/nmdc_wfmgas-11-1b4kdt83.1_pairedMapped_sorted.bam", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zm381v68", + "name": "nmdc_wfrbt-11-2zbsse56.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 1227, + "md5_checksum": "b78e8246144185beb95c0caf65ef1f1a", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gb3qqz48", + "name": "nmdc_wfrbt-11-2zbsse56.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 647196, + "md5_checksum": "8875c6ce19e13ed9a88447f2f78bb049", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-txwstg82", + "name": "nmdc_wfrbt-11-2zbsse56.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 229312, + "md5_checksum": "3b0aee019c772a695bf4cc8f4a390f4e", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9zt4dp06", + "name": "nmdc_wfrbt-11-2zbsse56.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 1861431092, + "md5_checksum": "0d1729a83798b752f33eeb8d97afe972", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": 
"nmdc:dobj-11-qkhpg368", + "name": "nmdc_wfrbt-11-2zbsse56.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 254665, + "md5_checksum": "77561a0de3bb8aae04d110429fd9ad0c", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8c3p5z29", + "name": "nmdc_wfrbt-11-2zbsse56.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 2334578, + "md5_checksum": "ea27c005b1788434c2198ad60939d4bc", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ehy5z232", + "name": "nmdc_wfrbt-11-2zbsse56.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 1483354621, + "md5_checksum": "6a46583da876b9d6287302308df0b9fd", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8ma95v53", + "name": "nmdc_wfrbt-11-2zbsse56.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 640329, + "md5_checksum": "af619dc5a0423509a4beaca26aa61000", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8tftj659", + "name": 
"nmdc_wfrbt-11-2zbsse56.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-k8kt2j31", + "file_size_bytes": 3993246, + "md5_checksum": "50093825ec73dcabe66aa353de766beb", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k8kt2j31/nmdc:wfrbt-11-2zbsse56.1/nmdc_wfrbt-11-2zbsse56.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-1b4kdt83.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-k8kt2j31", + "started_at_time": "2021-10-11T02:26:22Z", + "ended_at_time": "2021-10-11T04:40:31+00:00", + "was_informed_by": "nmdc:omprc-11-k8kt2j31", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-1f1x8v64" + ], + "has_output": [ + "nmdc:dobj-11-x08q6m28", + "nmdc:dobj-11-kk9ket27", + "nmdc:dobj-11-eg8xz412", + "nmdc:dobj-11-v42m0p94", + "nmdc:dobj-11-re9pkh81" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-k8kt2j31" + ], + "version": "v1.0.2", + "asm_score": 3.117, + "scaffolds": 237183, + "scaf_logsum": 307525, + "scaf_powsum": 33057, + "scaf_max": 14244, + "scaf_bp": 119369963, + "scaf_n50": 64017, + "scaf_n90": 195424, + "scaf_l50": 500, + "scaf_l90": 292, + "contigs": 237399, + "contig_bp": 119367623, + "ctg_n50": 64310, + "ctg_l50": 499, + "ctg_n90": 195626, + "ctg_l90": 292, + "ctg_logsum": 306128, + "ctg_powsum": 32898, + "ctg_max": 14244, + "gap_pct": 0.00196, + "gc_std": 0.09594, + "gc_avg": 0.62364 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-k8kt2j31", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + 
"has_input": [ + "nmdc:bsm-11-4vqhvw07" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127631" + ], + "has_output": [ + "nmdc:dobj-11-ba1s7a39" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-bxb8yc98.1", + "name": "Read QC Activity for nmdc:omprc-11-k8kt2j31", + "started_at_time": "2021-10-11T02:26:22Z", + "ended_at_time": "2021-10-11T04:40:31+00:00", + "was_informed_by": "nmdc:omprc-11-k8kt2j31", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-ba1s7a39" + ], + "has_output": [ + "nmdc:dobj-11-1f1x8v64", + "nmdc:dobj-11-jgw63170" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-k8kt2j31" + ], + "version": "v1.0.2", + "input_read_count": 26419652, + "output_read_count": 25434840, + "input_read_bases": 3989367452, + "output_read_bases": 3798930297 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-2zbsse56.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-k8kt2j31", + "started_at_time": "2021-10-11T02:26:22Z", + "ended_at_time": "2021-10-11T04:40:31+00:00", + "was_informed_by": "nmdc:omprc-11-k8kt2j31", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-1f1x8v64" + ], + "has_output": [ + "nmdc:dobj-11-zm381v68", + "nmdc:dobj-11-gb3qqz48", + "nmdc:dobj-11-txwstg82", + "nmdc:dobj-11-9zt4dp06", + "nmdc:dobj-11-qkhpg368", + 
"nmdc:dobj-11-8c3p5z29", + "nmdc:dobj-11-ehy5z232", + "nmdc:dobj-11-8ma95v53", + "nmdc:dobj-11-8tftj659" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-k8kt2j31" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-s1m79s13", + "name": "10533.2.165322.TGTACAC-GGTGTAC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2557650099, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0z9p6f12", + "name": "nmdc_wfrqc-11-s5bg8f63.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-9pbab972", + "file_size_bytes": 2294158265, + "md5_checksum": "eaffb16b5247d85c08f8af73bcb8b65e", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrqc-11-s5bg8f63.1/nmdc_wfrqc-11-s5bg8f63.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6901eg67", + "name": "nmdc_wfrqc-11-s5bg8f63.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-9pbab972", + "file_size_bytes": 288, + "md5_checksum": "088fd18cb9169097e739289d2e5ebb13", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrqc-11-s5bg8f63.1/nmdc_wfrqc-11-s5bg8f63.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4m54k858", + "name": "nmdc_wfmgas-11-8jw76g61.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-9pbab972", + "file_size_bytes": 58275358, + "md5_checksum": "5e7926d9668b9914a7260a8e21a7ca04", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfmgas-11-8jw76g61.1/nmdc_wfmgas-11-8jw76g61.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mb906j70", + "name": "nmdc_wfmgas-11-8jw76g61.1_scaffolds.fna", + "description": "Assembled 
scaffold fasta for nmdc:omprc-11-9pbab972", + "file_size_bytes": 57892322, + "md5_checksum": "ece6e2c7f1b99422470c75e2e54a1426", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfmgas-11-8jw76g61.1/nmdc_wfmgas-11-8jw76g61.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hcknt738", + "name": "nmdc_wfmgas-11-8jw76g61.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-9pbab972", + "file_size_bytes": 10784007, + "md5_checksum": "fb1da11538698ff59c3994b539c3b914", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfmgas-11-8jw76g61.1/nmdc_wfmgas-11-8jw76g61.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-m64zdv86", + "name": "nmdc_wfmgas-11-8jw76g61.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-9pbab972", + "file_size_bytes": 10866115, + "md5_checksum": "f3991a097a1850e27ea2730c3916d202", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfmgas-11-8jw76g61.1/nmdc_wfmgas-11-8jw76g61.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9nxa4791", + "name": "nmdc_wfmgas-11-8jw76g61.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9pbab972", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfmgas-11-8jw76g61.1/nmdc_wfmgas-11-8jw76g61.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6j01mf11", + "name": "nmdc_wfrbt-11-q8ezyz31.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 3373, + "md5_checksum": "ad8aa7d317d86bcd1b33e6e68a917198", + 
"data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7r14qr35", + "name": "nmdc_wfrbt-11-q8ezyz31.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 791488, + "md5_checksum": "e5f1da9ed5be2adcd65763d387387c9f", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e3328653", + "name": "nmdc_wfrbt-11-q8ezyz31.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 235803, + "md5_checksum": "db82b41936f37bbbeaa027ffc25b58cd", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q2h7ck36", + "name": "nmdc_wfrbt-11-q8ezyz31.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 1974171566, + "md5_checksum": "2f21fd19f055d1931ab82016ed781a12", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5748yz23", + "name": "nmdc_wfrbt-11-q8ezyz31.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 255012, + "md5_checksum": "890f494d1dd5e130d6c1688e78f27ff2", + "data_object_type": "Centrifuge 
Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-njr2yx39", + "name": "nmdc_wfrbt-11-q8ezyz31.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 2330430, + "md5_checksum": "813232a3034ddb9a05efc2f2e9b78cce", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7tykzg40", + "name": "nmdc_wfrbt-11-q8ezyz31.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 1584744477, + "md5_checksum": "ef490241b537bb4c19bd5548cd7b7f6b", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ckefd980", + "name": "nmdc_wfrbt-11-q8ezyz31.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 650172, + "md5_checksum": "6a7de24b01ad1c63ba6edb758e25af40", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e0361828", + "name": "nmdc_wfrbt-11-q8ezyz31.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-9pbab972", + "file_size_bytes": 3962195, + "md5_checksum": "fc8a855916eb1ba0f7d278b7c1f1786f", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9pbab972/nmdc:wfrbt-11-q8ezyz31.1/nmdc_wfrbt-11-q8ezyz31.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-8jw76g61.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-9pbab972", + "started_at_time": "2021-10-11T02:26:53Z", + "ended_at_time": "2021-10-11T04:54:22+00:00", + "was_informed_by": "nmdc:omprc-11-9pbab972", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-0z9p6f12" + ], + "has_output": [ + "nmdc:dobj-11-4m54k858", + "nmdc:dobj-11-mb906j70", + "nmdc:dobj-11-hcknt738", + "nmdc:dobj-11-m64zdv86", + "nmdc:dobj-11-9nxa4791" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-9pbab972" + ], + "version": "v1.0.2", + "asm_score": 6.312, + "scaffolds": 127272, + "scaf_logsum": 77428, + "scaf_powsum": 8795.268, + "scaf_max": 31136, + "scaf_bp": 52741482, + "scaf_n50": 41856, + "scaf_n90": 110834, + "scaf_l50": 372, + "scaf_l90": 284, + "contigs": 127321, + "contig_bp": 52740992, + "ctg_n50": 41888, + "ctg_l50": 372, + "ctg_n90": 110882, + "ctg_l90": 284, + "ctg_logsum": 77070, + "ctg_powsum": 8755.579, + "ctg_max": 31136, + "gap_pct": 0.00093, + "gc_std": 0.09346, + "gc_avg": 0.61288 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-9pbab972", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-3yjh4z33" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127630" + ], + "has_output": [ + "nmdc:dobj-11-s1m79s13" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities 
from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-s5bg8f63.1", + "name": "Read QC Activity for nmdc:omprc-11-9pbab972", + "started_at_time": "2021-10-11T02:26:53Z", + "ended_at_time": "2021-10-11T04:54:22+00:00", + "was_informed_by": "nmdc:omprc-11-9pbab972", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-s1m79s13" + ], + "has_output": [ + "nmdc:dobj-11-0z9p6f12", + "nmdc:dobj-11-6901eg67" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-9pbab972" + ], + "version": "v1.0.2", + "input_read_count": 28569382, + "output_read_count": 26868700, + "input_read_bases": 4313976682, + "output_read_bases": 4016672570 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-q8ezyz31.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-9pbab972", + "started_at_time": "2021-10-11T02:26:53Z", + "ended_at_time": "2021-10-11T04:54:22+00:00", + "was_informed_by": "nmdc:omprc-11-9pbab972", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-0z9p6f12" + ], + "has_output": [ + "nmdc:dobj-11-6j01mf11", + "nmdc:dobj-11-7r14qr35", + "nmdc:dobj-11-e3328653", + "nmdc:dobj-11-q2h7ck36", + "nmdc:dobj-11-5748yz23", + "nmdc:dobj-11-njr2yx39", + "nmdc:dobj-11-7tykzg40", + "nmdc:dobj-11-ckefd980", + "nmdc:dobj-11-e0361828" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-9pbab972" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-br8p6y08", 
+ "name": "10533.1.165310.GGACTGT-AACAGTC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2003194973, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-g8cwm256", + "name": "nmdc_wfrqc-11-mkgjde26.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-0g415160", + "file_size_bytes": 1727224362, + "md5_checksum": "7cbd497624d8b60ab2a5e7fdbe4730f2", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrqc-11-mkgjde26.1/nmdc_wfrqc-11-mkgjde26.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-z7zj5m25", + "name": "nmdc_wfrqc-11-mkgjde26.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-0g415160", + "file_size_bytes": 280, + "md5_checksum": "eccf0501d08f920a88b6598d573a8e3e", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrqc-11-mkgjde26.1/nmdc_wfrqc-11-mkgjde26.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-08r4vg96", + "name": "nmdc_wfmgas-11-txcpae82.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-0g415160", + "file_size_bytes": 154451818, + "md5_checksum": "3757c0a9cb68fb1593c7abef962dc402", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfmgas-11-txcpae82.1/nmdc_wfmgas-11-txcpae82.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4zb2ap69", + "name": "nmdc_wfmgas-11-txcpae82.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-0g415160", + "file_size_bytes": 153629204, + "md5_checksum": "7cab98ae02849de9dd18c016a65c65c3", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfmgas-11-txcpae82.1/nmdc_wfmgas-11-txcpae82.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-r186zy13", + "name": "nmdc_wfmgas-11-txcpae82.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-0g415160", + "file_size_bytes": 23315444, + "md5_checksum": "1727dc775a516d82c0c079dbcf988fe9", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfmgas-11-txcpae82.1/nmdc_wfmgas-11-txcpae82.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9nd0m981", + "name": "nmdc_wfmgas-11-txcpae82.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-0g415160", + "file_size_bytes": 23581607, + "md5_checksum": "25a336e606b336122b5cf57069ef170b", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfmgas-11-txcpae82.1/nmdc_wfmgas-11-txcpae82.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p5adkn82", + "name": "nmdc_wfmgas-11-txcpae82.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-0g415160", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfmgas-11-txcpae82.1/nmdc_wfmgas-11-txcpae82.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j50e0z83", + "name": "nmdc_wfrbt-11-06ttdb72.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-0g415160", + "file_size_bytes": 875, + "md5_checksum": "8bd9eb762acabbac5d079c379c28e381", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rn1byq37", + "name": "nmdc_wfrbt-11-06ttdb72.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-0g415160", + "file_size_bytes": 578856, + "md5_checksum": "77351dd18ca40e5552ac1380ba94acbf", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zndhcs22", + "name": "nmdc_wfrbt-11-06ttdb72.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-0g415160", + "file_size_bytes": 228067, + "md5_checksum": "f445af1a7774572d156f55a898d26f09", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0jhp9b14", + "name": "nmdc_wfrbt-11-06ttdb72.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-0g415160", + "file_size_bytes": 1646942155, + "md5_checksum": "e11fcbf66318878c05984fa3d893e3b7", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p1rmaz28", + "name": "nmdc_wfrbt-11-06ttdb72.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-0g415160", + "file_size_bytes": 252735, + "md5_checksum": "28beb8baabdaf346f2066b40f375a152", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-880gtq83", + "name": "nmdc_wfrbt-11-06ttdb72.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-0g415160", + "file_size_bytes": 2329168, + "md5_checksum": "1f74a43724c4afed5563499d05601e22", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-481szv06", + "name": "nmdc_wfrbt-11-06ttdb72.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-0g415160", + "file_size_bytes": 1310443491, + "md5_checksum": "4825177c6d0a8b67db82e6070cfbc35f", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ag7p6r13", + "name": "nmdc_wfrbt-11-06ttdb72.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-0g415160", + "file_size_bytes": 621441, + "md5_checksum": "275268a6b5aca33c427d11877bcfa674", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b254nc26", + "name": "nmdc_wfrbt-11-06ttdb72.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-0g415160", + "file_size_bytes": 3891844, + "md5_checksum": "89e810af4915f0e117eaa60550587453", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-0g415160/nmdc:wfrbt-11-06ttdb72.1/nmdc_wfrbt-11-06ttdb72.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-txcpae82.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-0g415160", + "started_at_time": "2021-10-11T02:24:58Z", + "ended_at_time": "2021-10-11T03:40:06+00:00", + "was_informed_by": "nmdc:omprc-11-0g415160", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-g8cwm256" + ], + "has_output": [ + "nmdc:dobj-11-08r4vg96", + "nmdc:dobj-11-4zb2ap69", + "nmdc:dobj-11-r186zy13", + "nmdc:dobj-11-9nd0m981", + "nmdc:dobj-11-p5adkn82" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-0g415160" + ], + "version": "v1.0.2", + "asm_score": 4.48, + "scaffolds": 272628, + "scaf_logsum": 380592, + "scaf_powsum": 41655, + "scaf_max": 30530, + "scaf_bp": 141977427, + "scaf_n50": 72571, + "scaf_n90": 223970, + "scaf_l50": 527, + "scaf_l90": 298, + "contigs": 272879, + "contig_bp": 141974737, + "ctg_n50": 72824, + "ctg_l50": 526, + "ctg_n90": 224178, + "ctg_l90": 298, + "ctg_logsum": 378958, + "ctg_powsum": 41464, + "ctg_max": 30530, + "gap_pct": 0.00189, + "gc_std": 0.08353, + "gc_avg": 0.63381 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-0g415160", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-nbgp1x53" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127633" + ], + "has_output": [ + "nmdc:dobj-11-br8p6y08" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment 
microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-mkgjde26.1", + "name": "Read QC Activity for nmdc:omprc-11-0g415160", + "started_at_time": "2021-10-11T02:24:58Z", + "ended_at_time": "2021-10-11T03:40:06+00:00", + "was_informed_by": "nmdc:omprc-11-0g415160", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-br8p6y08" + ], + "has_output": [ + "nmdc:dobj-11-g8cwm256", + "nmdc:dobj-11-z7zj5m25" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-0g415160" + ], + "version": "v1.0.2", + "input_read_count": 23291434, + "output_read_count": 22556158, + "input_read_bases": 3517006534, + "output_read_bases": 3367024367 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-06ttdb72.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-0g415160", + "started_at_time": "2021-10-11T02:24:58Z", + "ended_at_time": "2021-10-11T03:40:06+00:00", + "was_informed_by": "nmdc:omprc-11-0g415160", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-g8cwm256" + ], + "has_output": [ + "nmdc:dobj-11-j50e0z83", + "nmdc:dobj-11-rn1byq37", + "nmdc:dobj-11-zndhcs22", + "nmdc:dobj-11-0jhp9b14", + "nmdc:dobj-11-p1rmaz28", + "nmdc:dobj-11-880gtq83", + "nmdc:dobj-11-481szv06", + "nmdc:dobj-11-ag7p6r13", + "nmdc:dobj-11-b254nc26" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-0g415160" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + 
"id": "nmdc:dobj-11-pk311p17", + "name": "10533.3.165334.GCCTTGT-AACAAGG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1875083220, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wb2pxr61", + "name": "nmdc_wfrqc-11-tg10x049.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 1752924191, + "md5_checksum": "ed95796b3fd964c6bedb141d70737ebf", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrqc-11-tg10x049.1/nmdc_wfrqc-11-tg10x049.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-91s3sx63", + "name": "nmdc_wfrqc-11-tg10x049.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 281, + "md5_checksum": "308ae373809697291bbc7947a1e4ed2d", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrqc-11-tg10x049.1/nmdc_wfrqc-11-tg10x049.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p73qg436", + "name": "nmdc_wfmgas-11-gj6set57.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 20160804, + "md5_checksum": "da26391820a6f2c0aee778f7fc8012c6", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfmgas-11-gj6set57.1/nmdc_wfmgas-11-gj6set57.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4x8xzz25", + "name": "nmdc_wfmgas-11-gj6set57.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 20007066, + "md5_checksum": "9f80b77bbce2c055ae8b5bcb773b4013", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfmgas-11-gj6set57.1/nmdc_wfmgas-11-gj6set57.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-s7dm6791", + "name": "nmdc_wfmgas-11-gj6set57.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 4304973, + "md5_checksum": "a70dbfdc405d10f8e00ab7c2f723becc", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfmgas-11-gj6set57.1/nmdc_wfmgas-11-gj6set57.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-70tkdn65", + "name": "nmdc_wfmgas-11-gj6set57.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 4330253, + "md5_checksum": "8e92d29e290bafda80cc58b9444e04ad", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfmgas-11-gj6set57.1/nmdc_wfmgas-11-gj6set57.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-meewxh44", + "name": "nmdc_wfmgas-11-gj6set57.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfmgas-11-gj6set57.1/nmdc_wfmgas-11-gj6set57.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tr530382", + "name": "nmdc_wfrbt-11-3rxd2z23.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 5530, + "md5_checksum": "a5ac6665e5d66242b1c885a911236982", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w7spt959", + "name": "nmdc_wfrbt-11-3rxd2z23.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 825047, + "md5_checksum": "d19478a191693d643157a89c69cc02d1", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b85vz991", + "name": "nmdc_wfrbt-11-3rxd2z23.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 241114, + "md5_checksum": "679a82699663e88a5e8828ee081fa967", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2ctbz614", + "name": "nmdc_wfrbt-11-3rxd2z23.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 1463660267, + "md5_checksum": "95b3150e6fb62195c1e5ebf06f87c7d5", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9tj4mh74", + "name": "nmdc_wfrbt-11-3rxd2z23.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 254347, + "md5_checksum": "0380e478962be82e0d97a6339f7f3b91", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fn633330", + "name": "nmdc_wfrbt-11-3rxd2z23.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 2330603, + "md5_checksum": "0c1d139abdfa9fa10f26923abb4d6bda", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bc3zme13", + "name": "nmdc_wfrbt-11-3rxd2z23.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 1177609473, + "md5_checksum": "f388f7f0d79d0b2bbec1c3c0c5641814", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-x7c19x69", + "name": "nmdc_wfrbt-11-3rxd2z23.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 643281, + "md5_checksum": "a2a0029691c04851f4a98003a773fe3f", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-r0c9z724", + "name": "nmdc_wfrbt-11-3rxd2z23.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-z5qv0f24", + "file_size_bytes": 3926756, + "md5_checksum": "bab24ab64ad432d115f182df7198d46e", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-z5qv0f24/nmdc:wfrbt-11-3rxd2z23.1/nmdc_wfrbt-11-3rxd2z23.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-gj6set57.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-z5qv0f24", + "started_at_time": "2021-11-13T18:47:34Z", + "ended_at_time": "2021-11-13T19:08:49+00:00", + "was_informed_by": "nmdc:omprc-11-z5qv0f24", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-wb2pxr61" + ], + "has_output": [ + "nmdc:dobj-11-p73qg436", + "nmdc:dobj-11-4x8xzz25", + "nmdc:dobj-11-s7dm6791", + "nmdc:dobj-11-70tkdn65", + "nmdc:dobj-11-meewxh44" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-z5qv0f24" + ], + "version": "v1.0.2", + "asm_score": 4.807, + "scaffolds": 51180, + "scaf_logsum": 6368.36, + "scaf_powsum": 683.717, + "scaf_max": 15604, + "scaf_bp": 18008251, + "scaf_n50": 20413, + "scaf_n90": 44748, + "scaf_l50": 321, + "scaf_l90": 282, + "contigs": 51188, + "contig_bp": 18008171, + "ctg_n50": 20415, + "ctg_l50": 321, + "ctg_n90": 44756, + "ctg_l90": 282, + "ctg_logsum": 6346.305, + "ctg_powsum": 681.483, + "ctg_max": 15604, + "gap_pct": 0.00044, + "gc_std": 0.11462, + "gc_avg": 0.57328 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-z5qv0f24", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-v0q5ak63" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127627" + ], + "has_output": [ + "nmdc:dobj-11-pk311p17" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment 
microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-tg10x049.1", + "name": "Read QC Activity for nmdc:omprc-11-z5qv0f24", + "started_at_time": "2021-11-13T18:47:34Z", + "ended_at_time": "2021-11-13T19:08:49+00:00", + "was_informed_by": "nmdc:omprc-11-z5qv0f24", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-pk311p17" + ], + "has_output": [ + "nmdc:dobj-11-wb2pxr61", + "nmdc:dobj-11-91s3sx63" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-z5qv0f24" + ], + "version": "v1.0.2", + "input_read_count": 20505370, + "output_read_count": 19995028, + "input_read_bases": 3096310870, + "output_read_bases": 2992084693 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-3rxd2z23.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-z5qv0f24", + "started_at_time": "2021-11-13T18:47:34Z", + "ended_at_time": "2021-11-13T19:08:49+00:00", + "was_informed_by": "nmdc:omprc-11-z5qv0f24", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-wb2pxr61" + ], + "has_output": [ + "nmdc:dobj-11-tr530382", + "nmdc:dobj-11-w7spt959", + "nmdc:dobj-11-b85vz991", + "nmdc:dobj-11-2ctbz614", + "nmdc:dobj-11-9tj4mh74", + "nmdc:dobj-11-fn633330", + "nmdc:dobj-11-bc3zme13", + "nmdc:dobj-11-x7c19x69", + "nmdc:dobj-11-r0c9z724" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-z5qv0f24" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + 
"id": "nmdc:dobj-11-rbvvgr79", + "name": "10533.2.165322.AGCTAAC-GGTTAGC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2456584646, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qz56y752", + "name": "nmdc_wfrqc-11-5t5brd73.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-8qms8262", + "file_size_bytes": 2199178772, + "md5_checksum": "a43bfb55389206c2fc5ddb53e6aa2bc6", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrqc-11-5t5brd73.1/nmdc_wfrqc-11-5t5brd73.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-aw4sed63", + "name": "nmdc_wfrqc-11-5t5brd73.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-8qms8262", + "file_size_bytes": 289, + "md5_checksum": "919c5aade4fffb76f743a33b035b2839", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrqc-11-5t5brd73.1/nmdc_wfrqc-11-5t5brd73.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9w3eah23", + "name": "nmdc_wfmgas-11-4ke0xh29.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-8qms8262", + "file_size_bytes": 60725364, + "md5_checksum": "36a7dd856df0fa9743fbf4db8715bd33", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfmgas-11-4ke0xh29.1/nmdc_wfmgas-11-4ke0xh29.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hdr4hb16", + "name": "nmdc_wfmgas-11-4ke0xh29.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-8qms8262", + "file_size_bytes": 60326981, + "md5_checksum": "4d52f141a505777b54fbf51ce907672c", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfmgas-11-4ke0xh29.1/nmdc_wfmgas-11-4ke0xh29.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q9djrf70", + "name": "nmdc_wfmgas-11-4ke0xh29.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-8qms8262", + "file_size_bytes": 11223670, + "md5_checksum": "d4f70eef69c8c5309e51b3fe584cee0e", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfmgas-11-4ke0xh29.1/nmdc_wfmgas-11-4ke0xh29.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1srqtf18", + "name": "nmdc_wfmgas-11-4ke0xh29.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-8qms8262", + "file_size_bytes": 11316447, + "md5_checksum": "69b869115d6848b1652b407266614311", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfmgas-11-4ke0xh29.1/nmdc_wfmgas-11-4ke0xh29.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4wyf9p12", + "name": "nmdc_wfmgas-11-4ke0xh29.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-8qms8262", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfmgas-11-4ke0xh29.1/nmdc_wfmgas-11-4ke0xh29.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vz34zd81", + "name": "nmdc_wfrbt-11-ndbdyx70.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 2899, + "md5_checksum": "3e583cccbbc068e0879ba6618bb6407c", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zzqnfy13", + "name": "nmdc_wfrbt-11-ndbdyx70.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 769416, + "md5_checksum": "6c54105711e818c4d8169ab595b05efe", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-273spx08", + "name": "nmdc_wfrbt-11-ndbdyx70.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 235384, + "md5_checksum": "adb155cdb656648496484998a62fb96f", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fjhqf951", + "name": "nmdc_wfrbt-11-ndbdyx70.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 1917130445, + "md5_checksum": "0a03ac5737750a3b336e7299e9f01ead", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j3mpcx18", + "name": "nmdc_wfrbt-11-ndbdyx70.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 255290, + "md5_checksum": "f345b3a57c37097a860e38d5e83835b8", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-byx8j688", + "name": "nmdc_wfrbt-11-ndbdyx70.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 2333225, + "md5_checksum": "c1f4471d943b284720a8becb5a2e32b4", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-29xww886", + "name": "nmdc_wfrbt-11-ndbdyx70.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 1537863470, + "md5_checksum": "50cfcfc5d0d89245b8370abf6bfef23c", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-686w9831", + "name": "nmdc_wfrbt-11-ndbdyx70.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 648597, + "md5_checksum": "a8dd7aa20043510158ad3b2bbe961b42", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pmpsa510", + "name": "nmdc_wfrbt-11-ndbdyx70.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-8qms8262", + "file_size_bytes": 3959152, + "md5_checksum": "e350fda9bd0651755171d79b413b8da3", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-8qms8262/nmdc:wfrbt-11-ndbdyx70.1/nmdc_wfrbt-11-ndbdyx70.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-4ke0xh29.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-8qms8262", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T04:08:32+00:00", + "was_informed_by": "nmdc:omprc-11-8qms8262", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-qz56y752" + ], + "has_output": [ + "nmdc:dobj-11-9w3eah23", + "nmdc:dobj-11-hdr4hb16", + "nmdc:dobj-11-q9djrf70", + "nmdc:dobj-11-1srqtf18", + "nmdc:dobj-11-4wyf9p12" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-8qms8262" + ], + "version": "v1.0.2", + "asm_score": 5.986, + "scaffolds": 132455, + "scaf_logsum": 81839, + "scaf_powsum": 9304.689, + "scaf_max": 23706, + "scaf_bp": 54960268, + "scaf_n50": 43524, + "scaf_n90": 113522, + "scaf_l50": 372, + "scaf_l90": 285, + "contigs": 132499, + "contig_bp": 54959738, + "ctg_n50": 43541, + "ctg_l50": 372, + "ctg_n90": 113564, + "ctg_l90": 285, + "ctg_logsum": 81568, + "ctg_powsum": 9274.272, + "ctg_max": 23706, + "gap_pct": 0.00096, + "gc_std": 0.09103, + "gc_avg": 0.61354 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-8qms8262", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-0xprxw22" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127632" + ], + "has_output": [ + "nmdc:dobj-11-rbvvgr79" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment 
microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-5t5brd73.1", + "name": "Read QC Activity for nmdc:omprc-11-8qms8262", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T04:08:32+00:00", + "was_informed_by": "nmdc:omprc-11-8qms8262", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-rbvvgr79" + ], + "has_output": [ + "nmdc:dobj-11-qz56y752", + "nmdc:dobj-11-aw4sed63" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-8qms8262" + ], + "version": "v1.0.2", + "input_read_count": 27906294, + "output_read_count": 26116440, + "input_read_bases": 4213850394, + "output_read_bases": 3905482172 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-ndbdyx70.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-8qms8262", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T04:08:32+00:00", + "was_informed_by": "nmdc:omprc-11-8qms8262", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-qz56y752" + ], + "has_output": [ + "nmdc:dobj-11-vz34zd81", + "nmdc:dobj-11-zzqnfy13", + "nmdc:dobj-11-273spx08", + "nmdc:dobj-11-fjhqf951", + "nmdc:dobj-11-j3mpcx18", + "nmdc:dobj-11-byx8j688", + "nmdc:dobj-11-29xww886", + "nmdc:dobj-11-686w9831", + "nmdc:dobj-11-pmpsa510" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-8qms8262" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + 
"id": "nmdc:dobj-11-ezyfxj50", + "name": "10533.3.165334.ACAGCAA-GTTGCTG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2759159406, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-r3ng6j40", + "name": "nmdc_wfrqc-11-8xx98t50.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-k675bw84", + "file_size_bytes": 2463342132, + "md5_checksum": "e4f5675c728fd1896682eb669656b5d6", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrqc-11-8xx98t50.1/nmdc_wfrqc-11-8xx98t50.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-x2j74y85", + "name": "nmdc_wfrqc-11-8xx98t50.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-k675bw84", + "file_size_bytes": 293, + "md5_checksum": "64f455185b1bc610a8d74a84ed12683f", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrqc-11-8xx98t50.1/nmdc_wfrqc-11-8xx98t50.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hd5hcj59", + "name": "nmdc_wfmgas-11-jf1sj032.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-k675bw84", + "file_size_bytes": 39635644, + "md5_checksum": "746ebfcb92d3aed15965e58d7d232d7b", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfmgas-11-jf1sj032.1/nmdc_wfmgas-11-jf1sj032.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-50gyjy37", + "name": "nmdc_wfmgas-11-jf1sj032.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-k675bw84", + "file_size_bytes": 39348348, + "md5_checksum": "5b82e453220e2bdc832b6cbde32a5b58", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfmgas-11-jf1sj032.1/nmdc_wfmgas-11-jf1sj032.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5ghgfp55", + "name": "nmdc_wfmgas-11-jf1sj032.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-k675bw84", + "file_size_bytes": 8069585, + "md5_checksum": "e7062a97cd30534dbe638633652f467e", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfmgas-11-jf1sj032.1/nmdc_wfmgas-11-jf1sj032.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bkpphj28", + "name": "nmdc_wfmgas-11-jf1sj032.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-k675bw84", + "file_size_bytes": 8110063, + "md5_checksum": "c0dce658da7d9c42d950c857abebed23", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfmgas-11-jf1sj032.1/nmdc_wfmgas-11-jf1sj032.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hanqjd26", + "name": "nmdc_wfmgas-11-jf1sj032.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-k675bw84", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfmgas-11-jf1sj032.1/nmdc_wfmgas-11-jf1sj032.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4rkx0771", + "name": "nmdc_wfrbt-11-c6pp2n50.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 5547, + "md5_checksum": "50d80a30d4ff113e36f6fd64b1f28547", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b4k8h033", + "name": "nmdc_wfrbt-11-c6pp2n50.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 965042, + "md5_checksum": "c2cd20a2011592a76397f49dc3acd6b7", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-k46v7w87", + "name": "nmdc_wfrbt-11-c6pp2n50.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 242495, + "md5_checksum": "827ad863c875ea14473c9903d192fa73", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c0aajr06", + "name": "nmdc_wfrbt-11-c6pp2n50.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 2151939041, + "md5_checksum": "957074ca49765b22348e27b0133d8ba0", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jvaz3g43", + "name": "nmdc_wfrbt-11-c6pp2n50.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 257932, + "md5_checksum": "9253645582296696cb33b11754832574", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3sc9f229", + "name": "nmdc_wfrbt-11-c6pp2n50.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 2335219, + "md5_checksum": "9aef1d9e04acfe0b7fb1b9dc3b842912", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5smq4a13", + "name": "nmdc_wfrbt-11-c6pp2n50.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 1746049273, + "md5_checksum": "75180fce38f38a6307231b47a8d2b23b", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bt0b6w67", + "name": "nmdc_wfrbt-11-c6pp2n50.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 660975, + "md5_checksum": "b4524a34937893768dbd3752068dee0c", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rc1ksr59", + "name": "nmdc_wfrbt-11-c6pp2n50.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-k675bw84", + "file_size_bytes": 4020978, + "md5_checksum": "f1543441c59aaaf8ec52036a5bbbe3f4", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-k675bw84/nmdc:wfrbt-11-c6pp2n50.1/nmdc_wfrbt-11-c6pp2n50.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-jf1sj032.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-k675bw84", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-11-13T18:49:37+00:00", + "was_informed_by": "nmdc:omprc-11-k675bw84", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-r3ng6j40" + ], + "has_output": [ + "nmdc:dobj-11-hd5hcj59", + "nmdc:dobj-11-50gyjy37", + "nmdc:dobj-11-5ghgfp55", + "nmdc:dobj-11-bkpphj28", + "nmdc:dobj-11-hanqjd26" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-k675bw84" + ], + "version": "v1.0.2", + "asm_score": 3.618, + "scaffolds": 95584, + "scaf_logsum": 36615, + "scaf_powsum": 3993.143, + "scaf_max": 23067, + "scaf_bp": 35573308, + "scaf_n50": 35220, + "scaf_n90": 83355, + "scaf_l50": 329, + "scaf_l90": 282, + "contigs": 95606, + "contig_bp": 35573088, + "ctg_n50": 35238, + "ctg_l50": 329, + "ctg_n90": 83377, + "ctg_l90": 282, + "ctg_logsum": 36469, + "ctg_powsum": 3976.058, + "ctg_max": 23067, + "gap_pct": 0.00062, + "gc_std": 0.11099, + "gc_avg": 0.57474 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-k675bw84", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-rtf54942" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127636" + ], + "has_output": [ + "nmdc:dobj-11-ezyfxj50" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from 
areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-8xx98t50.1", + "name": "Read QC Activity for nmdc:omprc-11-k675bw84", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-11-13T18:49:37+00:00", + "was_informed_by": "nmdc:omprc-11-k675bw84", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-ezyfxj50" + ], + "has_output": [ + "nmdc:dobj-11-r3ng6j40", + "nmdc:dobj-11-x2j74y85" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-k675bw84" + ], + "version": "v1.0.2", + "input_read_count": 31642056, + "output_read_count": 29115818, + "input_read_bases": 4777950456, + "output_read_bases": 4354491393 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-c6pp2n50.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-k675bw84", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-11-13T18:49:37+00:00", + "was_informed_by": "nmdc:omprc-11-k675bw84", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-r3ng6j40" + ], + "has_output": [ + "nmdc:dobj-11-4rkx0771", + "nmdc:dobj-11-b4k8h033", + "nmdc:dobj-11-k46v7w87", + "nmdc:dobj-11-c0aajr06", + "nmdc:dobj-11-jvaz3g43", + "nmdc:dobj-11-3sc9f229", + "nmdc:dobj-11-5smq4a13", + "nmdc:dobj-11-bt0b6w67", + "nmdc:dobj-11-rc1ksr59" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-k675bw84" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-y27yqy58", + 
"name": "10533.3.165334.ACGGAAC-TGTTCCG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2620687542, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-r57zvb61", + "name": "nmdc_wfrqc-11-xgbfpn47.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 2316462404, + "md5_checksum": "ac889627d813c8e34cfbf79a4264c590", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrqc-11-xgbfpn47.1/nmdc_wfrqc-11-xgbfpn47.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7ptqj932", + "name": "nmdc_wfrqc-11-xgbfpn47.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 291, + "md5_checksum": "0dfd55be1779ae7922d80aa22034c9a1", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrqc-11-xgbfpn47.1/nmdc_wfrqc-11-xgbfpn47.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-67mdw562", + "name": "nmdc_wfmgas-11-2cp0jv49.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 85994819, + "md5_checksum": "ee4eabbed9a6ef1963b9abb79301dd9d", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfmgas-11-2cp0jv49.1/nmdc_wfmgas-11-2cp0jv49.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2vkjm955", + "name": "nmdc_wfmgas-11-2cp0jv49.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 85465948, + "md5_checksum": "38e31a9555e48a8c0308094482da28ce", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfmgas-11-2cp0jv49.1/nmdc_wfmgas-11-2cp0jv49.1_scaffolds.fna", 
+ "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vy8v0k86", + "name": "nmdc_wfmgas-11-2cp0jv49.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 14950441, + "md5_checksum": "decacf88b07927aa8037eb162c2f0fa3", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfmgas-11-2cp0jv49.1/nmdc_wfmgas-11-2cp0jv49.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-76gdsn13", + "name": "nmdc_wfmgas-11-2cp0jv49.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 15096930, + "md5_checksum": "9920b7e176b028a4d312a39163d247b4", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfmgas-11-2cp0jv49.1/nmdc_wfmgas-11-2cp0jv49.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e89whc43", + "name": "nmdc_wfmgas-11-2cp0jv49.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfmgas-11-2cp0jv49.1/nmdc_wfmgas-11-2cp0jv49.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-68an4v58", + "name": "nmdc_wfrbt-11-jc67mz77.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 4224, + "md5_checksum": "0526ea84f6e7893f5b6d62a32f81a199", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p6a0c577", + "name": 
"nmdc_wfrbt-11-jc67mz77.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 875501, + "md5_checksum": "1a7380f5adb59f36c98c840bf28ad4bd", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xyy6pf66", + "name": "nmdc_wfrbt-11-jc67mz77.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 238755, + "md5_checksum": "366ab38bb6de9591f31a086d42ac23d6", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gmd3dr88", + "name": "nmdc_wfrbt-11-jc67mz77.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 2051793471, + "md5_checksum": "c44ba44bc6910c2f3ed3a60a52b4a616", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p0f99y83", + "name": "nmdc_wfrbt-11-jc67mz77.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 256560, + "md5_checksum": "0ca043b630ba304cb80603e8332c78cf", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3z6pq318", + "name": 
"nmdc_wfrbt-11-jc67mz77.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 2334325, + "md5_checksum": "059ff39ced52c0df45a331c4e9e10fdd", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4sjfzz92", + "name": "nmdc_wfrbt-11-jc67mz77.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 1649071235, + "md5_checksum": "7bfa3b5b29ec5cf9882251585d99f9bf", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-trfp8j29", + "name": "nmdc_wfrbt-11-jc67mz77.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 654782, + "md5_checksum": "2fceef0aaf1c3c3e3edd8d69bb72c8d3", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ckpzdz85", + "name": "nmdc_wfrbt-11-jc67mz77.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-mbv2jc69", + "file_size_bytes": 3988988, + "md5_checksum": "678e7c401a6971629f7d3ada83b307ab", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-mbv2jc69/nmdc:wfrbt-11-jc67mz77.1/nmdc_wfrbt-11-jc67mz77.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-2cp0jv49.1", + "name": "Metagenome Assembly Activity 
for nmdc:omprc-11-mbv2jc69", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T04:49:55+00:00", + "was_informed_by": "nmdc:omprc-11-mbv2jc69", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-r57zvb61" + ], + "has_output": [ + "nmdc:dobj-11-67mdw562", + "nmdc:dobj-11-2vkjm955", + "nmdc:dobj-11-vy8v0k86", + "nmdc:dobj-11-76gdsn13", + "nmdc:dobj-11-e89whc43" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-mbv2jc69" + ], + "version": "v1.0.2", + "asm_score": 5.751, + "scaffolds": 175734, + "scaf_logsum": 142614, + "scaf_powsum": 15897, + "scaf_max": 33833, + "scaf_bp": 78220371, + "scaf_n50": 53321, + "scaf_n90": 150048, + "scaf_l50": 412, + "scaf_l90": 286, + "contigs": 175824, + "contig_bp": 78219291, + "ctg_n50": 53340, + "ctg_l50": 412, + "ctg_n90": 150131, + "ctg_l90": 286, + "ctg_logsum": 142091, + "ctg_powsum": 15837, + "ctg_max": 33833, + "gap_pct": 0.00138, + "gc_std": 0.09424, + "gc_avg": 0.62214 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-mbv2jc69", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jdgzjq31" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127634" + ], + "has_output": [ + "nmdc:dobj-11-y27yqy58" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": 
"JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-xgbfpn47.1", + "name": "Read QC Activity for nmdc:omprc-11-mbv2jc69", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T04:49:55+00:00", + "was_informed_by": "nmdc:omprc-11-mbv2jc69", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-y27yqy58" + ], + "has_output": [ + "nmdc:dobj-11-r57zvb61", + "nmdc:dobj-11-7ptqj932" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-mbv2jc69" + ], + "version": "v1.0.2", + "input_read_count": 29872658, + "output_read_count": 27896694, + "input_read_bases": 4510771358, + "output_read_bases": 4172764161 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-jc67mz77.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-mbv2jc69", + "started_at_time": "2021-10-11T02:23:30Z", + "ended_at_time": "2021-10-11T04:49:55+00:00", + "was_informed_by": "nmdc:omprc-11-mbv2jc69", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-r57zvb61" + ], + "has_output": [ + "nmdc:dobj-11-68an4v58", + "nmdc:dobj-11-p6a0c577", + "nmdc:dobj-11-xyy6pf66", + "nmdc:dobj-11-gmd3dr88", + "nmdc:dobj-11-p0f99y83", + "nmdc:dobj-11-3z6pq318", + "nmdc:dobj-11-4sjfzz92", + "nmdc:dobj-11-trfp8j29", + "nmdc:dobj-11-ckpzdz85" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-mbv2jc69" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-mmsvm346", + "name": "10533.1.165310.GTTCGGT-AACCGAA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2197847748, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7dk5qb15", + "name": 
"nmdc_wfrqc-11-nwsaz354.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 1951049105, + "md5_checksum": "f8bc16e232f7ba0f6d6b5ca35a708c36", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrqc-11-nwsaz354.1/nmdc_wfrqc-11-nwsaz354.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-n43spc33", + "name": "nmdc_wfrqc-11-nwsaz354.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 280, + "md5_checksum": "fbc260443529d6e8067efdac3b58a8c1", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrqc-11-nwsaz354.1/nmdc_wfrqc-11-nwsaz354.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7vpfqw97", + "name": "nmdc_wfmgas-11-9z3krp31.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 113205152, + "md5_checksum": "877f35c13b865c4313bfeeead1c9a637", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfmgas-11-9z3krp31.1/nmdc_wfmgas-11-9z3krp31.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f5sws914", + "name": "nmdc_wfmgas-11-9z3krp31.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 112582261, + "md5_checksum": "e458d704511ec37f1e351870637a5749", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfmgas-11-9z3krp31.1/nmdc_wfmgas-11-9z3krp31.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b5shqf06", + "name": "nmdc_wfmgas-11-9z3krp31.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 17638512, + "md5_checksum": 
"6f05c26d2a4bb08af3bad659192b5a0e", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfmgas-11-9z3krp31.1/nmdc_wfmgas-11-9z3krp31.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-cdbg1d34", + "name": "nmdc_wfmgas-11-9z3krp31.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 17808321, + "md5_checksum": "8bfdac03c3d0e99e094b5e7f15da8ee4", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfmgas-11-9z3krp31.1/nmdc_wfmgas-11-9z3krp31.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-s5gef030", + "name": "nmdc_wfmgas-11-9z3krp31.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfmgas-11-9z3krp31.1/nmdc_wfmgas-11-9z3krp31.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6wmv9d89", + "name": "nmdc_wfrbt-11-skex5j86.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 3696, + "md5_checksum": "d8a410c52c8f6cf0097b674492cc3926", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vz7jj577", + "name": "nmdc_wfrbt-11-skex5j86.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 677459, + "md5_checksum": "ddec46781153da60da815c65871f5413", + "data_object_type": "GOTTCHA2 Report Full", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zxj1ap31", + "name": "nmdc_wfrbt-11-skex5j86.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 236164, + "md5_checksum": "e626ec18dba4885613240927cbb99d8b", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bjgwxw90", + "name": "nmdc_wfrbt-11-skex5j86.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 1796179546, + "md5_checksum": "f8486e4ee029038a452a3484db10cabc", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bcraza13", + "name": "nmdc_wfrbt-11-skex5j86.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 254661, + "md5_checksum": "4121f2ec52b80b7feb9d9a4749080125", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mnjd0e22", + "name": "nmdc_wfrbt-11-skex5j86.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 2333534, + "md5_checksum": "5b8c1cd8ba47041c20d3e18cb902a854", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sb0pqd59", + "name": "nmdc_wfrbt-11-skex5j86.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 1432249556, + "md5_checksum": "59807dae5216b11c96df5593a26d9a88", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8ra1j498", + "name": "nmdc_wfrbt-11-skex5j86.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 639738, + "md5_checksum": "a491f6797bd7294dbc5ba301efb3466e", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dmhk0z85", + "name": "nmdc_wfrbt-11-skex5j86.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-kc23zq65", + "file_size_bytes": 3996293, + "md5_checksum": "6748020214a3d68ad588e3548107208e", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kc23zq65/nmdc:wfrbt-11-skex5j86.1/nmdc_wfrbt-11-skex5j86.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-9z3krp31.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-kc23zq65", + "started_at_time": "2021-10-11T02:26:59Z", + "ended_at_time": "2021-10-11T04:11:48+00:00", + "was_informed_by": "nmdc:omprc-11-kc23zq65", + "execution_resource": "NERSC-Cori", + "git_url": 
"https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-7dk5qb15" + ], + "has_output": [ + "nmdc:dobj-11-7vpfqw97", + "nmdc:dobj-11-f5sws914", + "nmdc:dobj-11-b5shqf06", + "nmdc:dobj-11-cdbg1d34", + "nmdc:dobj-11-s5gef030" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-kc23zq65" + ], + "version": "v1.0.2", + "asm_score": 3.934, + "scaffolds": 206599, + "scaf_logsum": 270403, + "scaf_powsum": 29544, + "scaf_max": 23775, + "scaf_bp": 103844032, + "scaf_n50": 55067, + "scaf_n90": 171721, + "scaf_l50": 497, + "scaf_l90": 290, + "contigs": 206757, + "contig_bp": 103842002, + "ctg_n50": 55322, + "ctg_l50": 496, + "ctg_n90": 171862, + "ctg_l90": 290, + "ctg_logsum": 269360, + "ctg_powsum": 29422, + "ctg_max": 23775, + "gap_pct": 0.00195, + "gc_std": 0.10033, + "gc_avg": 0.61621 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-kc23zq65", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-59xteq78" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127635" + ], + "has_output": [ + "nmdc:dobj-11-mmsvm346" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-nwsaz354.1", + "name": "Read QC Activity for nmdc:omprc-11-kc23zq65", + "started_at_time": 
"2021-10-11T02:26:59Z", + "ended_at_time": "2021-10-11T04:11:48+00:00", + "was_informed_by": "nmdc:omprc-11-kc23zq65", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-mmsvm346" + ], + "has_output": [ + "nmdc:dobj-11-7dk5qb15", + "nmdc:dobj-11-n43spc33" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-kc23zq65" + ], + "version": "v1.0.2", + "input_read_count": 25320866, + "output_read_count": 24600396, + "input_read_bases": 3823450766, + "output_read_bases": 3673182178 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-skex5j86.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-kc23zq65", + "started_at_time": "2021-10-11T02:26:59Z", + "ended_at_time": "2021-10-11T04:11:48+00:00", + "was_informed_by": "nmdc:omprc-11-kc23zq65", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-7dk5qb15" + ], + "has_output": [ + "nmdc:dobj-11-6wmv9d89", + "nmdc:dobj-11-vz7jj577", + "nmdc:dobj-11-zxj1ap31", + "nmdc:dobj-11-bjgwxw90", + "nmdc:dobj-11-bcraza13", + "nmdc:dobj-11-mnjd0e22", + "nmdc:dobj-11-sb0pqd59", + "nmdc:dobj-11-8ra1j498", + "nmdc:dobj-11-dmhk0z85" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-kc23zq65" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-0vpq2471", + "name": "10533.1.165310.CGTAGGT-AACCTAC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1954789686, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c5qr8837", + "name": "nmdc_wfrqc-11-z1v9ak25.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 1553219358, + "md5_checksum": "805310f4b1e39a0cc9e5b5787576cb8b", + "data_object_type": "Filtered 
Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrqc-11-z1v9ak25.1/nmdc_wfrqc-11-z1v9ak25.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ytb0hm70", + "name": "nmdc_wfrqc-11-z1v9ak25.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 289, + "md5_checksum": "611e67df261e050860b1075c6a6a5ff5", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrqc-11-z1v9ak25.1/nmdc_wfrqc-11-z1v9ak25.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e6kbzk68", + "name": "nmdc_wfmgas-11-fhj6wr32.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 118489955, + "md5_checksum": "f7cb2b43c84a34d3ee380a85b217516a", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfmgas-11-fhj6wr32.1/nmdc_wfmgas-11-fhj6wr32.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pz3g8d24", + "name": "nmdc_wfmgas-11-fhj6wr32.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 117843060, + "md5_checksum": "f6eead9147fe6a66dea51b3525756385", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfmgas-11-fhj6wr32.1/nmdc_wfmgas-11-fhj6wr32.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gy2cwm17", + "name": "nmdc_wfmgas-11-fhj6wr32.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 18326932, + "md5_checksum": "10b818a1396759dc8492c66fa928bb0b", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfmgas-11-fhj6wr32.1/nmdc_wfmgas-11-fhj6wr32.1_covstats.txt", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3hf56j53", + "name": "nmdc_wfmgas-11-fhj6wr32.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 18511231, + "md5_checksum": "1a0aacf1eeb09407a1276722c74fe27b", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfmgas-11-fhj6wr32.1/nmdc_wfmgas-11-fhj6wr32.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-561cpw08", + "name": "nmdc_wfmgas-11-fhj6wr32.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfmgas-11-fhj6wr32.1/nmdc_wfmgas-11-fhj6wr32.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2k69w505", + "name": "nmdc_wfrbt-11-xk21p577.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 660, + "md5_checksum": "9268e073dacb7f7cd5f9513393cb0b2a", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-x2e0q336", + "name": "nmdc_wfrbt-11-xk21p577.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 594054, + "md5_checksum": "37dd1d73ad47979ee5284830d27df535", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j3tcvf79", + "name": 
"nmdc_wfrbt-11-xk21p577.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 227750, + "md5_checksum": "43bffbfb830c6e3ccc140ec0dff1e773", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mm6n3n18", + "name": "nmdc_wfrbt-11-xk21p577.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 1457058272, + "md5_checksum": "cb3bd5ca5088484cb4e580ad91d736b2", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pwfz0g25", + "name": "nmdc_wfrbt-11-xk21p577.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 251867, + "md5_checksum": "f44a5d59785cdededea0fe4a6a429c30", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wsd6gp78", + "name": "nmdc_wfrbt-11-xk21p577.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 2325282, + "md5_checksum": "81a6efbd082e07bc2db174a88d64a272", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p1c4vv61", + "name": 
"nmdc_wfrbt-11-xk21p577.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 1160106364, + "md5_checksum": "f63856a84bc9afb8954ccdb1803d5fde", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-24twen58", + "name": "nmdc_wfrbt-11-xk21p577.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 613810, + "md5_checksum": "9a1826f66ee45187d627076d11dc491f", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-35ae1336", + "name": "nmdc_wfrbt-11-xk21p577.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-c8dzx197", + "file_size_bytes": 3853908, + "md5_checksum": "67adb9cc2c75251f556a90b1a959ea72", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-c8dzx197/nmdc:wfrbt-11-xk21p577.1/nmdc_wfrbt-11-xk21p577.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-fhj6wr32.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-c8dzx197", + "started_at_time": "2021-10-11T02:24:01Z", + "ended_at_time": "2021-10-11T03:11:56+00:00", + "was_informed_by": "nmdc:omprc-11-c8dzx197", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-c5qr8837" + ], + "has_output": [ + "nmdc:dobj-11-e6kbzk68", + "nmdc:dobj-11-pz3g8d24", + "nmdc:dobj-11-gy2cwm17", + "nmdc:dobj-11-3hf56j53", + "nmdc:dobj-11-561cpw08" + ], + 
"type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-c8dzx197" + ], + "version": "v1.0.2", + "asm_score": 5.062, + "scaffolds": 214737, + "scaf_logsum": 272416, + "scaf_powsum": 29983, + "scaf_max": 43650, + "scaf_bp": 108741284, + "scaf_n50": 58469, + "scaf_n90": 177412, + "scaf_l50": 505, + "scaf_l90": 294, + "contigs": 214863, + "contig_bp": 108739484, + "ctg_n50": 58474, + "ctg_l50": 505, + "ctg_n90": 177521, + "ctg_l90": 294, + "ctg_logsum": 271617, + "ctg_powsum": 29885, + "ctg_max": 43650, + "gap_pct": 0.00166, + "gc_std": 0.08814, + "gc_avg": 0.63266 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-c8dzx197", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-kwfbp795" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127637" + ], + "has_output": [ + "nmdc:dobj-11-0vpq2471" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-z1v9ak25.1", + "name": "Read QC Activity for nmdc:omprc-11-c8dzx197", + "started_at_time": "2021-10-11T02:24:01Z", + "ended_at_time": "2021-10-11T03:11:56+00:00", + "was_informed_by": "nmdc:omprc-11-c8dzx197", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + 
"nmdc:dobj-11-0vpq2471" + ], + "has_output": [ + "nmdc:dobj-11-c5qr8837", + "nmdc:dobj-11-ytb0hm70" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-c8dzx197" + ], + "version": "v1.0.2", + "input_read_count": 24239336, + "output_read_count": 19917090, + "input_read_bases": 3660139736, + "output_read_bases": 2975652755 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-xk21p577.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-c8dzx197", + "started_at_time": "2021-10-11T02:24:01Z", + "ended_at_time": "2021-10-11T03:11:56+00:00", + "was_informed_by": "nmdc:omprc-11-c8dzx197", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-c5qr8837" + ], + "has_output": [ + "nmdc:dobj-11-2k69w505", + "nmdc:dobj-11-x2e0q336", + "nmdc:dobj-11-j3tcvf79", + "nmdc:dobj-11-mm6n3n18", + "nmdc:dobj-11-pwfz0g25", + "nmdc:dobj-11-wsd6gp78", + "nmdc:dobj-11-p1c4vv61", + "nmdc:dobj-11-24twen58", + "nmdc:dobj-11-35ae1336" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-c8dzx197" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-assd7y33", + "name": "10533.2.165322.TCATCAC-GGTGATG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1920284821, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ma9wn665", + "name": "nmdc_wfrqc-11-rp34gx65.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 1649318115, + "md5_checksum": "56ba2416c050decd6c16c618c1e4a752", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrqc-11-rp34gx65.1/nmdc_wfrqc-11-rp34gx65.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p591nw13", + 
"name": "nmdc_wfrqc-11-rp34gx65.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 283, + "md5_checksum": "5c9398042e9ff608befa78e86597bdf0", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrqc-11-rp34gx65.1/nmdc_wfrqc-11-rp34gx65.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-51qmwd41", + "name": "nmdc_wfmgas-11-qtd0ns28.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 85325246, + "md5_checksum": "b6ad2dbbcbcbbbf8d0b368cbed22ccf1", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfmgas-11-qtd0ns28.1/nmdc_wfmgas-11-qtd0ns28.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gqwc3x40", + "name": "nmdc_wfmgas-11-qtd0ns28.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 84814670, + "md5_checksum": "8f6c31cea9f705f1070a4fb6c1c7bcd7", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfmgas-11-qtd0ns28.1/nmdc_wfmgas-11-qtd0ns28.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-527yxs26", + "name": "nmdc_wfmgas-11-qtd0ns28.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 14431981, + "md5_checksum": "3457c7965a687952de0f69eead80b3c0", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfmgas-11-qtd0ns28.1/nmdc_wfmgas-11-qtd0ns28.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-y8yx1e22", + "name": "nmdc_wfmgas-11-qtd0ns28.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 14563404, + "md5_checksum": 
"f9b9efdcffa75e510f2abdcd7d5c7b51", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfmgas-11-qtd0ns28.1/nmdc_wfmgas-11-qtd0ns28.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gb4qq971", + "name": "nmdc_wfmgas-11-qtd0ns28.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfmgas-11-qtd0ns28.1/nmdc_wfmgas-11-qtd0ns28.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-g05eb923", + "name": "nmdc_wfrbt-11-f7ymgp41.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 2025, + "md5_checksum": "dbbd6ca6777b71d1fac4aae2cd947deb", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-cgb78v45", + "name": "nmdc_wfrbt-11-f7ymgp41.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 655633, + "md5_checksum": "b6de56746a284f8226dd86817c8ae04e", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wk5ahy53", + "name": "nmdc_wfrbt-11-f7ymgp41.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 232133, + "md5_checksum": "d9572e708af9f0a06e98cfddfb298359", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-yf4d2m97", + "name": "nmdc_wfrbt-11-f7ymgp41.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 1448205544, + "md5_checksum": "e9946f36795474182b7759d3d7532b57", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tkzz2c22", + "name": "nmdc_wfrbt-11-f7ymgp41.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 253872, + "md5_checksum": "33ff1d85d17d763afc9e21e481cc10d2", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-a6ve3k59", + "name": "nmdc_wfrbt-11-f7ymgp41.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 2331772, + "md5_checksum": "997a66f49a232750bd7132639f3387e7", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-614zpa55", + "name": "nmdc_wfrbt-11-f7ymgp41.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 1157365410, + "md5_checksum": "d3f604a59babf001839d38a617b62931", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4vy61770", + "name": "nmdc_wfrbt-11-f7ymgp41.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 621484, + "md5_checksum": "3abfaa434ee1449cbbb69985e48488b4", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t90gg135", + "name": "nmdc_wfrbt-11-f7ymgp41.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-tgxmb243", + "file_size_bytes": 3896830, + "md5_checksum": "70c2fc1a2c7c0032528ff91ad1576465", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-tgxmb243/nmdc:wfrbt-11-f7ymgp41.1/nmdc_wfrbt-11-f7ymgp41.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-qtd0ns28.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-tgxmb243", + "started_at_time": "2021-12-01T21:31:29Z", + "ended_at_time": "2021-12-02T20:49:51+00:00", + "was_informed_by": "nmdc:omprc-11-tgxmb243", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-ma9wn665" + ], + "has_output": [ + "nmdc:dobj-11-51qmwd41", + "nmdc:dobj-11-gqwc3x40", + "nmdc:dobj-11-527yxs26", + "nmdc:dobj-11-y8yx1e22", + "nmdc:dobj-11-gb4qq971" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-tgxmb243" + ], + "version": "v1.0.2", + "asm_score": 6.89, + "scaffolds": 169622, + "scaf_logsum": 141966, + "scaf_powsum": 15801, + "scaf_max": 48487, + "scaf_bp": 77784618, + "scaf_n50": 51437, + "scaf_n90": 144234, + 
"scaf_l50": 433, + "scaf_l90": 289, + "contigs": 169698, + "contig_bp": 77783768, + "ctg_n50": 51455, + "ctg_l50": 433, + "ctg_n90": 144304, + "ctg_l90": 289, + "ctg_logsum": 141543, + "ctg_powsum": 15753, + "ctg_max": 48487, + "gap_pct": 0.00109, + "gc_std": 0.08917, + "gc_avg": 0.63213 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-tgxmb243", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-pq3zmp51" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127638" + ], + "has_output": [ + "nmdc:dobj-11-assd7y33" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-rp34gx65.1", + "name": "Read QC Activity for nmdc:omprc-11-tgxmb243", + "started_at_time": "2021-12-01T21:31:29Z", + "ended_at_time": "2021-12-02T20:49:51+00:00", + "was_informed_by": "nmdc:omprc-11-tgxmb243", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-assd7y33" + ], + "has_output": [ + "nmdc:dobj-11-ma9wn665", + "nmdc:dobj-11-p591nw13" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-tgxmb243" + ], + "version": "v1.0.2", + "input_read_count": 21721428, + "output_read_count": 19723416, + "input_read_bases": 3279935628, + 
"output_read_bases": 2949961420 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-f7ymgp41.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-tgxmb243", + "started_at_time": "2021-12-01T21:31:29Z", + "ended_at_time": "2021-12-02T20:49:51+00:00", + "was_informed_by": "nmdc:omprc-11-tgxmb243", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-ma9wn665" + ], + "has_output": [ + "nmdc:dobj-11-g05eb923", + "nmdc:dobj-11-cgb78v45", + "nmdc:dobj-11-wk5ahy53", + "nmdc:dobj-11-yf4d2m97", + "nmdc:dobj-11-tkzz2c22", + "nmdc:dobj-11-a6ve3k59", + "nmdc:dobj-11-614zpa55", + "nmdc:dobj-11-4vy61770", + "nmdc:dobj-11-t90gg135" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-tgxmb243" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-1v0g7c04", + "name": "9422.8.132674.CGTACG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 3408915289, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1yecfn74", + "name": "nmdc_wfrqc-11-3yggfm34.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 3012174785, + "md5_checksum": "7f6b353300583c60d2d668880b4134cd", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrqc-11-3yggfm34.1/nmdc_wfrqc-11-3yggfm34.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ajg3nm76", + "name": "nmdc_wfrqc-11-3yggfm34.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 291, + "md5_checksum": "a4f65d101293fa4345cd865f86597464", + "data_object_type": "QC Statistics", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrqc-11-3yggfm34.1/nmdc_wfrqc-11-3yggfm34.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xbd0qj66", + "name": "nmdc_wfmgas-11-5xs99z93.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 86493890, + "md5_checksum": "87286fe1201e9e58b6fe3b83548928fb", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfmgas-11-5xs99z93.1/nmdc_wfmgas-11-5xs99z93.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8fwk1e15", + "name": "nmdc_wfmgas-11-5xs99z93.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 86029934, + "md5_checksum": "733d2b882be700e7aa2d4f9437fba4a6", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfmgas-11-5xs99z93.1/nmdc_wfmgas-11-5xs99z93.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9aw9wk94", + "name": "nmdc_wfmgas-11-5xs99z93.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 12984513, + "md5_checksum": "62108e7c4fd62cc2db450e89328276c2", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfmgas-11-5xs99z93.1/nmdc_wfmgas-11-5xs99z93.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-k4r4t232", + "name": "nmdc_wfmgas-11-5xs99z93.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 13098795, + "md5_checksum": "81e0286dc40e8fb65af2abbe4260c49b", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfmgas-11-5xs99z93.1/nmdc_wfmgas-11-5xs99z93.1_assembly.agp", + "type": "nmdc:DataObject" + }, 
+ { + "id": "nmdc:dobj-11-cx562y22", + "name": "nmdc_wfmgas-11-5xs99z93.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfmgas-11-5xs99z93.1/nmdc_wfmgas-11-5xs99z93.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q736zc71", + "name": "nmdc_wfrbt-11-qcdskb53.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 13758, + "md5_checksum": "e316502f9e7a78c9db3996ef832aa9d7", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8pxrqk13", + "name": "nmdc_wfrbt-11-qcdskb53.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 1116084, + "md5_checksum": "1ac2be77491e7d425da1d62f69f1508d", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1cmrz021", + "name": "nmdc_wfrbt-11-qcdskb53.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 268542, + "md5_checksum": "de5b15fa9d3bdbc3abcc2475ee351323", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9qjgpn17", + "name": 
"nmdc_wfrbt-11-qcdskb53.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 2458475116, + "md5_checksum": "a9bbb74833404a2bf3bbd05e83a7a0ed", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vhm8gq73", + "name": "nmdc_wfrbt-11-qcdskb53.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 261692, + "md5_checksum": "c065784bed2b2495d512af93d05967de", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tvfrxs39", + "name": "nmdc_wfrbt-11-qcdskb53.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 2343355, + "md5_checksum": "a34dbcbdebae0861e41c09e7b9a5d9f0", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hsxx1887", + "name": "nmdc_wfrbt-11-qcdskb53.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 2019980511, + "md5_checksum": "b2122f5a910a1d4ae8a62956d1cd731c", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-revjh004", + "name": 
"nmdc_wfrbt-11-qcdskb53.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 694029, + "md5_checksum": "8a26d8496a70f4777be0e1237092e44c", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dqkxht06", + "name": "nmdc_wfrbt-11-qcdskb53.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-t0xjjc50", + "file_size_bytes": 4190653, + "md5_checksum": "694b83f0b6f599948d4248dd48dd9ba9", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-t0xjjc50/nmdc:wfrbt-11-qcdskb53.1/nmdc_wfrbt-11-qcdskb53.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-5xs99z93.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-t0xjjc50", + "started_at_time": "2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T05:55:52+00:00", + "was_informed_by": "nmdc:omprc-11-t0xjjc50", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-1yecfn74" + ], + "has_output": [ + "nmdc:dobj-11-xbd0qj66", + "nmdc:dobj-11-8fwk1e15", + "nmdc:dobj-11-9aw9wk94", + "nmdc:dobj-11-k4r4t232", + "nmdc:dobj-11-cx562y22" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-t0xjjc50" + ], + "version": "v1.0.2", + "asm_score": 12.57, + "scaffolds": 152330, + "scaf_logsum": 274450, + "scaf_powsum": 33865, + "scaf_max": 211520, + "scaf_bp": 79566293, + "scaf_n50": 35340, + "scaf_n90": 126070, + "scaf_l50": 493, + "scaf_l90": 290, + "scaf_n_gt50k": 21, + "scaf_l_gt50k": 1744421, + "scaf_pct_gt50k": 2.192412, + "contigs": 152605, + "contig_bp": 79563543, + "ctg_n50": 35595, + "ctg_l50": 492, + "ctg_n90": 126332, + 
"ctg_l90": 290, + "ctg_logsum": 272574, + "ctg_powsum": 33596, + "ctg_max": 211520, + "gap_pct": 0.00346, + "gc_std": 0.125, + "gc_avg": 0.57036 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-t0xjjc50", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-vg9vy382" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115670" + ], + "has_output": [ + "nmdc:dobj-11-1v0g7c04" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_10-June-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-3yggfm34.1", + "name": "Read QC Activity for nmdc:omprc-11-t0xjjc50", + "started_at_time": "2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T05:55:52+00:00", + "was_informed_by": "nmdc:omprc-11-t0xjjc50", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-1v0g7c04" + ], + "has_output": [ + "nmdc:dobj-11-1yecfn74", + "nmdc:dobj-11-ajg3nm76" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-t0xjjc50" + ], + "version": "v1.0.2", + "input_read_count": 36554212, + "output_read_count": 33663942, + "input_read_bases": 5519686012, + "output_read_bases": 5044444014 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-qcdskb53.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-t0xjjc50", + "started_at_time": 
"2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T05:55:52+00:00", + "was_informed_by": "nmdc:omprc-11-t0xjjc50", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-1yecfn74" + ], + "has_output": [ + "nmdc:dobj-11-q736zc71", + "nmdc:dobj-11-8pxrqk13", + "nmdc:dobj-11-1cmrz021", + "nmdc:dobj-11-9qjgpn17", + "nmdc:dobj-11-vhm8gq73", + "nmdc:dobj-11-tvfrxs39", + "nmdc:dobj-11-hsxx1887", + "nmdc:dobj-11-revjh004", + "nmdc:dobj-11-dqkxht06" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-t0xjjc50" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-k5qf8f61", + "name": "9422.8.132674.GGTAGC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2350177247, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rdxg7056", + "name": "nmdc_wfrqc-11-yn9hdr95.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 2126353222, + "md5_checksum": "538fd5695eb3decd48891e72acebb8ce", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrqc-11-yn9hdr95.1/nmdc_wfrqc-11-yn9hdr95.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ecscm265", + "name": "nmdc_wfrqc-11-yn9hdr95.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 288, + "md5_checksum": "dde2b1748e16380e63476430ee27083a", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrqc-11-yn9hdr95.1/nmdc_wfrqc-11-yn9hdr95.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-n5c8xs22", + "name": "nmdc_wfmgas-11-as5sxm68.1_contigs.fna", + "description": "Assembled contigs fasta for 
nmdc:omprc-11-1avd3d16", + "file_size_bytes": 79522449, + "md5_checksum": "9e5e2c88726ae0e8a65354f47f014c28", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfmgas-11-as5sxm68.1/nmdc_wfmgas-11-as5sxm68.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-d75a4r87", + "name": "nmdc_wfmgas-11-as5sxm68.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 79103141, + "md5_checksum": "3cf3bc6e3cb15fd5a90a4c3dd54ecd1e", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfmgas-11-as5sxm68.1/nmdc_wfmgas-11-as5sxm68.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jngvaw27", + "name": "nmdc_wfmgas-11-as5sxm68.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 11815988, + "md5_checksum": "33bef1b1c015b8d177390383d4a08a7f", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfmgas-11-as5sxm68.1/nmdc_wfmgas-11-as5sxm68.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fxmrgg27", + "name": "nmdc_wfmgas-11-as5sxm68.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 11922506, + "md5_checksum": "9dd05b32b5df752583e2412913f2c789", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfmgas-11-as5sxm68.1/nmdc_wfmgas-11-as5sxm68.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zbfj1g67", + "name": "nmdc_wfmgas-11-as5sxm68.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage 
BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfmgas-11-as5sxm68.1/nmdc_wfmgas-11-as5sxm68.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e5rw8a31", + "name": "nmdc_wfrbt-11-cpmeqn98.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 13768, + "md5_checksum": "7d6ec08ff0d080997fda7c7417f9c3d4", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vkgqqm26", + "name": "nmdc_wfrbt-11-cpmeqn98.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 1022858, + "md5_checksum": "df0dfd58dc386f5e0ded0b65b4a88c58", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-27fbgd56", + "name": "nmdc_wfrbt-11-cpmeqn98.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 269166, + "md5_checksum": "ce3f31985e0a99f97bd4751bc2469bcb", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jvrrq740", + "name": "nmdc_wfrbt-11-cpmeqn98.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 1904303690, + "md5_checksum": "f8740b1fadbc29aef50d32706c955199", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wqhj7y02", + "name": "nmdc_wfrbt-11-cpmeqn98.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 258748, + "md5_checksum": "80abfcc9b09476af4083b2af1760834f", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wmctch86", + "name": "nmdc_wfrbt-11-cpmeqn98.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 2335000, + "md5_checksum": "f189624af50d8d62908f8ddd5f3451ad", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bpmynh32", + "name": "nmdc_wfrbt-11-cpmeqn98.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 1574286150, + "md5_checksum": "09302fbc8e30758a95fac09ee5cfd449", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sfsksj64", + "name": "nmdc_wfrbt-11-cpmeqn98.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 671800, + "md5_checksum": "e44f717fc6f3458c17b4f5129a5e7920", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bqt08n32", + "name": "nmdc_wfrbt-11-cpmeqn98.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-1avd3d16", + "file_size_bytes": 4070548, + "md5_checksum": "19eb52a96c1dedc9036ec9a0aaeda079", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-1avd3d16/nmdc:wfrbt-11-cpmeqn98.1/nmdc_wfrbt-11-cpmeqn98.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-as5sxm68.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-1avd3d16", + "started_at_time": "2021-10-11T02:28:52Z", + "ended_at_time": "2021-10-11T05:21:41+00:00", + "was_informed_by": "nmdc:omprc-11-1avd3d16", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-rdxg7056" + ], + "has_output": [ + "nmdc:dobj-11-n5c8xs22", + "nmdc:dobj-11-d75a4r87", + "nmdc:dobj-11-jngvaw27", + "nmdc:dobj-11-fxmrgg27", + "nmdc:dobj-11-zbfj1g67" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-1avd3d16" + ], + "version": "v1.0.2", + "asm_score": 18.19, + "scaffolds": 139236, + "scaf_logsum": 272657, + "scaf_powsum": 36239, + "scaf_max": 176505, + "scaf_bp": 73196685, + "scaf_n50": 30582, + "scaf_n90": 114932, + "scaf_l50": 482, + "scaf_l90": 290, + "scaf_n_gt50k": 32, + "scaf_l_gt50k": 2506146, + "scaf_pct_gt50k": 3.4238515, + "contigs": 139326, + "contig_bp": 73195425, + "ctg_n50": 30768, + "ctg_l50": 481, + "ctg_n90": 115008, + "ctg_l90": 290, + "ctg_logsum": 272042, + "ctg_powsum": 36133, + "ctg_max": 176505, + "gap_pct": 0.00172, + "gc_std": 0.12397, + "gc_avg": 0.56886 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-1avd3d16", + "name": "Sand microcosm microbial 
communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-5xjtzc47" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115674" + ], + "has_output": [ + "nmdc:dobj-11-k5qf8f61" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_21-May-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-yn9hdr95.1", + "name": "Read QC Activity for nmdc:omprc-11-1avd3d16", + "started_at_time": "2021-10-11T02:28:52Z", + "ended_at_time": "2021-10-11T05:21:41+00:00", + "was_informed_by": "nmdc:omprc-11-1avd3d16", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-k5qf8f61" + ], + "has_output": [ + "nmdc:dobj-11-rdxg7056", + "nmdc:dobj-11-ecscm265" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-1avd3d16" + ], + "version": "v1.0.2", + "input_read_count": 26546332, + "output_read_count": 25776010, + "input_read_bases": 4008496132, + "output_read_bases": 3862169938 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-cpmeqn98.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-1avd3d16", + "started_at_time": "2021-10-11T02:28:52Z", + "ended_at_time": "2021-10-11T05:21:41+00:00", + "was_informed_by": "nmdc:omprc-11-1avd3d16", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + 
"nmdc:dobj-11-rdxg7056" + ], + "has_output": [ + "nmdc:dobj-11-e5rw8a31", + "nmdc:dobj-11-vkgqqm26", + "nmdc:dobj-11-27fbgd56", + "nmdc:dobj-11-jvrrq740", + "nmdc:dobj-11-wqhj7y02", + "nmdc:dobj-11-wmctch86", + "nmdc:dobj-11-bpmynh32", + "nmdc:dobj-11-sfsksj64", + "nmdc:dobj-11-bqt08n32" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-1avd3d16" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-wvje1j80", + "name": "9387.2.132031.CTTGTA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1698585233, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bpbwfm49", + "name": "nmdc_wfrqc-11-wb98gb17.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 1492820163, + "md5_checksum": "268918f610926421d2af43f175553680", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrqc-11-wb98gb17.1/nmdc_wfrqc-11-wb98gb17.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-77f4fs19", + "name": "nmdc_wfrqc-11-wb98gb17.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 287, + "md5_checksum": "4610980cf3558f5a9830797ead97362a", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrqc-11-wb98gb17.1/nmdc_wfrqc-11-wb98gb17.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hqydk289", + "name": "nmdc_wfmgas-11-g8rjzj56.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 50148994, + "md5_checksum": "42edb00ca315cacba82b16cbc52259ba", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfmgas-11-g8rjzj56.1/nmdc_wfmgas-11-g8rjzj56.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gxn76s88", + "name": "nmdc_wfmgas-11-g8rjzj56.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 49876917, + "md5_checksum": "bb4ce66dbda624e5944f65961337a8cf", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfmgas-11-g8rjzj56.1/nmdc_wfmgas-11-g8rjzj56.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-k262v203", + "name": "nmdc_wfmgas-11-g8rjzj56.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 7587352, + "md5_checksum": "a818a463501e107d7f2ef30ec1c10dee", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfmgas-11-g8rjzj56.1/nmdc_wfmgas-11-g8rjzj56.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sbtj8s23", + "name": "nmdc_wfmgas-11-g8rjzj56.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 7636878, + "md5_checksum": "18bf95537ed3d8aca11e52ce0f6f5322", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfmgas-11-g8rjzj56.1/nmdc_wfmgas-11-g8rjzj56.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zkbbx723", + "name": "nmdc_wfmgas-11-g8rjzj56.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfmgas-11-g8rjzj56.1/nmdc_wfmgas-11-g8rjzj56.1_pairedMapped_sorted.bam", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j5t6dk19", + "name": "nmdc_wfrbt-11-1y1f9q38.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 9782, + "md5_checksum": "c7b24571b61a33018cf118b5424b787f", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9drt5402", + "name": "nmdc_wfrbt-11-1y1f9q38.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 856112, + "md5_checksum": "e185734176505343bf4c83c16a0a9fe2", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ckyrtf63", + "name": "nmdc_wfrbt-11-1y1f9q38.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 255142, + "md5_checksum": "7c6b0ef44450c747580826a2e218844b", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6s67nh08", + "name": "nmdc_wfrbt-11-1y1f9q38.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 1218364738, + "md5_checksum": "5b98c377f424d7609f1a09e350cfb837", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": 
"nmdc:dobj-11-dvsxza29", + "name": "nmdc_wfrbt-11-1y1f9q38.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 254923, + "md5_checksum": "b5f7a68a94b356001014d1be024231af", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1h1bjg77", + "name": "nmdc_wfrbt-11-1y1f9q38.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 2323219, + "md5_checksum": "75bca66cfcdd38331c10edbba03fa0d3", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dygxt096", + "name": "nmdc_wfrbt-11-1y1f9q38.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 1001134031, + "md5_checksum": "35bf579641b2ffb3614098d9811a4968", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3132wc25", + "name": "nmdc_wfrbt-11-1y1f9q38.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 640671, + "md5_checksum": "801b79f5442e5bfaa0d15f76786cfbc0", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vg5hkt48", + "name": 
"nmdc_wfrbt-11-1y1f9q38.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-hk1bje46", + "file_size_bytes": 3995499, + "md5_checksum": "a7030fa8e9622e3396c2b96448e90c3b", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hk1bje46/nmdc:wfrbt-11-1y1f9q38.1/nmdc_wfrbt-11-1y1f9q38.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-g8rjzj56.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-hk1bje46", + "started_at_time": "2021-10-11T02:28:36Z", + "ended_at_time": "2021-10-11T03:32:43+00:00", + "was_informed_by": "nmdc:omprc-11-hk1bje46", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-bpbwfm49" + ], + "has_output": [ + "nmdc:dobj-11-hqydk289", + "nmdc:dobj-11-gxn76s88", + "nmdc:dobj-11-k262v203", + "nmdc:dobj-11-sbtj8s23", + "nmdc:dobj-11-zkbbx723" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-hk1bje46" + ], + "version": "v1.0.2", + "asm_score": 6.419, + "scaffolds": 89660, + "scaf_logsum": 161291, + "scaf_powsum": 18825, + "scaf_max": 39252, + "scaf_bp": 46122177, + "scaf_n50": 19797, + "scaf_n90": 73347, + "scaf_l50": 494, + "scaf_l90": 286, + "contigs": 89808, + "contig_bp": 46120517, + "ctg_n50": 19910, + "ctg_l50": 493, + "ctg_n90": 73487, + "ctg_l90": 286, + "ctg_logsum": 160283, + "ctg_powsum": 18694, + "ctg_max": 39252, + "gap_pct": 0.0036, + "gc_std": 0.11246, + "gc_avg": 0.55483 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-hk1bje46", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-5h7px351" + ], + "add_date": "2015-05-28", + 
"gold_sequencing_project_identifiers": [ + "gold:Gp0115673" + ], + "has_output": [ + "nmdc:dobj-11-wvje1j80" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_2-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-wb98gb17.1", + "name": "Read QC Activity for nmdc:omprc-11-hk1bje46", + "started_at_time": "2021-10-11T02:28:36Z", + "ended_at_time": "2021-10-11T03:32:43+00:00", + "was_informed_by": "nmdc:omprc-11-hk1bje46", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-wvje1j80" + ], + "has_output": [ + "nmdc:dobj-11-bpbwfm49", + "nmdc:dobj-11-77f4fs19" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hk1bje46" + ], + "version": "v1.0.2", + "input_read_count": 17796788, + "output_read_count": 16817496, + "input_read_bases": 2687314988, + "output_read_bases": 2520029380 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-1y1f9q38.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-hk1bje46", + "started_at_time": "2021-10-11T02:28:36Z", + "ended_at_time": "2021-10-11T03:32:43+00:00", + "was_informed_by": "nmdc:omprc-11-hk1bje46", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-bpbwfm49" + ], + "has_output": [ + "nmdc:dobj-11-j5t6dk19", + "nmdc:dobj-11-9drt5402", + "nmdc:dobj-11-ckyrtf63", + "nmdc:dobj-11-6s67nh08", + "nmdc:dobj-11-dvsxza29", + "nmdc:dobj-11-1h1bjg77", + "nmdc:dobj-11-dygxt096", + "nmdc:dobj-11-3132wc25", + "nmdc:dobj-11-vg5hkt48" + 
], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hk1bje46" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-sp1dx351", + "name": "9387.2.132031.ATGTCA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2065080622, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mvfs2859", + "name": "nmdc_wfrqc-11-6qq68y05.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 1806996776, + "md5_checksum": "445f37bc3019e9fe3b29a2ac5bcbfc9c", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrqc-11-6qq68y05.1/nmdc_wfrqc-11-6qq68y05.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c383ph33", + "name": "nmdc_wfrqc-11-6qq68y05.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 289, + "md5_checksum": "24440b4c5534da30eee650b68eccda84", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrqc-11-6qq68y05.1/nmdc_wfrqc-11-6qq68y05.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tz487854", + "name": "nmdc_wfmgas-11-ctbbh665.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 59466666, + "md5_checksum": "3272e82c974fe804e76838058cbc266f", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfmgas-11-ctbbh665.1/nmdc_wfmgas-11-ctbbh665.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-49xp6781", + "name": "nmdc_wfmgas-11-ctbbh665.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 59103832, + "md5_checksum": 
"8b32fab555ff3f2221c1eb616cc57a72", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfmgas-11-ctbbh665.1/nmdc_wfmgas-11-ctbbh665.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mjgtxb34", + "name": "nmdc_wfmgas-11-ctbbh665.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 10186666, + "md5_checksum": "f3d7613474eadcc0af0e3d2a7f87579a", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfmgas-11-ctbbh665.1/nmdc_wfmgas-11-ctbbh665.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sesd9x35", + "name": "nmdc_wfmgas-11-ctbbh665.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 10265372, + "md5_checksum": "e2bee7ea29af5657ba43aee3aabb48bb", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfmgas-11-ctbbh665.1/nmdc_wfmgas-11-ctbbh665.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mbx38914", + "name": "nmdc_wfmgas-11-ctbbh665.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfmgas-11-ctbbh665.1/nmdc_wfmgas-11-ctbbh665.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bfg3hp29", + "name": "nmdc_wfrbt-11-qxd7m466.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 11833, + "md5_checksum": "358559c32b69eff51758db66ac01021b", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rbmyjf20", + "name": "nmdc_wfrbt-11-qxd7m466.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 888177, + "md5_checksum": "befbd648249c2871bd27999120e50bf7", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dqftx366", + "name": "nmdc_wfrbt-11-qxd7m466.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 261703, + "md5_checksum": "cacb8f623a808d0cae094d46f2801dd3", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-htk7b896", + "name": "nmdc_wfrbt-11-qxd7m466.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 1474970402, + "md5_checksum": "1b15ffb745e320a9bf0cac7e672e974b", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2mgbzx37", + "name": "nmdc_wfrbt-11-qxd7m466.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 255777, + "md5_checksum": "90b77c7118bf6ec1f99836a50d562a7f", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2a0kzk27", + "name": "nmdc_wfrbt-11-qxd7m466.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 2329875, + "md5_checksum": "e0736ff520260ba2097c02b9e767362c", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ay1gh298", + "name": "nmdc_wfrbt-11-qxd7m466.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 1213240496, + "md5_checksum": "a00960655f9e80726fdb0fade1bec958", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-z016kg16", + "name": "nmdc_wfrbt-11-qxd7m466.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 659715, + "md5_checksum": "366bf195f71d2c35a9b47c0f29381e85", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0bkh0g64", + "name": "nmdc_wfrbt-11-qxd7m466.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-qtje8r57", + "file_size_bytes": 4010701, + "md5_checksum": "e111cd4927f6736e5de6f6e81e7e6d72", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-qtje8r57/nmdc:wfrbt-11-qxd7m466.1/nmdc_wfrbt-11-qxd7m466.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-ctbbh665.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-qtje8r57", + "started_at_time": "2021-10-11T02:27:50Z", + "ended_at_time": "2021-10-11T03:39:05+00:00", + "was_informed_by": "nmdc:omprc-11-qtje8r57", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-mvfs2859" + ], + "has_output": [ + "nmdc:dobj-11-tz487854", + "nmdc:dobj-11-49xp6781", + "nmdc:dobj-11-mjgtxb34", + "nmdc:dobj-11-sesd9x35", + "nmdc:dobj-11-mbx38914" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-qtje8r57" + ], + "version": "v1.0.2", + "asm_score": 3.588, + "scaffolds": 120242, + "scaf_logsum": 112140, + "scaf_powsum": 12215, + "scaf_max": 16504, + "scaf_bp": 54172210, + "scaf_n50": 34687, + "scaf_n90": 101345, + "scaf_l50": 421, + "scaf_l90": 285, + "contigs": 120326, + "contig_bp": 54171370, + "ctg_n50": 34725, + "ctg_l50": 421, + "ctg_n90": 101428, + "ctg_l90": 285, + "ctg_logsum": 111611, + "ctg_powsum": 12152, + "ctg_max": 16504, + "gap_pct": 0.00155, + "gc_std": 0.11331, + "gc_avg": 0.54451 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-qtje8r57", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-wzdqhh45" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115671" + ], + "has_output": [ + "nmdc:dobj-11-sp1dx351" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW 
T4_12-Aug-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-6qq68y05.1", + "name": "Read QC Activity for nmdc:omprc-11-qtje8r57", + "started_at_time": "2021-10-11T02:27:50Z", + "ended_at_time": "2021-10-11T03:39:05+00:00", + "was_informed_by": "nmdc:omprc-11-qtje8r57", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-sp1dx351" + ], + "has_output": [ + "nmdc:dobj-11-mvfs2859", + "nmdc:dobj-11-c383ph33" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-qtje8r57" + ], + "version": "v1.0.2", + "input_read_count": 22298982, + "output_read_count": 20445042, + "input_read_bases": 3367146282, + "output_read_bases": 3062549086 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-qxd7m466.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-qtje8r57", + "started_at_time": "2021-10-11T02:27:50Z", + "ended_at_time": "2021-10-11T03:39:05+00:00", + "was_informed_by": "nmdc:omprc-11-qtje8r57", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-mvfs2859" + ], + "has_output": [ + "nmdc:dobj-11-bfg3hp29", + "nmdc:dobj-11-rbmyjf20", + "nmdc:dobj-11-dqftx366", + "nmdc:dobj-11-htk7b896", + "nmdc:dobj-11-2mgbzx37", + "nmdc:dobj-11-2a0kzk27", + "nmdc:dobj-11-ay1gh298", + "nmdc:dobj-11-z016kg16", + "nmdc:dobj-11-0bkh0g64" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-qtje8r57" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-h4fha619", + "name": "9422.8.132674.GTGAAA.fastq.gz", + "description": "Raw 
sequencer read data", + "file_size_bytes": 3492714581, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-67e2rh25", + "name": "nmdc_wfrqc-11-da629005.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 3113249122, + "md5_checksum": "e777bc518da4bbe0ab7b2959f00e2b08", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrqc-11-da629005.1/nmdc_wfrqc-11-da629005.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ev3p0f32", + "name": "nmdc_wfrqc-11-da629005.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 292, + "md5_checksum": "79815495339053b7935b55dbde02b2ff", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrqc-11-da629005.1/nmdc_wfrqc-11-da629005.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6rknzs95", + "name": "nmdc_wfmgas-11-q6f3w480.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 106133418, + "md5_checksum": "c5f02e589bade634da3706fdf2e509da", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfmgas-11-q6f3w480.1/nmdc_wfmgas-11-q6f3w480.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hq9hx554", + "name": "nmdc_wfmgas-11-q6f3w480.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 105567352, + "md5_checksum": "e27ef8f0c2368a7544b417197bbd972d", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfmgas-11-q6f3w480.1/nmdc_wfmgas-11-q6f3w480.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-31676r95", 
+ "name": "nmdc_wfmgas-11-q6f3w480.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 15934516, + "md5_checksum": "52e60a3f5f2059d5ca6ac5feed9d6e61", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfmgas-11-q6f3w480.1/nmdc_wfmgas-11-q6f3w480.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j8m97a49", + "name": "nmdc_wfmgas-11-q6f3w480.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 16102397, + "md5_checksum": "8f1ab020164377572c222049a733c495", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfmgas-11-q6f3w480.1/nmdc_wfmgas-11-q6f3w480.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ew6d5035", + "name": "nmdc_wfmgas-11-q6f3w480.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfmgas-11-q6f3w480.1/nmdc_wfmgas-11-q6f3w480.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nswdmk16", + "name": "nmdc_wfrbt-11-5hcjqk42.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 13659, + "md5_checksum": "13343b2533892633bcc3655a1ebe788f", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-a3kcmn16", + "name": "nmdc_wfrbt-11-5hcjqk42.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for 
nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 1168924, + "md5_checksum": "87b36326bee32ad5642e3ffc2f5ac7db", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dprqj957", + "name": "nmdc_wfrbt-11-5hcjqk42.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 267660, + "md5_checksum": "95a2de8be672fd50bf542215194dc4d4", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4fv99443", + "name": "nmdc_wfrbt-11-5hcjqk42.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 2721808152, + "md5_checksum": "6cd0210b345d6908ad8ab683b1a11572", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-35k33569", + "name": "nmdc_wfrbt-11-5hcjqk42.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 263207, + "md5_checksum": "5049a65d2a42d73c5d47373e990b70f7", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3264m552", + "name": "nmdc_wfrbt-11-5hcjqk42.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-7ey2jr63", + 
"file_size_bytes": 2347912, + "md5_checksum": "6e1e28773094884d35c04072309e285a", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dw2s7e60", + "name": "nmdc_wfrbt-11-5hcjqk42.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 2224468607, + "md5_checksum": "7fa3aba8b1e31ccc00cf56f04f5605ac", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1jm97b55", + "name": "nmdc_wfrbt-11-5hcjqk42.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 701128, + "md5_checksum": "3b3abe337d79d09e9c7ba0a40045ad93", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hgybse24", + "name": "nmdc_wfrbt-11-5hcjqk42.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-7ey2jr63", + "file_size_bytes": 4217185, + "md5_checksum": "e8602b20781cdbbd84e6dcb92c048a6b", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-7ey2jr63/nmdc:wfrbt-11-5hcjqk42.1/nmdc_wfrbt-11-5hcjqk42.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-q6f3w480.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-7ey2jr63", + "started_at_time": "2021-10-11T02:26:37Z", + "ended_at_time": "2021-10-11T05:40:05+00:00", + 
"was_informed_by": "nmdc:omprc-11-7ey2jr63", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-67e2rh25" + ], + "has_output": [ + "nmdc:dobj-11-6rknzs95", + "nmdc:dobj-11-hq9hx554", + "nmdc:dobj-11-31676r95", + "nmdc:dobj-11-j8m97a49", + "nmdc:dobj-11-ew6d5035" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-7ey2jr63" + ], + "version": "v1.0.2", + "asm_score": 10.939, + "scaffolds": 186895, + "scaf_logsum": 337025, + "scaf_powsum": 40973, + "scaf_max": 163197, + "scaf_bp": 97613509, + "scaf_n50": 42593, + "scaf_n90": 155449, + "scaf_l50": 499, + "scaf_l90": 288, + "scaf_n_gt50k": 11, + "scaf_l_gt50k": 743033, + "scaf_pct_gt50k": 0.7611989, + "contigs": 187125, + "contig_bp": 97611209, + "ctg_n50": 42676, + "ctg_l50": 499, + "ctg_n90": 155670, + "ctg_l90": 288, + "ctg_logsum": 335229, + "ctg_powsum": 40696, + "ctg_max": 163197, + "gap_pct": 0.00236, + "gc_std": 0.10616, + "gc_avg": 0.5929 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-7ey2jr63", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-pkgtg048" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115676" + ], + "has_output": [ + "nmdc:dobj-11-h4fha619" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_23-Sept-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": 
"nmdc:wfrqc-11-da629005.1", + "name": "Read QC Activity for nmdc:omprc-11-7ey2jr63", + "started_at_time": "2021-10-11T02:26:37Z", + "ended_at_time": "2021-10-11T05:40:05+00:00", + "was_informed_by": "nmdc:omprc-11-7ey2jr63", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-h4fha619" + ], + "has_output": [ + "nmdc:dobj-11-67e2rh25", + "nmdc:dobj-11-ev3p0f32" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-7ey2jr63" + ], + "version": "v1.0.2", + "input_read_count": 39069214, + "output_read_count": 37037822, + "input_read_bases": 5899451314, + "output_read_bases": 5550744725 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-5hcjqk42.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-7ey2jr63", + "started_at_time": "2021-10-11T02:26:37Z", + "ended_at_time": "2021-10-11T05:40:05+00:00", + "was_informed_by": "nmdc:omprc-11-7ey2jr63", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-67e2rh25" + ], + "has_output": [ + "nmdc:dobj-11-nswdmk16", + "nmdc:dobj-11-a3kcmn16", + "nmdc:dobj-11-dprqj957", + "nmdc:dobj-11-4fv99443", + "nmdc:dobj-11-35k33569", + "nmdc:dobj-11-3264m552", + "nmdc:dobj-11-dw2s7e60", + "nmdc:dobj-11-1jm97b55", + "nmdc:dobj-11-hgybse24" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-7ey2jr63" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-p9rq4261", + "name": "9289.1.128215.TCCTGAG-TATCCTC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 6700067822, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t9s7ky43", + "name": "nmdc_wfrqc-11-0dj1cv23.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-qngh7497", + 
"file_size_bytes": 5307348388, + "md5_checksum": "63c857b3011dec61a08044d518291f23", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrqc-11-0dj1cv23.1/nmdc_wfrqc-11-0dj1cv23.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-840rhn20", + "name": "nmdc_wfrqc-11-0dj1cv23.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-qngh7497", + "file_size_bytes": 279, + "md5_checksum": "2a79d7978caecf9b08fb2029fa42c9b3", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrqc-11-0dj1cv23.1/nmdc_wfrqc-11-0dj1cv23.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-d0p5c469", + "name": "nmdc_wfmgas-11-0q0rh178.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-qngh7497", + "file_size_bytes": 254039819, + "md5_checksum": "9cbd5238f51695287448a3599f4ae813", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfmgas-11-0q0rh178.1/nmdc_wfmgas-11-0q0rh178.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-28hffr46", + "name": "nmdc_wfmgas-11-0q0rh178.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-qngh7497", + "file_size_bytes": 252264972, + "md5_checksum": "1d7487b254671e51f84d164e1de78241", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfmgas-11-0q0rh178.1/nmdc_wfmgas-11-0q0rh178.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b3amzx70", + "name": "nmdc_wfmgas-11-0q0rh178.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-qngh7497", + "file_size_bytes": 47009211, + "md5_checksum": "5baf6be627bc8f7c7074859819bb7816", + "data_object_type": "Assembly Coverage Stats", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfmgas-11-0q0rh178.1/nmdc_wfmgas-11-0q0rh178.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bk51qr75", + "name": "nmdc_wfmgas-11-0q0rh178.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-qngh7497", + "file_size_bytes": 48063881, + "md5_checksum": "6469270e631357515deb83bbb6acfef6", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfmgas-11-0q0rh178.1/nmdc_wfmgas-11-0q0rh178.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sp1nyv42", + "name": "nmdc_wfmgas-11-0q0rh178.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-qngh7497", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfmgas-11-0q0rh178.1/nmdc_wfmgas-11-0q0rh178.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rekwdb49", + "name": "nmdc_wfrbt-11-z1vfdd44.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 17895, + "md5_checksum": "ba32f20b0cc5143783e00c5d1ba15223", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ragjd254", + "name": "nmdc_wfrbt-11-z1vfdd44.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 1182538, + "md5_checksum": "c1730daf5e6017219fd9fc079e42c132", + "data_object_type": "GOTTCHA2 Report Full", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3rjqwc26", + "name": "nmdc_wfrbt-11-z1vfdd44.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 276802, + "md5_checksum": "55b6c047c48f5bf9fb156f139992e4d8", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zf13f858", + "name": "nmdc_wfrbt-11-z1vfdd44.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 4716470614, + "md5_checksum": "1c2e2dff881b35a25b4622bbc66c3140", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ymbka839", + "name": "nmdc_wfrbt-11-z1vfdd44.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 267231, + "md5_checksum": "50f771c7bc17a0b184c2a10a24013f08", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wc4s4w16", + "name": "nmdc_wfrbt-11-z1vfdd44.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 2356003, + "md5_checksum": "229017cdb1832bb718d22dc27db44125", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-znv5m089", + "name": "nmdc_wfrbt-11-z1vfdd44.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 3857487871, + "md5_checksum": "49d5d11132bd5a02c3dd077d42a6a16b", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sx1kba78", + "name": "nmdc_wfrbt-11-z1vfdd44.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 708598, + "md5_checksum": "bdd701b44e67929ec8bbe279697da937", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-40nwga04", + "name": "nmdc_wfrbt-11-z1vfdd44.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-qngh7497", + "file_size_bytes": 4250180, + "md5_checksum": "d35583a5ed45df5a58bf084fc67bf988", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qngh7497/nmdc:wfrbt-11-z1vfdd44.1/nmdc_wfrbt-11-z1vfdd44.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-0q0rh178.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-qngh7497", + "started_at_time": "2021-10-11T02:24:49Z", + "ended_at_time": "2021-10-11T06:26:42+00:00", + "was_informed_by": "nmdc:omprc-11-qngh7497", + "execution_resource": "NERSC-Cori", + "git_url": 
"https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-t9s7ky43" + ], + "has_output": [ + "nmdc:dobj-11-d0p5c469", + "nmdc:dobj-11-28hffr46", + "nmdc:dobj-11-b3amzx70", + "nmdc:dobj-11-bk51qr75", + "nmdc:dobj-11-sp1nyv42" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-qngh7497" + ], + "version": "v1.0.2", + "asm_score": 13.853, + "scaffolds": 543003, + "scaf_logsum": 442802, + "scaf_powsum": 55815, + "scaf_max": 582605, + "scaf_bp": 229858665, + "scaf_n50": 164840, + "scaf_n90": 466121, + "scaf_l50": 378, + "scaf_l90": 283, + "scaf_n_gt50k": 23, + "scaf_l_gt50k": 2790937, + "scaf_pct_gt50k": 1.2141969, + "contigs": 548764, + "contig_bp": 229799767, + "ctg_n50": 171281, + "ctg_l50": 375, + "ctg_n90": 471697, + "ctg_l90": 283, + "ctg_logsum": 407938, + "ctg_powsum": 50872, + "ctg_max": 464697, + "gap_pct": 0.02562, + "gc_std": 0.11035, + "gc_avg": 0.55184 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-qngh7497", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-8362vs44" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115677" + ], + "has_output": [ + "nmdc:dobj-11-p9rq4261" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-0dj1cv23.1", + "name": "Read QC Activity for nmdc:omprc-11-qngh7497", + "started_at_time": 
"2021-10-11T02:24:49Z", + "ended_at_time": "2021-10-11T06:26:42+00:00", + "was_informed_by": "nmdc:omprc-11-qngh7497", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-p9rq4261" + ], + "has_output": [ + "nmdc:dobj-11-t9s7ky43", + "nmdc:dobj-11-840rhn20" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-qngh7497" + ], + "version": "v1.0.2", + "input_read_count": 65434428, + "output_read_count": 64887080, + "input_read_bases": 9880598628, + "output_read_bases": 9483843059 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-z1vfdd44.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-qngh7497", + "started_at_time": "2021-10-11T02:24:49Z", + "ended_at_time": "2021-10-11T06:26:42+00:00", + "was_informed_by": "nmdc:omprc-11-qngh7497", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-t9s7ky43" + ], + "has_output": [ + "nmdc:dobj-11-rekwdb49", + "nmdc:dobj-11-ragjd254", + "nmdc:dobj-11-3rjqwc26", + "nmdc:dobj-11-zf13f858", + "nmdc:dobj-11-ymbka839", + "nmdc:dobj-11-wc4s4w16", + "nmdc:dobj-11-znv5m089", + "nmdc:dobj-11-sx1kba78", + "nmdc:dobj-11-40nwga04" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-qngh7497" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-phjw3w62", + "name": "9387.2.132031.GGCTAC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1777604881, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-err66c44", + "name": "nmdc_wfrqc-11-cd102r17.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 1533239347, + "md5_checksum": "54e3a71218d04224719e0dc8a7fdf9c7", + "data_object_type": "Filtered Sequencing 
Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrqc-11-cd102r17.1/nmdc_wfrqc-11-cd102r17.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rh6r3j07", + "name": "nmdc_wfrqc-11-cd102r17.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 287, + "md5_checksum": "2507e3f107100ce0c72c57191d450818", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrqc-11-cd102r17.1/nmdc_wfrqc-11-cd102r17.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wwjavj57", + "name": "nmdc_wfmgas-11-1pt7h674.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 42147499, + "md5_checksum": "f90412108d18555221fae5e071b9f92f", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfmgas-11-1pt7h674.1/nmdc_wfmgas-11-1pt7h674.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gp436019", + "name": "nmdc_wfmgas-11-1pt7h674.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 41901870, + "md5_checksum": "1f5c8ce1d48dbe909d479b51eaf1be9d", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfmgas-11-1pt7h674.1/nmdc_wfmgas-11-1pt7h674.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zzg1fq83", + "name": "nmdc_wfmgas-11-1pt7h674.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 6824013, + "md5_checksum": "a5d5ceaf949187ccef91559b72121b7b", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfmgas-11-1pt7h674.1/nmdc_wfmgas-11-1pt7h674.1_covstats.txt", + "type": 
"nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kvkp7590", + "name": "nmdc_wfmgas-11-1pt7h674.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 6873472, + "md5_checksum": "91c6d089babb8a4d9d97c2456b7ec8a5", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfmgas-11-1pt7h674.1/nmdc_wfmgas-11-1pt7h674.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4c5jbd45", + "name": "nmdc_wfmgas-11-1pt7h674.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfmgas-11-1pt7h674.1/nmdc_wfmgas-11-1pt7h674.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7tjh4125", + "name": "nmdc_wfrbt-11-w8kab509.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 8921, + "md5_checksum": "60d673988c4f4447feb5985e8501e914", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-drtyae39", + "name": "nmdc_wfrbt-11-w8kab509.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 871109, + "md5_checksum": "a8f93ed13033eb949109b4e83980a893", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9d7skb15", + "name": 
"nmdc_wfrbt-11-w8kab509.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 252578, + "md5_checksum": "31dd6eb616f1e9815778453ab1601195", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xstndk11", + "name": "nmdc_wfrbt-11-w8kab509.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 1218767711, + "md5_checksum": "6d7a930d79f220b06cde8fbf8339e744", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dh10hq66", + "name": "nmdc_wfrbt-11-w8kab509.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 254260, + "md5_checksum": "0aaac507db0e29827e1c87df47324932", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ew7awv32", + "name": "nmdc_wfrbt-11-w8kab509.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 2324387, + "md5_checksum": "6aec8677139ed24ef9cfe0c75b30056f", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0ex35252", + "name": 
"nmdc_wfrbt-11-w8kab509.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 1001846607, + "md5_checksum": "d39369f32ada967d7cf52cb503fccf4a", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kbrfgs82", + "name": "nmdc_wfrbt-11-w8kab509.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 635541, + "md5_checksum": "1ec0247d86889fcef13f39a58a92b066", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fgv8nt05", + "name": "nmdc_wfrbt-11-w8kab509.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-jk7zjz92", + "file_size_bytes": 3968420, + "md5_checksum": "242a1c60f6cb14ba8430375171fda436", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-jk7zjz92/nmdc:wfrbt-11-w8kab509.1/nmdc_wfrbt-11-w8kab509.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-1pt7h674.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-jk7zjz92", + "started_at_time": "2021-10-11T02:28:05Z", + "ended_at_time": "2021-10-11T03:25:21+00:00", + "was_informed_by": "nmdc:omprc-11-jk7zjz92", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-err66c44" + ], + "has_output": [ + "nmdc:dobj-11-wwjavj57", + "nmdc:dobj-11-gp436019", + "nmdc:dobj-11-zzg1fq83", + "nmdc:dobj-11-kvkp7590", + "nmdc:dobj-11-4c5jbd45" + ], + 
"type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-jk7zjz92" + ], + "version": "v1.0.2", + "asm_score": 4.718, + "scaffolds": 80703, + "scaf_logsum": 116377, + "scaf_powsum": 13311, + "scaf_max": 25635, + "scaf_bp": 38573126, + "scaf_n50": 19754, + "scaf_n90": 68272, + "scaf_l50": 436, + "scaf_l90": 284, + "contigs": 80858, + "contig_bp": 38571486, + "ctg_n50": 19932, + "ctg_l50": 435, + "ctg_n90": 68422, + "ctg_l90": 284, + "ctg_logsum": 115425, + "ctg_powsum": 13174, + "ctg_max": 25635, + "gap_pct": 0.00425, + "gc_std": 0.10716, + "gc_avg": 0.56103 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-jk7zjz92", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-a5d23e19" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115675" + ], + "has_output": [ + "nmdc:dobj-11-phjw3w62" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA- GW-RW T4_22-July-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-cd102r17.1", + "name": "Read QC Activity for nmdc:omprc-11-jk7zjz92", + "started_at_time": "2021-10-11T02:28:05Z", + "ended_at_time": "2021-10-11T03:25:21+00:00", + "was_informed_by": "nmdc:omprc-11-jk7zjz92", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-phjw3w62" + ], + "has_output": [ + "nmdc:dobj-11-err66c44", + "nmdc:dobj-11-rh6r3j07" + ], + "type": 
"nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-jk7zjz92" + ], + "version": "v1.0.2", + "input_read_count": 18827380, + "output_read_count": 16749572, + "input_read_bases": 2842934380, + "output_read_bases": 2508839784 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-w8kab509.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-jk7zjz92", + "started_at_time": "2021-10-11T02:28:05Z", + "ended_at_time": "2021-10-11T03:25:21+00:00", + "was_informed_by": "nmdc:omprc-11-jk7zjz92", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-err66c44" + ], + "has_output": [ + "nmdc:dobj-11-7tjh4125", + "nmdc:dobj-11-drtyae39", + "nmdc:dobj-11-9d7skb15", + "nmdc:dobj-11-xstndk11", + "nmdc:dobj-11-dh10hq66", + "nmdc:dobj-11-ew7awv32", + "nmdc:dobj-11-0ex35252", + "nmdc:dobj-11-kbrfgs82", + "nmdc:dobj-11-fgv8nt05" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-jk7zjz92" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-7pwab132", + "name": "9491.1.134352.AGTTCC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 4637325661, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fhq51509", + "name": "nmdc_wfrqc-11-nn1n1282.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 4096192298, + "md5_checksum": "b0462e18cf9dafc9d2207a58bf085530", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrqc-11-nn1n1282.1/nmdc_wfrqc-11-nn1n1282.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jzj1zx14", + "name": "nmdc_wfrqc-11-nn1n1282.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-2jt0jk84", + 
"file_size_bytes": 291, + "md5_checksum": "f0e1b9004b0e9aafb06c444444a522c7", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrqc-11-nn1n1282.1/nmdc_wfrqc-11-nn1n1282.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-23jjn849", + "name": "nmdc_wfmgas-11-1ewhg081.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 187992981, + "md5_checksum": "dc6eb6bde5200aafa1ded041e708585e", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfmgas-11-1ewhg081.1/nmdc_wfmgas-11-1ewhg081.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kt361681", + "name": "nmdc_wfmgas-11-1ewhg081.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 186929972, + "md5_checksum": "dd760ebb7b4c984aa39d130f6a9a3c26", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfmgas-11-1ewhg081.1/nmdc_wfmgas-11-1ewhg081.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0f9qaa60", + "name": "nmdc_wfmgas-11-1ewhg081.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 30074125, + "md5_checksum": "fe2ababceac7294c0b27ae2beeac9d9f", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfmgas-11-1ewhg081.1/nmdc_wfmgas-11-1ewhg081.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-v0ha9r77", + "name": "nmdc_wfmgas-11-1ewhg081.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 30476826, + "md5_checksum": "7d0d110cfe22d9f6a42d1914b3048026", + "data_object_type": "Assembly AGP", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfmgas-11-1ewhg081.1/nmdc_wfmgas-11-1ewhg081.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bwkzr247", + "name": "nmdc_wfmgas-11-1ewhg081.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfmgas-11-1ewhg081.1/nmdc_wfmgas-11-1ewhg081.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c8h2mt77", + "name": "nmdc_wfrbt-11-djtn7g44.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 18015, + "md5_checksum": "432fedddcbacb4e69c0350354ab44080", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0r5k4063", + "name": "nmdc_wfrbt-11-djtn7g44.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 1283220, + "md5_checksum": "50b9a4c83b2ec0d1dd683cb8814ed5ad", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hqwn6n70", + "name": "nmdc_wfrbt-11-djtn7g44.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 281366, + "md5_checksum": "e3d7339ba5c7677be13854f391462474", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xjmbaj75", + "name": "nmdc_wfrbt-11-djtn7g44.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 3481369185, + "md5_checksum": "7bf922ee2f9fc298c031e2ff7d5abe0d", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-m47n2325", + "name": "nmdc_wfrbt-11-djtn7g44.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 263480, + "md5_checksum": "33a20a77c3dc5b4feb102d66dfbfbe11", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7445f556", + "name": "nmdc_wfrbt-11-djtn7g44.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 2347079, + "md5_checksum": "30bdf0aedf771221ca3f7f18ff4e0067", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4g2ckr79", + "name": "nmdc_wfrbt-11-djtn7g44.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 2866138771, + "md5_checksum": "8e21ac30de17de0d1051d7d223d0aa0f", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ffd01y46", + "name": "nmdc_wfrbt-11-djtn7g44.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 728030, + "md5_checksum": "64459bec7843953a70f8ea2b09a7e9de", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4jnt7607", + "name": "nmdc_wfrbt-11-djtn7g44.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-2jt0jk84", + "file_size_bytes": 4374689, + "md5_checksum": "9aa0ec113eb8dd22e7f574216d1760b2", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-2jt0jk84/nmdc:wfrbt-11-djtn7g44.1/nmdc_wfrbt-11-djtn7g44.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-1ewhg081.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-2jt0jk84", + "started_at_time": "2021-10-11T02:28:54Z", + "ended_at_time": "2021-10-11T06:19:29+00:00", + "was_informed_by": "nmdc:omprc-11-2jt0jk84", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-fhq51509" + ], + "has_output": [ + "nmdc:dobj-11-23jjn849", + "nmdc:dobj-11-kt361681", + "nmdc:dobj-11-0f9qaa60", + "nmdc:dobj-11-v0ha9r77", + "nmdc:dobj-11-bwkzr247" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-2jt0jk84" + ], + "version": "v1.0.2", + "asm_score": 5.768, + "scaffolds": 351728, + "scaf_logsum": 429769, + "scaf_powsum": 48321, + "scaf_max": 44931, + "scaf_bp": 172054628, + "scaf_n50": 95446, + "scaf_n90": 294658, + 
"scaf_l50": 468, + "scaf_l90": 289, + "contigs": 352055, + "contig_bp": 172051088, + "ctg_n50": 95561, + "ctg_l50": 468, + "ctg_n90": 294969, + "ctg_l90": 289, + "ctg_logsum": 427633, + "ctg_powsum": 48025, + "ctg_max": 44931, + "gap_pct": 0.00206, + "gc_std": 0.13027, + "gc_avg": 0.51918 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-2jt0jk84", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T2.", + "has_input": [ + "nmdc:bsm-11-qjtgh002" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115665" + ], + "has_output": [ + "nmdc:dobj-11-7pwab132" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_30-Apr-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-nn1n1282.1", + "name": "Read QC Activity for nmdc:omprc-11-2jt0jk84", + "started_at_time": "2021-10-11T02:28:54Z", + "ended_at_time": "2021-10-11T06:19:29+00:00", + "was_informed_by": "nmdc:omprc-11-2jt0jk84", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-7pwab132" + ], + "has_output": [ + "nmdc:dobj-11-fhq51509", + "nmdc:dobj-11-jzj1zx14" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-2jt0jk84" + ], + "version": "v1.0.2", + "input_read_count": 50719572, + "output_read_count": 47896142, + "input_read_bases": 7658655372, + "output_read_bases": 7175148255 + } + ], + 
"read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-djtn7g44.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-2jt0jk84", + "started_at_time": "2021-10-11T02:28:54Z", + "ended_at_time": "2021-10-11T06:19:29+00:00", + "was_informed_by": "nmdc:omprc-11-2jt0jk84", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-fhq51509" + ], + "has_output": [ + "nmdc:dobj-11-c8h2mt77", + "nmdc:dobj-11-0r5k4063", + "nmdc:dobj-11-hqwn6n70", + "nmdc:dobj-11-xjmbaj75", + "nmdc:dobj-11-m47n2325", + "nmdc:dobj-11-7445f556", + "nmdc:dobj-11-4g2ckr79", + "nmdc:dobj-11-ffd01y46", + "nmdc:dobj-11-4jnt7607" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-2jt0jk84" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-7g54r371", + "name": "9387.2.132031.GTAGAG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1988838112, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-g6sfg477", + "name": "nmdc_wfrqc-11-s9dhkx03.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 1806935637, + "md5_checksum": "6eef104db92b99c9741b26c667d75cd9", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrqc-11-s9dhkx03.1/nmdc_wfrqc-11-s9dhkx03.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c0ay2314", + "name": "nmdc_wfrqc-11-s9dhkx03.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 286, + "md5_checksum": "58fde3e96dbb28af9133bede850a2653", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrqc-11-s9dhkx03.1/nmdc_wfrqc-11-s9dhkx03.1_filterStats.txt", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-39wjyp66", + "name": "nmdc_wfmgas-11-0ykzmy51.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 59636118, + "md5_checksum": "c1f5ee06fff14c1480fcdf61e5e99749", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfmgas-11-0ykzmy51.1/nmdc_wfmgas-11-0ykzmy51.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-08k21n12", + "name": "nmdc_wfmgas-11-0ykzmy51.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 59291823, + "md5_checksum": "6af0630c68c0491fb239cda2be53ff6b", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfmgas-11-0ykzmy51.1/nmdc_wfmgas-11-0ykzmy51.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zzpd8j23", + "name": "nmdc_wfmgas-11-0ykzmy51.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 9663313, + "md5_checksum": "445596d832e456b0b6e385a1bea11fb1", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfmgas-11-0ykzmy51.1/nmdc_wfmgas-11-0ykzmy51.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pkh28878", + "name": "nmdc_wfmgas-11-0ykzmy51.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 9728610, + "md5_checksum": "257c8869b7500b00e7135d1f9a7699d1", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfmgas-11-0ykzmy51.1/nmdc_wfmgas-11-0ykzmy51.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-03sd1550", + "name": "nmdc_wfmgas-11-0ykzmy51.1_pairedMapped_sorted.bam", + "description": "Metagenome 
Alignment BAM file for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfmgas-11-0ykzmy51.1/nmdc_wfmgas-11-0ykzmy51.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dshw3185", + "name": "nmdc_wfrbt-11-4rtk1927.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 11362, + "md5_checksum": "05933784d02331b60b2531e2025cd3b7", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w2j0x746", + "name": "nmdc_wfrbt-11-4rtk1927.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 909325, + "md5_checksum": "50fc279637cb7048aaaeec9b223d0286", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-766mwx64", + "name": "nmdc_wfrbt-11-4rtk1927.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 261412, + "md5_checksum": "c3add9c5d34e3ca719096ba3ba9b1c08", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mqn0q670", + "name": "nmdc_wfrbt-11-4rtk1927.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 
1481087410, + "md5_checksum": "2777a04ec7e23aff356bb4f2733e55b7", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ahc13632", + "name": "nmdc_wfrbt-11-4rtk1927.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 256139, + "md5_checksum": "de45d70cc01749e9b5691dc24674545d", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vx8zg790", + "name": "nmdc_wfrbt-11-4rtk1927.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 2323658, + "md5_checksum": "534f97f3792b74385c4da305196a1b1d", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-yywqnh16", + "name": "nmdc_wfrbt-11-4rtk1927.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 1220980345, + "md5_checksum": "fc3e489df923ec344ac0cce7316f49d6", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pve05893", + "name": "nmdc_wfrbt-11-4rtk1927.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 651795, + "md5_checksum": 
"07b6457a094fab96563168ed287dc59f", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fra2vk39", + "name": "nmdc_wfrbt-11-4rtk1927.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-hqmmwn16", + "file_size_bytes": 3963303, + "md5_checksum": "164a1bc50e8d6509446ae2877be8231c", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hqmmwn16/nmdc:wfrbt-11-4rtk1927.1/nmdc_wfrbt-11-4rtk1927.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-0ykzmy51.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-hqmmwn16", + "started_at_time": "2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T04:20:07+00:00", + "was_informed_by": "nmdc:omprc-11-hqmmwn16", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-g6sfg477" + ], + "has_output": [ + "nmdc:dobj-11-39wjyp66", + "nmdc:dobj-11-08k21n12", + "nmdc:dobj-11-zzpd8j23", + "nmdc:dobj-11-pkh28878", + "nmdc:dobj-11-03sd1550" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-hqmmwn16" + ], + "version": "v1.0.2", + "asm_score": 4.733, + "scaffolds": 114011, + "scaf_logsum": 152336, + "scaf_powsum": 17101, + "scaf_max": 20100, + "scaf_bp": 54568699, + "scaf_n50": 28976, + "scaf_n90": 94720, + "scaf_l50": 451, + "scaf_l90": 285, + "contigs": 114114, + "contig_bp": 54567489, + "ctg_n50": 29019, + "ctg_l50": 451, + "ctg_n90": 94816, + "ctg_l90": 285, + "ctg_logsum": 151663, + "ctg_powsum": 17017, + "ctg_max": 20100, + "gap_pct": 0.00222, + "gc_std": 0.11871, + "gc_avg": 0.55923 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-hqmmwn16", + "name": "Sand 
microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + "has_input": [ + "nmdc:bsm-11-47nxfg85" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115669" + ], + "has_output": [ + "nmdc:dobj-11-7g54r371" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_1-July-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-s9dhkx03.1", + "name": "Read QC Activity for nmdc:omprc-11-hqmmwn16", + "started_at_time": "2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T04:20:07+00:00", + "was_informed_by": "nmdc:omprc-11-hqmmwn16", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-7g54r371" + ], + "has_output": [ + "nmdc:dobj-11-g6sfg477", + "nmdc:dobj-11-c0ay2314" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hqmmwn16" + ], + "version": "v1.0.2", + "input_read_count": 20957834, + "output_read_count": 20454422, + "input_read_bases": 3164632934, + "output_read_bases": 3065138996 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-4rtk1927.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-hqmmwn16", + "started_at_time": "2021-10-11T02:28:43Z", + "ended_at_time": "2021-10-11T04:20:07+00:00", + "was_informed_by": "nmdc:omprc-11-hqmmwn16", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": 
[ + "nmdc:dobj-11-g6sfg477" + ], + "has_output": [ + "nmdc:dobj-11-dshw3185", + "nmdc:dobj-11-w2j0x746", + "nmdc:dobj-11-766mwx64", + "nmdc:dobj-11-mqn0q670", + "nmdc:dobj-11-ahc13632", + "nmdc:dobj-11-vx8zg790", + "nmdc:dobj-11-yywqnh16", + "nmdc:dobj-11-pve05893", + "nmdc:dobj-11-fra2vk39" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hqmmwn16" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-zx1hkd49", + "name": "9422.8.132674.GAGTGG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 3054717241, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zwdnr790", + "name": "nmdc_wfrqc-11-rj958378.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 2704299418, + "md5_checksum": "eb516fb673793f5161fb634fc19de310", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrqc-11-rj958378.1/nmdc_wfrqc-11-rj958378.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ypym5207", + "name": "nmdc_wfrqc-11-rj958378.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 290, + "md5_checksum": "f4b68d1bd25f8d2fa8986aeef5fbec3f", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrqc-11-rj958378.1/nmdc_wfrqc-11-rj958378.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9xyht268", + "name": "nmdc_wfmgas-11-t0kstj15.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 130647435, + "md5_checksum": "5d78cd7582a6fcb9582ee13feabe40e6", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfmgas-11-t0kstj15.1/nmdc_wfmgas-11-t0kstj15.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-r84wdf14", + "name": "nmdc_wfmgas-11-t0kstj15.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 129980381, + "md5_checksum": "ee7711069a238f67abf9afb91c5bf415", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfmgas-11-t0kstj15.1/nmdc_wfmgas-11-t0kstj15.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-cyjkrn07", + "name": "nmdc_wfmgas-11-t0kstj15.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 18822519, + "md5_checksum": "13beb8aa8b436f3016fb451fe01cfcad", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfmgas-11-t0kstj15.1/nmdc_wfmgas-11-t0kstj15.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ej0ps005", + "name": "nmdc_wfmgas-11-t0kstj15.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 19056056, + "md5_checksum": "4eea7dbfd1065f6c53a6ad7474705a3f", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfmgas-11-t0kstj15.1/nmdc_wfmgas-11-t0kstj15.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wkygmk79", + "name": "nmdc_wfmgas-11-t0kstj15.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfmgas-11-t0kstj15.1/nmdc_wfmgas-11-t0kstj15.1_pairedMapped_sorted.bam", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-a732bv91", + "name": "nmdc_wfrbt-11-0z8fpj32.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 15806, + "md5_checksum": "5a9326e2e450663a5ed8c97389136b25", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xp0fte49", + "name": "nmdc_wfrbt-11-0z8fpj32.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 1142479, + "md5_checksum": "6044f2e33e0dd3e951484e9c50ae10f4", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3ec0y445", + "name": "nmdc_wfrbt-11-0z8fpj32.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 273611, + "md5_checksum": "39a46887587926c9b81e126bb1036005", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5dxr3194", + "name": "nmdc_wfrbt-11-0z8fpj32.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 2436637487, + "md5_checksum": "b8dde2c047141d9097317c86f723eded", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": 
"nmdc:dobj-11-05gqee28", + "name": "nmdc_wfrbt-11-0z8fpj32.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 261520, + "md5_checksum": "d530342b37f0785f92650e9650f31d6a", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5gm9f544", + "name": "nmdc_wfrbt-11-0z8fpj32.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 2342832, + "md5_checksum": "6672aa851b5d39d7381211232b4f6cb2", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7fp9m687", + "name": "nmdc_wfrbt-11-0z8fpj32.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 1993150715, + "md5_checksum": "61e3c875231ae8999b5aa1dbf7d55cca", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9r84z990", + "name": "nmdc_wfrbt-11-0z8fpj32.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 693572, + "md5_checksum": "3049835ed4e3533acce49e9cc60b03fc", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-h1q4ya97", + "name": 
"nmdc_wfrbt-11-0z8fpj32.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-qsxwf517", + "file_size_bytes": 4177114, + "md5_checksum": "3266e79813577aae1d4377c62e73332c", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qsxwf517/nmdc:wfrbt-11-0z8fpj32.1/nmdc_wfrbt-11-0z8fpj32.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-t0kstj15.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-qsxwf517", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T05:56:20+00:00", + "was_informed_by": "nmdc:omprc-11-qsxwf517", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-zwdnr790" + ], + "has_output": [ + "nmdc:dobj-11-9xyht268", + "nmdc:dobj-11-r84wdf14", + "nmdc:dobj-11-cyjkrn07", + "nmdc:dobj-11-ej0ps005", + "nmdc:dobj-11-wkygmk79" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-qsxwf517" + ], + "version": "v1.0.2", + "asm_score": 13.127, + "scaffolds": 220853, + "scaf_logsum": 448446, + "scaf_powsum": 56113, + "scaf_max": 157008, + "scaf_bp": 120473505, + "scaf_n50": 48077, + "scaf_n90": 178708, + "scaf_l50": 529, + "scaf_l90": 293, + "scaf_n_gt50k": 28, + "scaf_l_gt50k": 2147966, + "scaf_pct_gt50k": 1.7829365, + "contigs": 221046, + "contig_bp": 120471215, + "ctg_n50": 48327, + "ctg_l50": 528, + "ctg_n90": 178881, + "ctg_l90": 293, + "ctg_logsum": 447149, + "ctg_powsum": 55923, + "ctg_max": 157008, + "gap_pct": 0.0019, + "gc_std": 0.10619, + "gc_avg": 0.56196 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-qsxwf517", + "name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", + "description": "Sterilized sand packs were incubated back in the ground and collected at time point T4.", + 
"has_input": [ + "nmdc:bsm-11-sdhyr752" + ], + "add_date": "2015-05-28", + "gold_sequencing_project_identifiers": [ + "gold:Gp0115672" + ], + "has_output": [ + "nmdc:dobj-11-zx1hkd49" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_14-Oct-14", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-rj958378.1", + "name": "Read QC Activity for nmdc:omprc-11-qsxwf517", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T05:56:20+00:00", + "was_informed_by": "nmdc:omprc-11-qsxwf517", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-zx1hkd49" + ], + "has_output": [ + "nmdc:dobj-11-zwdnr790", + "nmdc:dobj-11-ypym5207" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-qsxwf517" + ], + "version": "v1.0.2", + "input_read_count": 34522052, + "output_read_count": 33454554, + "input_read_bases": 5212829852, + "output_read_bases": 5012430912 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-0z8fpj32.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-qsxwf517", + "started_at_time": "2021-10-11T02:28:16Z", + "ended_at_time": "2021-10-11T05:56:20+00:00", + "was_informed_by": "nmdc:omprc-11-qsxwf517", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-zwdnr790" + ], + "has_output": [ + "nmdc:dobj-11-a732bv91", + "nmdc:dobj-11-xp0fte49", + "nmdc:dobj-11-3ec0y445", + "nmdc:dobj-11-5dxr3194", + "nmdc:dobj-11-05gqee28", + "nmdc:dobj-11-5gm9f544", + 
"nmdc:dobj-11-7fp9m687", + "nmdc:dobj-11-9r84z990", + "nmdc:dobj-11-h1q4ya97" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-qsxwf517" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-zb61bz69", + "name": "10533.3.165334.ACCATCC-TGGATGG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2619328583, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5kqsbx06", + "name": "nmdc_wfrqc-11-p83a2k28.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-932jcd76", + "file_size_bytes": 2416846292, + "md5_checksum": "534c94e20d292a6bf09c0a42b550b4c2", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrqc-11-p83a2k28.1/nmdc_wfrqc-11-p83a2k28.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sz5ded60", + "name": "nmdc_wfrqc-11-p83a2k28.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-932jcd76", + "file_size_bytes": 285, + "md5_checksum": "db5ccad12d6ddb46947fbd815aae7f9a", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrqc-11-p83a2k28.1/nmdc_wfrqc-11-p83a2k28.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-whhqy480", + "name": "nmdc_wfmgas-11-wax11t55.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-932jcd76", + "file_size_bytes": 44883641, + "md5_checksum": "bf89a35cc75724ae272db3164120f04d", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfmgas-11-wax11t55.1/nmdc_wfmgas-11-wax11t55.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f4kmd631", + "name": "nmdc_wfmgas-11-wax11t55.1_scaffolds.fna", + "description": "Assembled scaffold fasta for 
nmdc:omprc-11-932jcd76", + "file_size_bytes": 44563208, + "md5_checksum": "6b15dc8b04fcb321572749e9f121327c", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfmgas-11-wax11t55.1/nmdc_wfmgas-11-wax11t55.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bm53ed48", + "name": "nmdc_wfmgas-11-wax11t55.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-932jcd76", + "file_size_bytes": 9005373, + "md5_checksum": "b5d10a098870f660fcbfe7dbf70532d9", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfmgas-11-wax11t55.1/nmdc_wfmgas-11-wax11t55.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-m21hgy27", + "name": "nmdc_wfmgas-11-wax11t55.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-932jcd76", + "file_size_bytes": 9062997, + "md5_checksum": "a6a8982281270643a20e1fae662613f1", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfmgas-11-wax11t55.1/nmdc_wfmgas-11-wax11t55.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2kvm1994", + "name": "nmdc_wfmgas-11-wax11t55.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-932jcd76", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfmgas-11-wax11t55.1/nmdc_wfmgas-11-wax11t55.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ecd3qx21", + "name": "nmdc_wfrbt-11-c41n6h03.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 3824, + "md5_checksum": "7e79b2eba131ed6df71a56f47b1b901f", + "data_object_type": 
"GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gsw8bq96", + "name": "nmdc_wfrbt-11-c41n6h03.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 850491, + "md5_checksum": "bc82dcb8151fc20c22be71b6531a1fb2", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0y6sf503", + "name": "nmdc_wfrbt-11-c41n6h03.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 236151, + "md5_checksum": "d5e45563875efca0653ba2dd47ee3d68", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9wvvsh04", + "name": "nmdc_wfrbt-11-c41n6h03.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 2057333090, + "md5_checksum": "bf5aa70f6ff14da2ef1393124ec29c4d", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mz520s16", + "name": "nmdc_wfrbt-11-c41n6h03.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 256577, + "md5_checksum": "61f1f6d57fd4d445682e25ec34901721", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mcb1b542", + "name": "nmdc_wfrbt-11-c41n6h03.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 2334984, + "md5_checksum": "7c31728fc2a51c8d202f9f74b1919886", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-e56ks564", + "name": "nmdc_wfrbt-11-c41n6h03.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 1658481192, + "md5_checksum": "f36c2b28e63d21ca4d9e84035450c8e1", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rby8a863", + "name": "nmdc_wfrbt-11-c41n6h03.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 653129, + "md5_checksum": "e2939606fc9ff1c0046b333e1740f258", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4xzkqq90", + "name": "nmdc_wfrbt-11-c41n6h03.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-932jcd76", + "file_size_bytes": 3977820, + "md5_checksum": "d47144fd7ec0608e7677550d9589c889", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-932jcd76/nmdc:wfrbt-11-c41n6h03.1/nmdc_wfrbt-11-c41n6h03.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-wax11t55.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-932jcd76", + "started_at_time": "2021-10-11T02:24:27Z", + "ended_at_time": "2021-10-11T04:33:17+00:00", + "was_informed_by": "nmdc:omprc-11-932jcd76", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-5kqsbx06" + ], + "has_output": [ + "nmdc:dobj-11-whhqy480", + "nmdc:dobj-11-f4kmd631", + "nmdc:dobj-11-bm53ed48", + "nmdc:dobj-11-m21hgy27", + "nmdc:dobj-11-2kvm1994" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-932jcd76" + ], + "version": "v1.0.2", + "asm_score": 5.471, + "scaffolds": 106645, + "scaf_logsum": 42987, + "scaf_powsum": 4913.296, + "scaf_max": 27880, + "scaf_bp": 40331709, + "scaf_n50": 38534, + "scaf_n90": 94506, + "scaf_l50": 336, + "scaf_l90": 282, + "contigs": 106665, + "contig_bp": 40331509, + "ctg_n50": 38543, + "ctg_l50": 336, + "ctg_n90": 94525, + "ctg_l90": 282, + "ctg_logsum": 42879, + "ctg_powsum": 4901.253, + "ctg_max": 27880, + "gap_pct": 0.0005, + "gc_std": 0.10189, + "gc_avg": 0.58648 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-932jcd76", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-pvcgp635" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127640" + ], + "has_output": [ + "nmdc:dobj-11-zb61bz69" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from 
areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-p83a2k28.1", + "name": "Read QC Activity for nmdc:omprc-11-932jcd76", + "started_at_time": "2021-10-11T02:24:27Z", + "ended_at_time": "2021-10-11T04:33:17+00:00", + "was_informed_by": "nmdc:omprc-11-932jcd76", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-zb61bz69" + ], + "has_output": [ + "nmdc:dobj-11-5kqsbx06", + "nmdc:dobj-11-sz5ded60" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-932jcd76" + ], + "version": "v1.0.2", + "input_read_count": 28754670, + "output_read_count": 27981268, + "input_read_bases": 4341955170, + "output_read_bases": 4186416440 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-c41n6h03.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-932jcd76", + "started_at_time": "2021-10-11T02:24:27Z", + "ended_at_time": "2021-10-11T04:33:17+00:00", + "was_informed_by": "nmdc:omprc-11-932jcd76", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-5kqsbx06" + ], + "has_output": [ + "nmdc:dobj-11-ecd3qx21", + "nmdc:dobj-11-gsw8bq96", + "nmdc:dobj-11-0y6sf503", + "nmdc:dobj-11-9wvvsh04", + "nmdc:dobj-11-mz520s16", + "nmdc:dobj-11-mcb1b542", + "nmdc:dobj-11-e56ks564", + "nmdc:dobj-11-rby8a863", + "nmdc:dobj-11-4xzkqq90" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-932jcd76" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-r2hspr31", + 
"name": "10533.1.165310.GCTACGT-AACGTAG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2061929348, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-brcn0q13", + "name": "nmdc_wfrqc-11-hpgccd55.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 1787020792, + "md5_checksum": "a2700afe93abad6f004a3701348622a2", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrqc-11-hpgccd55.1/nmdc_wfrqc-11-hpgccd55.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f3hmrc31", + "name": "nmdc_wfrqc-11-hpgccd55.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 289, + "md5_checksum": "aaa9a8a3d8e147116953394a8755742d", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrqc-11-hpgccd55.1/nmdc_wfrqc-11-hpgccd55.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kedzj993", + "name": "nmdc_wfmgas-11-1vg4es64.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 103535976, + "md5_checksum": "a489888a976c43f0754ca19c042628f4", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfmgas-11-1vg4es64.1/nmdc_wfmgas-11-1vg4es64.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-d6x47x33", + "name": "nmdc_wfmgas-11-1vg4es64.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 102957531, + "md5_checksum": "dc1d6b34729e90828ae679642e701b53", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfmgas-11-1vg4es64.1/nmdc_wfmgas-11-1vg4es64.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2015bk84", + "name": "nmdc_wfmgas-11-1vg4es64.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 16355882, + "md5_checksum": "0bd78e7ce35e84966f0c5523c1df1a83", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfmgas-11-1vg4es64.1/nmdc_wfmgas-11-1vg4es64.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0abm6770", + "name": "nmdc_wfmgas-11-1vg4es64.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 16510648, + "md5_checksum": "e832b31e8082bd31d6f90758b1d0e559", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfmgas-11-1vg4es64.1/nmdc_wfmgas-11-1vg4es64.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-03ngz945", + "name": "nmdc_wfmgas-11-1vg4es64.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfmgas-11-1vg4es64.1/nmdc_wfmgas-11-1vg4es64.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8kq9jh40", + "name": "nmdc_wfrbt-11-gka6g406.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 3331, + "md5_checksum": "0d021c80bfd39c8293a8b355b8ff3605", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9ynzzk63", + "name": "nmdc_wfrbt-11-gka6g406.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 761359, + "md5_checksum": "a42312841b816448d8bd5d3adfa65f58", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-33xb6s68", + "name": "nmdc_wfrbt-11-gka6g406.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 236161, + "md5_checksum": "f473f4a99336a49105d2722888ae0510", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-twg34n69", + "name": "nmdc_wfrbt-11-gka6g406.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 1635953327, + "md5_checksum": "ae51ea50660f44fa3b317a45f3015556", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bqxztv22", + "name": "nmdc_wfrbt-11-gka6g406.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 255166, + "md5_checksum": "ef39b44a90c8525e93f45e500b3ae934", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qy0vdt10", + "name": "nmdc_wfrbt-11-gka6g406.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 2332521, + "md5_checksum": "e2653a4ce3f34c235ad7b01e87dd1016", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hvc93k14", + "name": "nmdc_wfrbt-11-gka6g406.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 1307934195, + "md5_checksum": "869730c4d81163e0c238dd4ae27ebd9e", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j9g1cv95", + "name": "nmdc_wfrbt-11-gka6g406.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 635050, + "md5_checksum": "dc193d1a1693589003f992c820606bab", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qwh40f54", + "name": "nmdc_wfrbt-11-gka6g406.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-p0jdew93", + "file_size_bytes": 3964515, + "md5_checksum": "2f36b41c419efa1b1dfb6a9576b965ee", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-p0jdew93/nmdc:wfrbt-11-gka6g406.1/nmdc_wfrbt-11-gka6g406.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-1vg4es64.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-p0jdew93", + "started_at_time": "2021-10-11T02:27:18Z", + "ended_at_time": "2021-10-11T04:05:47+00:00", + "was_informed_by": "nmdc:omprc-11-p0jdew93", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-brcn0q13" + ], + "has_output": [ + "nmdc:dobj-11-kedzj993", + "nmdc:dobj-11-d6x47x33", + "nmdc:dobj-11-2015bk84", + "nmdc:dobj-11-0abm6770", + "nmdc:dobj-11-03ngz945" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-p0jdew93" + ], + "version": "v1.0.2", + "asm_score": 3.367, + "scaffolds": 191777, + "scaf_logsum": 225846, + "scaf_powsum": 24365, + "scaf_max": 18020, + "scaf_bp": 94879455, + "scaf_n50": 53021, + "scaf_n90": 159560, + "scaf_l50": 489, + "scaf_l90": 290, + "contigs": 191907, + "contig_bp": 94878155, + "ctg_n50": 53038, + "ctg_l50": 489, + "ctg_n90": 159679, + "ctg_l90": 290, + "ctg_logsum": 224925, + "ctg_powsum": 24264, + "ctg_max": 18020, + "gap_pct": 0.00137, + "gc_std": 0.10192, + "gc_avg": 0.61857 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-p0jdew93", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-fgtanh42" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127641" + ], + "has_output": [ + "nmdc:dobj-11-r2hspr31" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment 
microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-hpgccd55.1", + "name": "Read QC Activity for nmdc:omprc-11-p0jdew93", + "started_at_time": "2021-10-11T02:27:18Z", + "ended_at_time": "2021-10-11T04:05:47+00:00", + "was_informed_by": "nmdc:omprc-11-p0jdew93", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-r2hspr31" + ], + "has_output": [ + "nmdc:dobj-11-brcn0q13", + "nmdc:dobj-11-f3hmrc31" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-p0jdew93" + ], + "version": "v1.0.2", + "input_read_count": 24261468, + "output_read_count": 22362924, + "input_read_bases": 3663481668, + "output_read_bases": 3340338011 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-gka6g406.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-p0jdew93", + "started_at_time": "2021-10-11T02:27:18Z", + "ended_at_time": "2021-10-11T04:05:47+00:00", + "was_informed_by": "nmdc:omprc-11-p0jdew93", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-brcn0q13" + ], + "has_output": [ + "nmdc:dobj-11-8kq9jh40", + "nmdc:dobj-11-9ynzzk63", + "nmdc:dobj-11-33xb6s68", + "nmdc:dobj-11-twg34n69", + "nmdc:dobj-11-bqxztv22", + "nmdc:dobj-11-qy0vdt10", + "nmdc:dobj-11-hvc93k14", + "nmdc:dobj-11-j9g1cv95", + "nmdc:dobj-11-qwh40f54" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-p0jdew93" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + 
"id": "nmdc:dobj-11-dvy2av42", + "name": "10533.1.165310.TCCGAGT-AACTCGG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2168673471, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hss8hh61", + "name": "nmdc_wfrqc-11-jazrye31.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 1891088172, + "md5_checksum": "2ef23543e3064ca73c3034713d87c026", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrqc-11-jazrye31.1/nmdc_wfrqc-11-jazrye31.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-g1f97s42", + "name": "nmdc_wfrqc-11-jazrye31.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 289, + "md5_checksum": "87b172ead58a37be8d199c0acfc96759", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrqc-11-jazrye31.1/nmdc_wfrqc-11-jazrye31.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6mc2cw26", + "name": "nmdc_wfmgas-11-zf6mge79.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 114026687, + "md5_checksum": "0de0783b4277ecf862d647a770881577", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfmgas-11-zf6mge79.1/nmdc_wfmgas-11-zf6mge79.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t8g7qa27", + "name": "nmdc_wfmgas-11-zf6mge79.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 113395837, + "md5_checksum": "d6182e724bf12333ea97ba56cb2f548b", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfmgas-11-zf6mge79.1/nmdc_wfmgas-11-zf6mge79.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q78ftb45", + "name": "nmdc_wfmgas-11-zf6mge79.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 17816999, + "md5_checksum": "85543ddcd45aaa903a97de6d37058988", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfmgas-11-zf6mge79.1/nmdc_wfmgas-11-zf6mge79.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mj7vfr12", + "name": "nmdc_wfmgas-11-zf6mge79.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 18003090, + "md5_checksum": "98d05992fec93a940ac412a2adbb6003", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfmgas-11-zf6mge79.1/nmdc_wfmgas-11-zf6mge79.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sxdnxf17", + "name": "nmdc_wfmgas-11-zf6mge79.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfmgas-11-zf6mge79.1/nmdc_wfmgas-11-zf6mge79.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b9yh0s54", + "name": "nmdc_wfrbt-11-av0gv114.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 1326, + "md5_checksum": "e8f825653e5736e29b73de55bd11a270", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-g9j1ay83", + "name": "nmdc_wfrbt-11-av0gv114.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 664131, + "md5_checksum": "99bb1311b220e9a03da619fe5fb58f0f", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zye8mx10", + "name": "nmdc_wfrbt-11-av0gv114.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 229630, + "md5_checksum": "5c97bc15d4d5999f140664b3b2777c6d", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7naz6t58", + "name": "nmdc_wfrbt-11-av0gv114.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 1726867547, + "md5_checksum": "c9074b2e05765afd68463dc301b87995", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0ytzy432", + "name": "nmdc_wfrbt-11-av0gv114.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 254021, + "md5_checksum": "ed2c05d1702a9a811b8a98de748bc82a", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wpetdt35", + "name": "nmdc_wfrbt-11-av0gv114.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 2331702, + "md5_checksum": "6465fe59472b111ead1f0414ccf39f62", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pn5vr913", + "name": "nmdc_wfrbt-11-av0gv114.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 1376409913, + "md5_checksum": "9855ca52bce074c34dcebfd154fa94ff", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gf44h576", + "name": "nmdc_wfrbt-11-av0gv114.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 640506, + "md5_checksum": "ed8059f366d60112deb41a0c307bc6fc", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rzs4ky24", + "name": "nmdc_wfrbt-11-av0gv114.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-dtsr6z90", + "file_size_bytes": 3998448, + "md5_checksum": "f98bae155bced880c058ecde7d539c18", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-dtsr6z90/nmdc:wfrbt-11-av0gv114.1/nmdc_wfrbt-11-av0gv114.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-zf6mge79.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-dtsr6z90", + "started_at_time": "2021-10-11T02:27:00Z", + "ended_at_time": "2021-10-11T04:04:16+00:00", + "was_informed_by": "nmdc:omprc-11-dtsr6z90", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-hss8hh61" + ], + "has_output": [ + "nmdc:dobj-11-6mc2cw26", + "nmdc:dobj-11-t8g7qa27", + "nmdc:dobj-11-q78ftb45", + "nmdc:dobj-11-mj7vfr12", + "nmdc:dobj-11-sxdnxf17" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-dtsr6z90" + ], + "version": "v1.0.2", + "asm_score": 3.329, + "scaffolds": 208793, + "scaf_logsum": 260132, + "scaf_powsum": 27998, + "scaf_max": 12873, + "scaf_bp": 104569329, + "scaf_n50": 56935, + "scaf_n90": 172256, + "scaf_l50": 498, + "scaf_l90": 292, + "contigs": 208967, + "contig_bp": 104567589, + "ctg_n50": 57164, + "ctg_l50": 497, + "ctg_n90": 172414, + "ctg_l90": 292, + "ctg_logsum": 258957, + "ctg_powsum": 27868, + "ctg_max": 12873, + "gap_pct": 0.00166, + "gc_std": 0.09438, + "gc_avg": 0.63102 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-dtsr6z90", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-g079t498" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127643" + ], + "has_output": [ + "nmdc:dobj-11-dvy2av42" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from 
areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-jazrye31.1", + "name": "Read QC Activity for nmdc:omprc-11-dtsr6z90", + "started_at_time": "2021-10-11T02:27:00Z", + "ended_at_time": "2021-10-11T04:04:16+00:00", + "was_informed_by": "nmdc:omprc-11-dtsr6z90", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-dvy2av42" + ], + "has_output": [ + "nmdc:dobj-11-hss8hh61", + "nmdc:dobj-11-g1f97s42" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-dtsr6z90" + ], + "version": "v1.0.2", + "input_read_count": 25305566, + "output_read_count": 23508042, + "input_read_bases": 3821140466, + "output_read_bases": 3510483777 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-av0gv114.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-dtsr6z90", + "started_at_time": "2021-10-11T02:27:00Z", + "ended_at_time": "2021-10-11T04:04:16+00:00", + "was_informed_by": "nmdc:omprc-11-dtsr6z90", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-hss8hh61" + ], + "has_output": [ + "nmdc:dobj-11-b9yh0s54", + "nmdc:dobj-11-g9j1ay83", + "nmdc:dobj-11-zye8mx10", + "nmdc:dobj-11-7naz6t58", + "nmdc:dobj-11-0ytzy432", + "nmdc:dobj-11-wpetdt35", + "nmdc:dobj-11-pn5vr913", + "nmdc:dobj-11-gf44h576", + "nmdc:dobj-11-rzs4ky24" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-dtsr6z90" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-h1bew282", + 
"name": "10533.3.165334.AGTCTCA-GTGAGAC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 939616475, + "data_object_type": "Metagenome Raw Reads", + "url": "https://data.microbiomedata.org/data/raw/10533.3.165334.AGTCTCA-GTGAGAC.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-syhdkg43", + "name": "nmdc_wfrqc-11-vkqhnf31.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 694199131, + "md5_checksum": "98da35678c59689ce738b2a6bc708692", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrqc-11-vkqhnf31.1/nmdc_wfrqc-11-vkqhnf31.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-84yy0z39", + "name": "nmdc_wfrqc-11-vkqhnf31.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 280, + "md5_checksum": "ff08ea52254e0cc1011c56656505b27b", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrqc-11-vkqhnf31.1/nmdc_wfrqc-11-vkqhnf31.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-s9xapm91", + "name": "nmdc_wfmgas-11-j9y9a119.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 22158421, + "md5_checksum": "ffbbdaaad0b2febbba439e52394ea709", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfmgas-11-j9y9a119.1/nmdc_wfmgas-11-j9y9a119.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mwvv6005", + "name": "nmdc_wfmgas-11-j9y9a119.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 22019708, + "md5_checksum": "5e66aa57bd55866ea781fad5039f4591", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfmgas-11-j9y9a119.1/nmdc_wfmgas-11-j9y9a119.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-anh2rz14", + "name": "nmdc_wfmgas-11-j9y9a119.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 3888895, + "md5_checksum": "31f00b09a286e9b2ceed27041e07a35f", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfmgas-11-j9y9a119.1/nmdc_wfmgas-11-j9y9a119.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2baz8p28", + "name": "nmdc_wfmgas-11-j9y9a119.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 3904386, + "md5_checksum": "2b05f645cd82ef91c23d2a2dd925533d", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfmgas-11-j9y9a119.1/nmdc_wfmgas-11-j9y9a119.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-eqak9v68", + "name": "nmdc_wfmgas-11-j9y9a119.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfmgas-11-j9y9a119.1/nmdc_wfmgas-11-j9y9a119.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qa1ymp54", + "name": "nmdc_wfrbt-11-k4yp6b12.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 109, + "md5_checksum": "dc2e21becda8d6b010a95897cf97ae90", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7qrm7990", + "name": "nmdc_wfrbt-11-k4yp6b12.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 426075, + "md5_checksum": "0dd334c92557f3a8ac8c78b437c75eaf", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2h0mb261", + "name": "nmdc_wfrbt-11-k4yp6b12.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 226638, + "md5_checksum": "425873a08e598b0ca2987ff7b9b5da1f", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-eadae650", + "name": "nmdc_wfrbt-11-k4yp6b12.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 610862986, + "md5_checksum": "b0f2449065b52935ddba8abd6ae6bc88", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jz22dg76", + "name": "nmdc_wfrbt-11-k4yp6b12.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 243322, + "md5_checksum": "9baa708296f62334e099cf61711b5e16", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-psph9k09", + "name": "nmdc_wfrbt-11-k4yp6b12.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 2294995, + "md5_checksum": "f2a43278b06876cae5d4e8cdef17cfe1", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9fejn459", + "name": "nmdc_wfrbt-11-k4yp6b12.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 487178087, + "md5_checksum": "f1a811dbc523f9a27dbc004b8a66f0cb", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j0abn149", + "name": "nmdc_wfrbt-11-k4yp6b12.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 557688, + "md5_checksum": "8983fa1acb03f2905bbec3a6ccee2854", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8eqa0t20", + "name": "nmdc_wfrbt-11-k4yp6b12.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-hwadfm25", + "file_size_bytes": 3567307, + "md5_checksum": "a07c6c5fb68d1a56e39d93e8745b96cb", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-hwadfm25/nmdc:wfrbt-11-k4yp6b12.1/nmdc_wfrbt-11-k4yp6b12.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-j9y9a119.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-hwadfm25", + "started_at_time": "2021-10-11T02:26:47Z", + "ended_at_time": "2021-10-11T02:55:00+00:00", + "was_informed_by": "nmdc:omprc-11-hwadfm25", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-syhdkg43" + ], + "has_output": [ + "nmdc:dobj-11-s9xapm91", + "nmdc:dobj-11-mwvv6005", + "nmdc:dobj-11-anh2rz14", + "nmdc:dobj-11-2baz8p28", + "nmdc:dobj-11-eqak9v68" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-hwadfm25" + ], + "version": "v1.0.2", + "asm_score": 3.712, + "scaffolds": 46121, + "scaf_logsum": 38062, + "scaf_powsum": 4172.955, + "scaf_max": 11252, + "scaf_bp": 20152643, + "scaf_n50": 13959, + "scaf_n90": 39626, + "scaf_l50": 395, + "scaf_l90": 285, + "contigs": 46135, + "contig_bp": 20152503, + "ctg_n50": 14034, + "ctg_l50": 394, + "ctg_n90": 39639, + "ctg_l90": 285, + "ctg_logsum": 37962, + "ctg_powsum": 4162.045, + "ctg_max": 11252, + "gap_pct": 0.00069, + "gc_std": 0.09328, + "gc_avg": 0.6086 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-hwadfm25", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-n80sx618" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127644" + ], + "has_output": [ + "nmdc:dobj-11-h1bew282" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment 
microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-vkqhnf31.1", + "name": "Read QC Activity for nmdc:omprc-11-hwadfm25", + "started_at_time": "2021-10-11T02:26:47Z", + "ended_at_time": "2021-10-11T02:55:00+00:00", + "was_informed_by": "nmdc:omprc-11-hwadfm25", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-h1bew282" + ], + "has_output": [ + "nmdc:dobj-11-syhdkg43", + "nmdc:dobj-11-84yy0z39" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hwadfm25" + ], + "version": "v1.0.2", + "input_read_count": 11431762, + "output_read_count": 8322164, + "input_read_bases": 1726196062, + "output_read_bases": 1245433047 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-k4yp6b12.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-hwadfm25", + "started_at_time": "2021-10-11T02:26:47Z", + "ended_at_time": "2021-10-11T02:55:00+00:00", + "was_informed_by": "nmdc:omprc-11-hwadfm25", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-syhdkg43" + ], + "has_output": [ + "nmdc:dobj-11-qa1ymp54", + "nmdc:dobj-11-7qrm7990", + "nmdc:dobj-11-2h0mb261", + "nmdc:dobj-11-eadae650", + "nmdc:dobj-11-jz22dg76", + "nmdc:dobj-11-psph9k09", + "nmdc:dobj-11-9fejn459", + "nmdc:dobj-11-j0abn149", + "nmdc:dobj-11-8eqa0t20" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-hwadfm25" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": 
"nmdc:dobj-11-5mt10622", + "name": "10533.1.165310.TGTGCGT-AACGCAC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1941323184, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-yq3t9s16", + "name": "nmdc_wfrqc-11-xapv5209.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 1585232805, + "md5_checksum": "833077b40372c6daa20beaed04ed0ae1", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrqc-11-xapv5209.1/nmdc_wfrqc-11-xapv5209.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ptrp0e71", + "name": "nmdc_wfrqc-11-xapv5209.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 289, + "md5_checksum": "b68178eebde030fad0850797adbb2624", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrqc-11-xapv5209.1/nmdc_wfrqc-11-xapv5209.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-74c2wx42", + "name": "nmdc_wfmgas-11-v4psmj08.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 121772830, + "md5_checksum": "9b3a9a3bd7537ab67388cfaa0f6f6772", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfmgas-11-v4psmj08.1/nmdc_wfmgas-11-v4psmj08.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-d5pzm540", + "name": "nmdc_wfmgas-11-v4psmj08.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 121131381, + "md5_checksum": "9703d4ff34dd48b20b2adb4d25645de4", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfmgas-11-v4psmj08.1/nmdc_wfmgas-11-v4psmj08.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-d1gpjd56", + "name": "nmdc_wfmgas-11-v4psmj08.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 18148019, + "md5_checksum": "f9f1406eef98272d84e4d92e0661322b", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfmgas-11-v4psmj08.1/nmdc_wfmgas-11-v4psmj08.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w3e0hd73", + "name": "nmdc_wfmgas-11-v4psmj08.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 18321793, + "md5_checksum": "83b3607a11b344fb1ec2b47c6be2b9d8", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfmgas-11-v4psmj08.1/nmdc_wfmgas-11-v4psmj08.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-x8f6dr04", + "name": "nmdc_wfmgas-11-v4psmj08.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfmgas-11-v4psmj08.1/nmdc_wfmgas-11-v4psmj08.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-we9gx944", + "name": "nmdc_wfrbt-11-ae5b1h90.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 648, + "md5_checksum": "514172bb91ef3b125ae2d001b47bff0b", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zqpttq04", + "name": "nmdc_wfrbt-11-ae5b1h90.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 588644, + "md5_checksum": "82f072d1931154fbc722531d3d0dc41c", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3ppjkh94", + "name": "nmdc_wfrbt-11-ae5b1h90.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 228175, + "md5_checksum": "62a817ebcbfaf2c8feb1abedc35a736f", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jy4x2133", + "name": "nmdc_wfrbt-11-ae5b1h90.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 1468498728, + "md5_checksum": "81281fef2c0778516a84b3a672cc0230", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5ks51s58", + "name": "nmdc_wfrbt-11-ae5b1h90.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 251338, + "md5_checksum": "86ae054ba9def1126579c8f76db8a07a", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3xc8xn76", + "name": "nmdc_wfrbt-11-ae5b1h90.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 2322720, + "md5_checksum": "9db20a88fa3d02eb00f64d1671ef8521", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-z8tgbg79", + "name": "nmdc_wfrbt-11-ae5b1h90.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 1168015909, + "md5_checksum": "848fc10ed4365047cb139a4b40303808", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8bxk7n29", + "name": "nmdc_wfrbt-11-ae5b1h90.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 616202, + "md5_checksum": "94e422e0bae86c608fba1c3815e08e92", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ad375m61", + "name": "nmdc_wfrbt-11-ae5b1h90.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-vnnn4722", + "file_size_bytes": 3863456, + "md5_checksum": "c6eb85143a2489921c53f8184d536129", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-vnnn4722/nmdc:wfrbt-11-ae5b1h90.1/nmdc_wfrbt-11-ae5b1h90.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-v4psmj08.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-vnnn4722", + "started_at_time": "2021-10-11T02:27:08Z", + "ended_at_time": "2021-10-11T03:27:12+00:00", + "was_informed_by": "nmdc:omprc-11-vnnn4722", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-yq3t9s16" + ], + "has_output": [ + "nmdc:dobj-11-74c2wx42", + "nmdc:dobj-11-d5pzm540", + "nmdc:dobj-11-d1gpjd56", + "nmdc:dobj-11-w3e0hd73", + "nmdc:dobj-11-x8f6dr04" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-vnnn4722" + ], + "version": "v1.0.2", + "asm_score": 3.397, + "scaffolds": 212379, + "scaf_logsum": 318786, + "scaf_powsum": 34485, + "scaf_max": 19860, + "scaf_bp": 112055193, + "scaf_n50": 55395, + "scaf_n90": 173826, + "scaf_l50": 539, + "scaf_l90": 298, + "contigs": 212560, + "contig_bp": 112053293, + "ctg_n50": 55584, + "ctg_l50": 538, + "ctg_n90": 173977, + "ctg_l90": 298, + "ctg_logsum": 317684, + "ctg_powsum": 34356, + "ctg_max": 19860, + "gap_pct": 0.0017, + "gc_std": 0.09375, + "gc_avg": 0.63186 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-vnnn4722", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-tzp60785" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127639" + ], + "has_output": [ + "nmdc:dobj-11-5mt10622" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from 
areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-xapv5209.1", + "name": "Read QC Activity for nmdc:omprc-11-vnnn4722", + "started_at_time": "2021-10-11T02:27:08Z", + "ended_at_time": "2021-10-11T03:27:12+00:00", + "was_informed_by": "nmdc:omprc-11-vnnn4722", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-5mt10622" + ], + "has_output": [ + "nmdc:dobj-11-yq3t9s16", + "nmdc:dobj-11-ptrp0e71" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-vnnn4722" + ], + "version": "v1.0.2", + "input_read_count": 23535784, + "output_read_count": 20011156, + "input_read_bases": 3553903384, + "output_read_bases": 2989527376 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-ae5b1h90.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-vnnn4722", + "started_at_time": "2021-10-11T02:27:08Z", + "ended_at_time": "2021-10-11T03:27:12+00:00", + "was_informed_by": "nmdc:omprc-11-vnnn4722", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-yq3t9s16" + ], + "has_output": [ + "nmdc:dobj-11-we9gx944", + "nmdc:dobj-11-zqpttq04", + "nmdc:dobj-11-3ppjkh94", + "nmdc:dobj-11-jy4x2133", + "nmdc:dobj-11-5ks51s58", + "nmdc:dobj-11-3xc8xn76", + "nmdc:dobj-11-z8tgbg79", + "nmdc:dobj-11-8bxk7n29", + "nmdc:dobj-11-ad375m61" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-vnnn4722" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-ggn5ha23", + 
"name": "10533.3.165334.CGCTTAA-GTTAAGC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2500707412, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hwn46y98", + "name": "nmdc_wfrqc-11-t40vb863.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-p21wp875", + "file_size_bytes": 2304174057, + "md5_checksum": "603166d1e0da357d356a2029215d76ea", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrqc-11-t40vb863.1/nmdc_wfrqc-11-t40vb863.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kz2cxv94", + "name": "nmdc_wfrqc-11-t40vb863.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-p21wp875", + "file_size_bytes": 284, + "md5_checksum": "639d9630c859c9b2f6f7a2eff1e1a863", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrqc-11-t40vb863.1/nmdc_wfrqc-11-t40vb863.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w37arb28", + "name": "nmdc_wfmgas-11-jvpxz703.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-p21wp875", + "file_size_bytes": 44994026, + "md5_checksum": "297106236ee1553ef5ffd0634932bd4d", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfmgas-11-jvpxz703.1/nmdc_wfmgas-11-jvpxz703.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-af34p782", + "name": "nmdc_wfmgas-11-jvpxz703.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-p21wp875", + "file_size_bytes": 44684048, + "md5_checksum": "a031dd4e85f2096f84b1bd82e6aaf69f", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfmgas-11-jvpxz703.1/nmdc_wfmgas-11-jvpxz703.1_scaffolds.fna", 
+ "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-w2v0nk03", + "name": "nmdc_wfmgas-11-jvpxz703.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-p21wp875", + "file_size_bytes": 8709651, + "md5_checksum": "bd4310feb4fe56e55b735dbd2aed5ddb", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfmgas-11-jvpxz703.1/nmdc_wfmgas-11-jvpxz703.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kxcxmt44", + "name": "nmdc_wfmgas-11-jvpxz703.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-p21wp875", + "file_size_bytes": 8762839, + "md5_checksum": "c7aeb4e419d02db55ec0bfb78b7fe936", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfmgas-11-jvpxz703.1/nmdc_wfmgas-11-jvpxz703.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j06tc889", + "name": "nmdc_wfmgas-11-jvpxz703.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-p21wp875", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfmgas-11-jvpxz703.1/nmdc_wfmgas-11-jvpxz703.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-efd29e63", + "name": "nmdc_wfrbt-11-bmctra17.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 5303, + "md5_checksum": "bc7f6a9435c3a9aaca7ce9efe9d16e41", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-727dwd14", + "name": 
"nmdc_wfrbt-11-bmctra17.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 948120, + "md5_checksum": "0a079e34648ce23b0837dff31e2be5df", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zze6et28", + "name": "nmdc_wfrbt-11-bmctra17.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 241990, + "md5_checksum": "f19bf1723f0f0e9f2158b137d2618b08", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5ahjmc68", + "name": "nmdc_wfrbt-11-bmctra17.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 2023464022, + "md5_checksum": "81fc62d01a53a7ab5037829a158f0b64", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7bm8jr60", + "name": "nmdc_wfrbt-11-bmctra17.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 257700, + "md5_checksum": "05cc05eefdcb0d7bac19031619244a4b", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7tkv6b80", + "name": 
"nmdc_wfrbt-11-bmctra17.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 2339227, + "md5_checksum": "bb92f0d18280f32aacf482a43a841372", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pwm0vx67", + "name": "nmdc_wfrbt-11-bmctra17.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 1630988221, + "md5_checksum": "2fddd33160498548fa73e95dfc304d1a", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-d4610z76", + "name": "nmdc_wfrbt-11-bmctra17.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 659136, + "md5_checksum": "272e3daee292c6e284026ee95b72d290", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ffk2ex62", + "name": "nmdc_wfrbt-11-bmctra17.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-p21wp875", + "file_size_bytes": 4013188, + "md5_checksum": "bca8c2988929e7c176ec7b6609445db2", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p21wp875/nmdc:wfrbt-11-bmctra17.1/nmdc_wfrbt-11-bmctra17.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-jvpxz703.1", + "name": "Metagenome Assembly Activity 
for nmdc:omprc-11-p21wp875", + "started_at_time": "2021-12-01T21:30:33Z", + "ended_at_time": "2021-12-02T20:50:24+00:00", + "was_informed_by": "nmdc:omprc-11-p21wp875", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-hwn46y98" + ], + "has_output": [ + "nmdc:dobj-11-w37arb28", + "nmdc:dobj-11-af34p782", + "nmdc:dobj-11-w2v0nk03", + "nmdc:dobj-11-kxcxmt44", + "nmdc:dobj-11-j06tc889" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-p21wp875" + ], + "version": "v1.0.2", + "asm_score": 7.947, + "scaffolds": 103181, + "scaf_logsum": 50816, + "scaf_powsum": 5993.216, + "scaf_max": 27286, + "scaf_bp": 40567599, + "scaf_n50": 35472, + "scaf_n90": 88751, + "scaf_l50": 348, + "scaf_l90": 283, + "contigs": 103206, + "contig_bp": 40567169, + "ctg_n50": 35487, + "ctg_l50": 348, + "ctg_n90": 88775, + "ctg_l90": 283, + "ctg_logsum": 50653, + "ctg_powsum": 5974.26, + "ctg_max": 27286, + "gap_pct": 0.00106, + "gc_std": 0.1028, + "gc_avg": 0.60377 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-p21wp875", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-qpve9v25" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127642" + ], + "has_output": [ + "nmdc:dobj-11-ggn5ha23" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N2_0_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", 
+ "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-t40vb863.1", + "name": "Read QC Activity for nmdc:omprc-11-p21wp875", + "started_at_time": "2021-12-01T21:30:33Z", + "ended_at_time": "2021-12-02T20:50:24+00:00", + "was_informed_by": "nmdc:omprc-11-p21wp875", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-ggn5ha23" + ], + "has_output": [ + "nmdc:dobj-11-hwn46y98", + "nmdc:dobj-11-kz2cxv94" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-p21wp875" + ], + "version": "v1.0.2", + "input_read_count": 28024960, + "output_read_count": 27378404, + "input_read_bases": 4231768960, + "output_read_bases": 4095196321 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-bmctra17.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-p21wp875", + "started_at_time": "2021-12-01T21:30:33Z", + "ended_at_time": "2021-12-02T20:50:24+00:00", + "was_informed_by": "nmdc:omprc-11-p21wp875", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-hwn46y98" + ], + "has_output": [ + "nmdc:dobj-11-efd29e63", + "nmdc:dobj-11-727dwd14", + "nmdc:dobj-11-zze6et28", + "nmdc:dobj-11-5ahjmc68", + "nmdc:dobj-11-7bm8jr60", + "nmdc:dobj-11-7tkv6b80", + "nmdc:dobj-11-pwm0vx67", + "nmdc:dobj-11-d4610z76", + "nmdc:dobj-11-ffk2ex62" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-p21wp875" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-1jxf0m20", + "name": "10533.2.165322.TTCGTAC-GGTACGA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2463257736, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-z7c4m372", + "name": 
"nmdc_wfrqc-11-rsxb7a38.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 2209739723, + "md5_checksum": "208a3777ef0b99408f0d5832dee576e0", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrqc-11-rsxb7a38.1/nmdc_wfrqc-11-rsxb7a38.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hw7ebe06", + "name": "nmdc_wfrqc-11-rsxb7a38.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 291, + "md5_checksum": "8533a56006bdc1841b6fc16e99b6a84a", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrqc-11-rsxb7a38.1/nmdc_wfrqc-11-rsxb7a38.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jy0vtn08", + "name": "nmdc_wfmgas-11-6f3cw050.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 33560582, + "md5_checksum": "a489f411887fb891fdff72b5b1a260f4", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfmgas-11-6f3cw050.1/nmdc_wfmgas-11-6f3cw050.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3ne16y54", + "name": "nmdc_wfmgas-11-6f3cw050.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 33315354, + "md5_checksum": "96213980572bdd6b88c71462966a37cd", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfmgas-11-6f3cw050.1/nmdc_wfmgas-11-6f3cw050.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wxpant31", + "name": "nmdc_wfmgas-11-6f3cw050.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 6883590, + "md5_checksum": 
"cd07ae3997f9194633927722a2d9d627", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfmgas-11-6f3cw050.1/nmdc_wfmgas-11-6f3cw050.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vjgnnx48", + "name": "nmdc_wfmgas-11-6f3cw050.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 6922551, + "md5_checksum": "05633bd75cf211314c8030f4a715aee0", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfmgas-11-6f3cw050.1/nmdc_wfmgas-11-6f3cw050.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7wh6kw02", + "name": "nmdc_wfmgas-11-6f3cw050.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfmgas-11-6f3cw050.1/nmdc_wfmgas-11-6f3cw050.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-grnqkh48", + "name": "nmdc_wfrbt-11-t793q539.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 4650, + "md5_checksum": "3e0598df41941463bac0fdec5df29f55", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jpjrzp60", + "name": "nmdc_wfrbt-11-t793q539.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 877659, + "md5_checksum": "1a625b148d8f6d9fe9aeab6cfb67df6c", + "data_object_type": "GOTTCHA2 Report Full", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-90bd5t22", + "name": "nmdc_wfrbt-11-t793q539.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 236676, + "md5_checksum": "bc8e157195d042d7207d67b4982fea96", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4z6ef448", + "name": "nmdc_wfrbt-11-t793q539.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 1901493736, + "md5_checksum": "a8fc683bb9b3aba316cb605c5fb591ec", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-cyy9ee55", + "name": "nmdc_wfrbt-11-t793q539.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 256274, + "md5_checksum": "b5fe0189dbf00662d78cc55b8b0cc803", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zbj40g71", + "name": "nmdc_wfrbt-11-t793q539.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 2333722, + "md5_checksum": "cd10cca62774e66f60d60380ee18132e", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gg67md47", + "name": "nmdc_wfrbt-11-t793q539.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 1534616616, + "md5_checksum": "b13ee2ee52d15c3669aecd2e913f2658", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-29yxd763", + "name": "nmdc_wfrbt-11-t793q539.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 663507, + "md5_checksum": "09a2d722810b3d90207bc4cfa626133b", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7t723t39", + "name": "nmdc_wfrbt-11-t793q539.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-vs67yj43", + "file_size_bytes": 4031909, + "md5_checksum": "c3a8d9f48266a43ad74fc581132e2bba", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vs67yj43/nmdc:wfrbt-11-t793q539.1/nmdc_wfrbt-11-t793q539.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-6f3cw050.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-vs67yj43", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T04:05:12+00:00", + "was_informed_by": "nmdc:omprc-11-vs67yj43", + "execution_resource": "NERSC-Cori", + "git_url": 
"https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-z7c4m372" + ], + "has_output": [ + "nmdc:dobj-11-jy0vtn08", + "nmdc:dobj-11-3ne16y54", + "nmdc:dobj-11-wxpant31", + "nmdc:dobj-11-vjgnnx48", + "nmdc:dobj-11-7wh6kw02" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-vs67yj43" + ], + "version": "v1.0.2", + "asm_score": 17.863, + "scaffolds": 81627, + "scaf_logsum": 20954, + "scaf_powsum": 2545.156, + "scaf_max": 88400, + "scaf_bp": 30097563, + "scaf_n50": 30518, + "scaf_n90": 71614, + "scaf_l50": 332, + "scaf_l90": 282, + "scaf_n_gt50k": 2, + "scaf_l_gt50k": 150260, + "scaf_pct_gt50k": 0.49924305, + "contigs": 81653, + "contig_bp": 30097213, + "ctg_n50": 30532, + "ctg_l50": 332, + "ctg_n90": 71638, + "ctg_l90": 282, + "ctg_logsum": 20856, + "ctg_powsum": 2534.931, + "ctg_max": 88400, + "gap_pct": 0.00116, + "gc_std": 0.13273, + "gc_avg": 0.55961 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-vs67yj43", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-0n5nks24" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127646" + ], + "has_output": [ + "nmdc:dobj-11-1jxf0m20" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-rsxb7a38.1", + "name": "Read QC 
Activity for nmdc:omprc-11-vs67yj43", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T04:05:12+00:00", + "was_informed_by": "nmdc:omprc-11-vs67yj43", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-1jxf0m20" + ], + "has_output": [ + "nmdc:dobj-11-z7c4m372", + "nmdc:dobj-11-hw7ebe06" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-vs67yj43" + ], + "version": "v1.0.2", + "input_read_count": 27835800, + "output_read_count": 25862834, + "input_read_bases": 4203205800, + "output_read_bases": 3867340900 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-t793q539.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-vs67yj43", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T04:05:12+00:00", + "was_informed_by": "nmdc:omprc-11-vs67yj43", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-z7c4m372" + ], + "has_output": [ + "nmdc:dobj-11-grnqkh48", + "nmdc:dobj-11-jpjrzp60", + "nmdc:dobj-11-90bd5t22", + "nmdc:dobj-11-4z6ef448", + "nmdc:dobj-11-cyy9ee55", + "nmdc:dobj-11-zbj40g71", + "nmdc:dobj-11-gg67md47", + "nmdc:dobj-11-29yxd763", + "nmdc:dobj-11-7t723t39" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-vs67yj43" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-aw4wks31", + "name": "10533.3.165334.GTAACGA-GTCGTTA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2446032142, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2zszjh18", + "name": "nmdc_wfrqc-11-j4dhfa07.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 2191252492, + "md5_checksum": 
"fcc3a92dd2b6ab6045f4be27da6f2cdd", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrqc-11-j4dhfa07.1/nmdc_wfrqc-11-j4dhfa07.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-y6wh7w15", + "name": "nmdc_wfrqc-11-j4dhfa07.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 289, + "md5_checksum": "2208c88cac6b941799d4492dbf5f0887", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrqc-11-j4dhfa07.1/nmdc_wfrqc-11-j4dhfa07.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hrm5x358", + "name": "nmdc_wfmgas-11-nac63s36.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 68323307, + "md5_checksum": "ce0cca368755d9447d048f2c4acb208e", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfmgas-11-nac63s36.1/nmdc_wfmgas-11-nac63s36.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0az5y327", + "name": "nmdc_wfmgas-11-nac63s36.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 67879766, + "md5_checksum": "711341d90449f8dd90017a7e2345eb10", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfmgas-11-nac63s36.1/nmdc_wfmgas-11-nac63s36.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-36s9q602", + "name": "nmdc_wfmgas-11-nac63s36.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 12494714, + "md5_checksum": "1b1764602b301268c36db12a77cd517a", + "data_object_type": "Assembly Coverage Stats", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfmgas-11-nac63s36.1/nmdc_wfmgas-11-nac63s36.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gpxh9656", + "name": "nmdc_wfmgas-11-nac63s36.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 12611298, + "md5_checksum": "dc45db52e7b1cd8f769cb0cecdb32421", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfmgas-11-nac63s36.1/nmdc_wfmgas-11-nac63s36.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ps5s0m14", + "name": "nmdc_wfmgas-11-nac63s36.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfmgas-11-nac63s36.1/nmdc_wfmgas-11-nac63s36.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-1kf4vz52", + "name": "nmdc_wfrbt-11-v14pxj70.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 3323, + "md5_checksum": "5e64b9ccf92f0c974c51bd8393dea50c", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-64t1p647", + "name": "nmdc_wfrbt-11-v14pxj70.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 782039, + "md5_checksum": "1357df297d8d8a872b335e0c3222d102", + "data_object_type": "GOTTCHA2 Report Full", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mx4g9724", + "name": "nmdc_wfrbt-11-v14pxj70.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 236971, + "md5_checksum": "5b510e336e60b6120b43e9b6420a074e", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j036zy16", + "name": "nmdc_wfrbt-11-v14pxj70.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 1945479328, + "md5_checksum": "33bf814280051c220e0c4a06f7935728", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t3azp202", + "name": "nmdc_wfrbt-11-v14pxj70.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 255338, + "md5_checksum": "e77a1d052b0d2a99e0a1df3b3c038f7c", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dj8tm942", + "name": "nmdc_wfrbt-11-v14pxj70.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 2333371, + "md5_checksum": "0efb0ad19234056d7e2e3726dead3622", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-r6y4a534", + "name": "nmdc_wfrbt-11-v14pxj70.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 1562011343, + "md5_checksum": "222bac312efdd6c86d2475ad224b7907", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hgbehz05", + "name": "nmdc_wfrbt-11-v14pxj70.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 647859, + "md5_checksum": "baaca868b1fed932b463e489708dd741", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-y9b99f89", + "name": "nmdc_wfrbt-11-v14pxj70.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-nhf5m035", + "file_size_bytes": 3952548, + "md5_checksum": "b549d169e5b0693152555373a6d8ee75", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nhf5m035/nmdc:wfrbt-11-v14pxj70.1/nmdc_wfrbt-11-v14pxj70.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-nac63s36.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-nhf5m035", + "started_at_time": "2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T04:13:04+00:00", + "was_informed_by": "nmdc:omprc-11-nhf5m035", + "execution_resource": "NERSC-Cori", + "git_url": 
"https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-2zszjh18" + ], + "has_output": [ + "nmdc:dobj-11-hrm5x358", + "nmdc:dobj-11-0az5y327", + "nmdc:dobj-11-36s9q602", + "nmdc:dobj-11-gpxh9656", + "nmdc:dobj-11-ps5s0m14" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-nhf5m035" + ], + "version": "v1.0.2", + "asm_score": 4.996, + "scaffolds": 147272, + "scaf_logsum": 91521, + "scaf_powsum": 10208, + "scaf_max": 23974, + "scaf_bp": 61887639, + "scaf_n50": 47464, + "scaf_n90": 125972, + "scaf_l50": 381, + "scaf_l90": 285, + "contigs": 147340, + "contig_bp": 61886959, + "ctg_n50": 47493, + "ctg_l50": 381, + "ctg_n90": 126039, + "ctg_l90": 285, + "ctg_logsum": 91193, + "ctg_powsum": 10170, + "ctg_max": 23974, + "gap_pct": 0.0011, + "gc_std": 0.0855, + "gc_avg": 0.61759 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-nhf5m035", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-b7nrtg75" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127648" + ], + "has_output": [ + "nmdc:dobj-11-aw4wks31" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S1_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-j4dhfa07.1", + "name": "Read QC Activity for nmdc:omprc-11-nhf5m035", + "started_at_time": 
"2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T04:13:04+00:00", + "was_informed_by": "nmdc:omprc-11-nhf5m035", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-aw4wks31" + ], + "has_output": [ + "nmdc:dobj-11-2zszjh18", + "nmdc:dobj-11-y6wh7w15" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-nhf5m035" + ], + "version": "v1.0.2", + "input_read_count": 28064750, + "output_read_count": 26438892, + "input_read_bases": 4237777250, + "output_read_bases": 3953713958 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-v14pxj70.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-nhf5m035", + "started_at_time": "2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T04:13:04+00:00", + "was_informed_by": "nmdc:omprc-11-nhf5m035", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-2zszjh18" + ], + "has_output": [ + "nmdc:dobj-11-1kf4vz52", + "nmdc:dobj-11-64t1p647", + "nmdc:dobj-11-mx4g9724", + "nmdc:dobj-11-j036zy16", + "nmdc:dobj-11-t3azp202", + "nmdc:dobj-11-dj8tm942", + "nmdc:dobj-11-r6y4a534", + "nmdc:dobj-11-hgbehz05", + "nmdc:dobj-11-y9b99f89" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-nhf5m035" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-a52d6695", + "name": "10533.2.165322.ACGGTCT-AAGACCG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2236205196, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-h3hqdz71", + "name": "nmdc_wfrqc-11-mnpxq718.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 2052448806, + "md5_checksum": "c082eff434fe4863c0e29c79b759d100", + "data_object_type": "Filtered 
Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrqc-11-mnpxq718.1/nmdc_wfrqc-11-mnpxq718.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mwbfzb10", + "name": "nmdc_wfrqc-11-mnpxq718.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 282, + "md5_checksum": "7f204d0d1d45e77b39d9c9b2362c6b0b", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrqc-11-mnpxq718.1/nmdc_wfrqc-11-mnpxq718.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p2qqzn06", + "name": "nmdc_wfmgas-11-16gmwq04.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 42280606, + "md5_checksum": "ba4fdf970e96978210b1e5d5b1e413e3", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfmgas-11-16gmwq04.1/nmdc_wfmgas-11-16gmwq04.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hggyve72", + "name": "nmdc_wfmgas-11-16gmwq04.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 41987788, + "md5_checksum": "5ef10281205ffe315f353be4e60c385c", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfmgas-11-16gmwq04.1/nmdc_wfmgas-11-16gmwq04.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-prb1rv76", + "name": "nmdc_wfmgas-11-16gmwq04.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 8213648, + "md5_checksum": "e0fd3fe2211c986d532cf7c5dc7a457f", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfmgas-11-16gmwq04.1/nmdc_wfmgas-11-16gmwq04.1_covstats.txt", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-p2tmxs58", + "name": "nmdc_wfmgas-11-16gmwq04.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 8259836, + "md5_checksum": "904fff4c8a91f9a40b989c8b38b03d9f", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfmgas-11-16gmwq04.1/nmdc_wfmgas-11-16gmwq04.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t0dt8d11", + "name": "nmdc_wfmgas-11-16gmwq04.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfmgas-11-16gmwq04.1/nmdc_wfmgas-11-16gmwq04.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xvy2r621", + "name": "nmdc_wfrbt-11-hde4qq28.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 4666, + "md5_checksum": "7e1438bf8076daf46f3d782d8f9656b4", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3rsc6163", + "name": "nmdc_wfrbt-11-hde4qq28.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 786018, + "md5_checksum": "cfd63309cd38a293615ddce5e8ea6402", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-n6629513", + "name": 
"nmdc_wfrbt-11-hde4qq28.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 237895, + "md5_checksum": "7e353b7bfb1586773fa00b515dffe6ec", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7gasm339", + "name": "nmdc_wfrbt-11-hde4qq28.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 1767305277, + "md5_checksum": "6667be33e7867ca2aabfa5d663e2970a", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-kvdwz191", + "name": "nmdc_wfrbt-11-hde4qq28.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 254858, + "md5_checksum": "7ee0b0b21444ee06752e6b9c32f476af", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3ceaz134", + "name": "nmdc_wfrbt-11-hde4qq28.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 2332396, + "md5_checksum": "d3b27bed597f07ad4bb4a500ad2fb928", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gbjf3287", + "name": 
"nmdc_wfrbt-11-hde4qq28.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 1419938277, + "md5_checksum": "45617f93e5f072fbad25a0308ead6c3d", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dm44vp24", + "name": "nmdc_wfrbt-11-hde4qq28.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 661837, + "md5_checksum": "460e7594fcd06678df1b9c5e5075cb4d", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dxwvhz14", + "name": "nmdc_wfrbt-11-hde4qq28.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-w3v30q48", + "file_size_bytes": 4028822, + "md5_checksum": "ab80fc324c9206a41a66d64227a97179", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-w3v30q48/nmdc:wfrbt-11-hde4qq28.1/nmdc_wfrbt-11-hde4qq28.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-16gmwq04.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-w3v30q48", + "started_at_time": "2021-10-11T02:24:27Z", + "ended_at_time": "2021-10-11T03:38:33+00:00", + "was_informed_by": "nmdc:omprc-11-w3v30q48", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-h3hqdz71" + ], + "has_output": [ + "nmdc:dobj-11-p2qqzn06", + "nmdc:dobj-11-hggyve72", + "nmdc:dobj-11-prb1rv76", + "nmdc:dobj-11-p2tmxs58", + "nmdc:dobj-11-t0dt8d11" + ], + 
"type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-w3v30q48" + ], + "version": "v1.0.2", + "asm_score": 14.664, + "scaffolds": 97316, + "scaf_logsum": 37899, + "scaf_powsum": 4362.772, + "scaf_max": 96788, + "scaf_bp": 38110647, + "scaf_n50": 34125, + "scaf_n90": 85353, + "scaf_l50": 353, + "scaf_l90": 283, + "scaf_n_gt50k": 2, + "scaf_l_gt50k": 153917, + "scaf_pct_gt50k": 0.40386876, + "contigs": 97351, + "contig_bp": 38110297, + "ctg_n50": 34144, + "ctg_l50": 353, + "ctg_n90": 85387, + "ctg_l90": 283, + "ctg_logsum": 37666, + "ctg_powsum": 4336.355, + "ctg_max": 96788, + "gap_pct": 0.00092, + "gc_std": 0.13435, + "gc_avg": 0.5552 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-w3v30q48", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-q44pjf87" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127647" + ], + "has_output": [ + "nmdc:dobj-11-a52d6695" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_10_20", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-mnpxq718.1", + "name": "Read QC Activity for nmdc:omprc-11-w3v30q48", + "started_at_time": "2021-10-11T02:24:27Z", + "ended_at_time": "2021-10-11T03:38:33+00:00", + "was_informed_by": "nmdc:omprc-11-w3v30q48", + "execution_resource": "NERSC-Cori", + "git_url": 
"https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-a52d6695" + ], + "has_output": [ + "nmdc:dobj-11-h3hqdz71", + "nmdc:dobj-11-mwbfzb10" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-w3v30q48" + ], + "version": "v1.0.2", + "input_read_count": 24906858, + "output_read_count": 24128544, + "input_read_bases": 3760935558, + "output_read_bases": 3608754154 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-hde4qq28.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-w3v30q48", + "started_at_time": "2021-10-11T02:24:27Z", + "ended_at_time": "2021-10-11T03:38:33+00:00", + "was_informed_by": "nmdc:omprc-11-w3v30q48", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-h3hqdz71" + ], + "has_output": [ + "nmdc:dobj-11-xvy2r621", + "nmdc:dobj-11-3rsc6163", + "nmdc:dobj-11-n6629513", + "nmdc:dobj-11-7gasm339", + "nmdc:dobj-11-kvdwz191", + "nmdc:dobj-11-3ceaz134", + "nmdc:dobj-11-gbjf3287", + "nmdc:dobj-11-dm44vp24", + "nmdc:dobj-11-dxwvhz14" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-w3v30q48" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-yjmvgb38", + "name": "10533.1.165310.CCTCAGT-AACTGAG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2092289780, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zc55t718", + "name": "nmdc_wfrqc-11-56n56z63.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 1909192845, + "md5_checksum": "034df323b47f010f27e7c032d445a891", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrqc-11-56n56z63.1/nmdc_wfrqc-11-56n56z63.1_filtered.fastq.gz", + "type": 
"nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-g0wt6g71", + "name": "nmdc_wfrqc-11-56n56z63.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 283, + "md5_checksum": "ca137bf5e2df6541425f22b5d1fec492", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrqc-11-56n56z63.1/nmdc_wfrqc-11-56n56z63.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ezgkmr74", + "name": "nmdc_wfmgas-11-d89ach23.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 50120426, + "md5_checksum": "d12098664b746680f3ecb72566ed253c", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfmgas-11-d89ach23.1/nmdc_wfmgas-11-d89ach23.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-n8tdap98", + "name": "nmdc_wfmgas-11-d89ach23.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 49798855, + "md5_checksum": "8742edfe29067fab55a0ad46131a8960", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfmgas-11-d89ach23.1/nmdc_wfmgas-11-d89ach23.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sct98y90", + "name": "nmdc_wfmgas-11-d89ach23.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 9035671, + "md5_checksum": "8a122d6bb935ceb7c3ec2e91ad924efe", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfmgas-11-d89ach23.1/nmdc_wfmgas-11-d89ach23.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qfvegn79", + "name": "nmdc_wfmgas-11-d89ach23.1_assembly.agp", + "description": "Assembled AGP file for 
nmdc:omprc-11-vykcbs96", + "file_size_bytes": 9087107, + "md5_checksum": "fdf1b59433d0dda705a78c04fda3d779", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfmgas-11-d89ach23.1/nmdc_wfmgas-11-d89ach23.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rx358g02", + "name": "nmdc_wfmgas-11-d89ach23.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfmgas-11-d89ach23.1/nmdc_wfmgas-11-d89ach23.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-qmacty57", + "name": "nmdc_wfrbt-11-592st706.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 3780, + "md5_checksum": "694374188ba4372344536fa26a2282b8", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wrqbt484", + "name": "nmdc_wfrbt-11-592st706.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 822292, + "md5_checksum": "e11dfa7178e8c426c7c930b57aa40377", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3w4gr946", + "name": "nmdc_wfrbt-11-592st706.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 236496, + "md5_checksum": 
"46e203465faf61780fad8f626e9ab623", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-m53pkn77", + "name": "nmdc_wfrbt-11-592st706.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 1699052782, + "md5_checksum": "7a6b2ded3f49663d9916eaea3e129dc7", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-czq9e865", + "name": "nmdc_wfrbt-11-592st706.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 256209, + "md5_checksum": "6f8be89c7aab1c3f392b4f80c7ddf6a5", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9b8cw604", + "name": "nmdc_wfrbt-11-592st706.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 2336400, + "md5_checksum": "4299b438a815becc8beed40fcb803e9f", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vvyxbc66", + "name": "nmdc_wfrbt-11-592st706.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 1359323947, + "md5_checksum": 
"4ae4dbd13c7338df5c00555bc6755947", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9bsj7r69", + "name": "nmdc_wfrbt-11-592st706.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 651624, + "md5_checksum": "2be07eb38d408077a55ecb48e123f7f8", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xh5sb174", + "name": "nmdc_wfrbt-11-592st706.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-vykcbs96", + "file_size_bytes": 3973557, + "md5_checksum": "f318581f0df6e04b7ae2384f9237da06", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-vykcbs96/nmdc:wfrbt-11-592st706.1/nmdc_wfrbt-11-592st706.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-d89ach23.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-vykcbs96", + "started_at_time": "2021-10-11T02:24:42Z", + "ended_at_time": "2021-10-11T04:07:11+00:00", + "was_informed_by": "nmdc:omprc-11-vykcbs96", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-zc55t718" + ], + "has_output": [ + "nmdc:dobj-11-ezgkmr74", + "nmdc:dobj-11-n8tdap98", + "nmdc:dobj-11-sct98y90", + "nmdc:dobj-11-qfvegn79", + "nmdc:dobj-11-rx358g02" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-vykcbs96" + ], + "version": "v1.0.2", + "asm_score": 2.823, + "scaffolds": 106821, + "scaf_logsum": 65979, + 
"scaf_powsum": 6995.401, + "scaf_max": 6924, + "scaf_bp": 45474295, + "scaf_n50": 33825, + "scaf_n90": 92004, + "scaf_l50": 395, + "scaf_l90": 284, + "contigs": 106865, + "contig_bp": 45473855, + "ctg_n50": 33845, + "ctg_l50": 395, + "ctg_n90": 92046, + "ctg_l90": 284, + "ctg_logsum": 65663, + "ctg_powsum": 6960.932, + "ctg_max": 6924, + "gap_pct": 0.00097, + "gc_std": 0.12472, + "gc_avg": 0.58373 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-vykcbs96", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-ffqcqd73" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127645" + ], + "has_output": [ + "nmdc:dobj-11-yjmvgb38" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N1_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-56n56z63.1", + "name": "Read QC Activity for nmdc:omprc-11-vykcbs96", + "started_at_time": "2021-10-11T02:24:42Z", + "ended_at_time": "2021-10-11T04:07:11+00:00", + "was_informed_by": "nmdc:omprc-11-vykcbs96", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-yjmvgb38" + ], + "has_output": [ + "nmdc:dobj-11-zc55t718", + "nmdc:dobj-11-g0wt6g71" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-vykcbs96" + ], + "version": "v1.0.2", + 
"input_read_count": 24139032, + "output_read_count": 23262948, + "input_read_bases": 3644993832, + "output_read_bases": 3475317024 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-592st706.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-vykcbs96", + "started_at_time": "2021-10-11T02:24:42Z", + "ended_at_time": "2021-10-11T04:07:11+00:00", + "was_informed_by": "nmdc:omprc-11-vykcbs96", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-zc55t718" + ], + "has_output": [ + "nmdc:dobj-11-qmacty57", + "nmdc:dobj-11-wrqbt484", + "nmdc:dobj-11-3w4gr946", + "nmdc:dobj-11-m53pkn77", + "nmdc:dobj-11-czq9e865", + "nmdc:dobj-11-9b8cw604", + "nmdc:dobj-11-vvyxbc66", + "nmdc:dobj-11-9bsj7r69", + "nmdc:dobj-11-xh5sb174" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-vykcbs96" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-pr8zqm21", + "name": "10533.2.165322.GAACGCT-AAGCGTT.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2196954131, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-pd5ekz58", + "name": "nmdc_wfrqc-11-stdh6256.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 1967546513, + "md5_checksum": "ed0ea2f2ef6b667c5f8e60cd7d197cf5", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrqc-11-stdh6256.1/nmdc_wfrqc-11-stdh6256.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-n3y0vj17", + "name": "nmdc_wfrqc-11-stdh6256.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 283, + "md5_checksum": "25a7ff469ffae5906d6ade4d74cab88f", + "data_object_type": "QC 
Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrqc-11-stdh6256.1/nmdc_wfrqc-11-stdh6256.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-958r0f53", + "name": "nmdc_wfmgas-11-nwb00j66.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 95992786, + "md5_checksum": "59c6d3527075253a0329011df613784d", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfmgas-11-nwb00j66.1/nmdc_wfmgas-11-nwb00j66.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-sv6dw724", + "name": "nmdc_wfmgas-11-nwb00j66.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 95419852, + "md5_checksum": "a7b4f3b3ba90131b91e41114f199b61a", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfmgas-11-nwb00j66.1/nmdc_wfmgas-11-nwb00j66.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-athrxh93", + "name": "nmdc_wfmgas-11-nwb00j66.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 16169788, + "md5_checksum": "58f366fbc8419c332e619b555d39b1b1", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfmgas-11-nwb00j66.1/nmdc_wfmgas-11-nwb00j66.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-yebnnj95", + "name": "nmdc_wfmgas-11-nwb00j66.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 16338899, + "md5_checksum": "f5cc92a4f0084bb3f67f00ac2b723a9e", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfmgas-11-nwb00j66.1/nmdc_wfmgas-11-nwb00j66.1_assembly.agp", + "type": 
"nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bzahgz31", + "name": "nmdc_wfmgas-11-nwb00j66.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfmgas-11-nwb00j66.1/nmdc_wfmgas-11-nwb00j66.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rrpb0g41", + "name": "nmdc_wfrbt-11-pb58eb54.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 2079, + "md5_checksum": "c30cb5928ad608e7c8fe1ce77d81933a", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ep3hb794", + "name": "nmdc_wfrbt-11-pb58eb54.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 642861, + "md5_checksum": "4aa159b1ee973c6e3e309ef60d351018", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-byx14935", + "name": "nmdc_wfrbt-11-pb58eb54.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 230792, + "md5_checksum": "8c1683fa4041bd10711aa3beb4735811", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rkrtcp34", + "name": 
"nmdc_wfrbt-11-pb58eb54.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 1743695420, + "md5_checksum": "b8be7144441cbd6fbe4a8193f9e055ab", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6t23xp27", + "name": "nmdc_wfrbt-11-pb58eb54.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 254036, + "md5_checksum": "d4f57641e41f0249f3fde7b973289cf5", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rxh35157", + "name": "nmdc_wfrbt-11-pb58eb54.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 2332943, + "md5_checksum": "4e9ec619c5611cb0166ea127496fadeb", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-j3rjym54", + "name": "nmdc_wfrbt-11-pb58eb54.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 1387669799, + "md5_checksum": "ed2b2495ca211e17298ca2e212fe3811", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-v6wzv743", + "name": 
"nmdc_wfrbt-11-pb58eb54.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 637131, + "md5_checksum": "05d35fc4e391296ff0e716c3fcbbee89", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f933wm58", + "name": "nmdc_wfrbt-11-pb58eb54.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-dw7shd52", + "file_size_bytes": 3976407, + "md5_checksum": "0d07551972f3230ec2ef4a0e04929b97", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dw7shd52/nmdc:wfrbt-11-pb58eb54.1/nmdc_wfrbt-11-pb58eb54.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-nwb00j66.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-dw7shd52", + "started_at_time": "2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T03:38:32+00:00", + "was_informed_by": "nmdc:omprc-11-dw7shd52", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-pd5ekz58" + ], + "has_output": [ + "nmdc:dobj-11-958r0f53", + "nmdc:dobj-11-sv6dw724", + "nmdc:dobj-11-athrxh93", + "nmdc:dobj-11-yebnnj95", + "nmdc:dobj-11-bzahgz31" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-dw7shd52" + ], + "version": "v1.0.2", + "asm_score": 3.279, + "scaffolds": 189880, + "scaf_logsum": 158661, + "scaf_powsum": 16967, + "scaf_max": 28201, + "scaf_bp": 87529475, + "scaf_n50": 57416, + "scaf_n90": 160823, + "scaf_l50": 440, + "scaf_l90": 289, + "contigs": 190009, + "contig_bp": 87528185, + "ctg_n50": 57445, + "ctg_l50": 440, + "ctg_n90": 160942, + "ctg_l90": 289, + "ctg_logsum": 157844, + "ctg_powsum": 16877, + "ctg_max": 
28201, + "gap_pct": 0.00147, + "gc_std": 0.09385, + "gc_avg": 0.62766 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-dw7shd52", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-7fedgs13" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127649" + ], + "has_output": [ + "nmdc:dobj-11-pr8zqm21" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-stdh6256.1", + "name": "Read QC Activity for nmdc:omprc-11-dw7shd52", + "started_at_time": "2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T03:38:32+00:00", + "was_informed_by": "nmdc:omprc-11-dw7shd52", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-pr8zqm21" + ], + "has_output": [ + "nmdc:dobj-11-pd5ekz58", + "nmdc:dobj-11-n3y0vj17" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-dw7shd52" + ], + "version": "v1.0.2", + "input_read_count": 24889788, + "output_read_count": 23803802, + "input_read_bases": 3758357988, + "output_read_bases": 3558782964 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-pb58eb54.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-dw7shd52", + 
"started_at_time": "2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T03:38:32+00:00", + "was_informed_by": "nmdc:omprc-11-dw7shd52", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-pd5ekz58" + ], + "has_output": [ + "nmdc:dobj-11-rrpb0g41", + "nmdc:dobj-11-ep3hb794", + "nmdc:dobj-11-byx14935", + "nmdc:dobj-11-rkrtcp34", + "nmdc:dobj-11-6t23xp27", + "nmdc:dobj-11-rxh35157", + "nmdc:dobj-11-j3rjym54", + "nmdc:dobj-11-v6wzv743", + "nmdc:dobj-11-f933wm58" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-dw7shd52" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-srrrsz66", + "name": "10533.3.165334.CAATCGA-GTCGATT.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2318220660, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8jwtwf15", + "name": "nmdc_wfrqc-11-r06gdw37.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-j43hz774", + "file_size_bytes": 2019434951, + "md5_checksum": "60f03b815160b29125c2bd0776a330bf", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrqc-11-r06gdw37.1/nmdc_wfrqc-11-r06gdw37.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-cxrk0y07", + "name": "nmdc_wfrqc-11-r06gdw37.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-j43hz774", + "file_size_bytes": 290, + "md5_checksum": "c40fa552711f6b19130b2a559f2d4cdc", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrqc-11-r06gdw37.1/nmdc_wfrqc-11-r06gdw37.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-853q0r09", + "name": "nmdc_wfmgas-11-xmw7nz24.1_contigs.fna", + "description": "Assembled contigs 
fasta for nmdc:omprc-11-j43hz774", + "file_size_bytes": 118373353, + "md5_checksum": "b592bacdb0908a77aec0a4d9fa75f0c6", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfmgas-11-xmw7nz24.1/nmdc_wfmgas-11-xmw7nz24.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tfbp1p53", + "name": "nmdc_wfmgas-11-xmw7nz24.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-j43hz774", + "file_size_bytes": 117719951, + "md5_checksum": "f892cf66ae05c1c986175e76be1985cc", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfmgas-11-xmw7nz24.1/nmdc_wfmgas-11-xmw7nz24.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-v9jba850", + "name": "nmdc_wfmgas-11-xmw7nz24.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-j43hz774", + "file_size_bytes": 18439149, + "md5_checksum": "21084dca856113dffa7c8a25b0137914", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfmgas-11-xmw7nz24.1/nmdc_wfmgas-11-xmw7nz24.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2n7x0w81", + "name": "nmdc_wfmgas-11-xmw7nz24.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-j43hz774", + "file_size_bytes": 18641775, + "md5_checksum": "d45edd3fe502dc12bef85450f0806696", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfmgas-11-xmw7nz24.1/nmdc_wfmgas-11-xmw7nz24.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-bbh3xh85", + "name": "nmdc_wfmgas-11-xmw7nz24.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-j43hz774", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": 
"Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfmgas-11-xmw7nz24.1/nmdc_wfmgas-11-xmw7nz24.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-674tr692", + "name": "nmdc_wfrbt-11-91d3ee31.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 1524, + "md5_checksum": "70f29a321c925cfc0e2003515f708400", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mygn5j98", + "name": "nmdc_wfrbt-11-91d3ee31.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 670250, + "md5_checksum": "93d5419c0b31e0696ab8ffef477945fb", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vph75a11", + "name": "nmdc_wfrbt-11-91d3ee31.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 229949, + "md5_checksum": "9cd3b2939adabd809741ae6a84260266", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-enag3083", + "name": "nmdc_wfrbt-11-91d3ee31.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 1814515284, + "md5_checksum": "acea91fced8993a40cf1eb9cda29c4cd", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-2jjars57", + "name": "nmdc_wfrbt-11-91d3ee31.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 253730, + "md5_checksum": "b623a0d3bdff34fb97530c74bb558aeb", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fm6p7a57", + "name": "nmdc_wfrbt-11-91d3ee31.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 2330558, + "md5_checksum": "e461b2e81a22514fcd691caeaa7b0ca1", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fx5bgj56", + "name": "nmdc_wfrbt-11-91d3ee31.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 1445957300, + "md5_checksum": "38b7c63d0157f8bf4316f4295f0e6e28", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-345fcs52", + "name": "nmdc_wfrbt-11-91d3ee31.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 639677, + "md5_checksum": "be0c2bc71cefcb0f0a23d270d047f30c", + "data_object_type": "Kraken2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mfyyks47", + "name": "nmdc_wfrbt-11-91d3ee31.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-j43hz774", + "file_size_bytes": 3991377, + "md5_checksum": "1df4b479c887b43319d89cc80dc35239", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-j43hz774/nmdc:wfrbt-11-91d3ee31.1/nmdc_wfrbt-11-91d3ee31.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-xmw7nz24.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-j43hz774", + "started_at_time": "2021-10-11T02:27:08Z", + "ended_at_time": "2021-10-11T04:45:21+00:00", + "was_informed_by": "nmdc:omprc-11-j43hz774", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-8jwtwf15" + ], + "has_output": [ + "nmdc:dobj-11-853q0r09", + "nmdc:dobj-11-tfbp1p53", + "nmdc:dobj-11-v9jba850", + "nmdc:dobj-11-2n7x0w81", + "nmdc:dobj-11-bbh3xh85" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-j43hz774" + ], + "version": "v1.0.2", + "asm_score": 3.266, + "scaffolds": 216046, + "scaf_logsum": 294510, + "scaf_powsum": 31903, + "scaf_max": 16883, + "scaf_bp": 108577150, + "scaf_n50": 56962, + "scaf_n90": 179563, + "scaf_l50": 493, + "scaf_l90": 290, + "contigs": 216252, + "contig_bp": 108575090, + "ctg_n50": 57034, + "ctg_l50": 493, + "ctg_n90": 179762, + "ctg_l90": 290, + "ctg_logsum": 293195, + "ctg_powsum": 31744, + "ctg_max": 16883, + "gap_pct": 0.0019, + "gc_std": 0.09516, + "gc_avg": 0.63494 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-j43hz774", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA 
- GW-RW S3_50_60", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-xngecc18" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127652" + ], + "has_output": [ + "nmdc:dobj-11-srrrsz66" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_50_60", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-r06gdw37.1", + "name": "Read QC Activity for nmdc:omprc-11-j43hz774", + "started_at_time": "2021-10-11T02:27:08Z", + "ended_at_time": "2021-10-11T04:45:21+00:00", + "was_informed_by": "nmdc:omprc-11-j43hz774", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-srrrsz66" + ], + "has_output": [ + "nmdc:dobj-11-8jwtwf15", + "nmdc:dobj-11-cxrk0y07" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-j43hz774" + ], + "version": "v1.0.2", + "input_read_count": 26604768, + "output_read_count": 24717950, + "input_read_bases": 4017319968, + "output_read_bases": 3697162034 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-91d3ee31.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-j43hz774", + "started_at_time": "2021-10-11T02:27:08Z", + "ended_at_time": "2021-10-11T04:45:21+00:00", + "was_informed_by": "nmdc:omprc-11-j43hz774", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + 
"nmdc:dobj-11-8jwtwf15" + ], + "has_output": [ + "nmdc:dobj-11-674tr692", + "nmdc:dobj-11-mygn5j98", + "nmdc:dobj-11-vph75a11", + "nmdc:dobj-11-enag3083", + "nmdc:dobj-11-2jjars57", + "nmdc:dobj-11-fm6p7a57", + "nmdc:dobj-11-fx5bgj56", + "nmdc:dobj-11-345fcs52", + "nmdc:dobj-11-mfyyks47" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-j43hz774" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-qjrhfc82", + "name": "10533.3.165334.TGACTGA-GTCAGTC.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2711112988, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4d553h76", + "name": "nmdc_wfrqc-11-3xsr9d33.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 2479437709, + "md5_checksum": "c4f29a07f3ce03ee2a2d11c90e8b43d6", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrqc-11-3xsr9d33.1/nmdc_wfrqc-11-3xsr9d33.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fg7s9m57", + "name": "nmdc_wfrqc-11-3xsr9d33.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 284, + "md5_checksum": "9c600ec3be94d876f00d22808f3e8a59", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrqc-11-3xsr9d33.1/nmdc_wfrqc-11-3xsr9d33.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7jttmj78", + "name": "nmdc_wfmgas-11-jfrsvn56.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 94450971, + "md5_checksum": "f3a9e375dc651b30401413363579eb6f", + "data_object_type": "Assembly Contigs", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfmgas-11-jfrsvn56.1/nmdc_wfmgas-11-jfrsvn56.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4rvvn578", + "name": "nmdc_wfmgas-11-jfrsvn56.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 93856416, + "md5_checksum": "763f8b00f602e2b06dcec94e91b6bb81", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfmgas-11-jfrsvn56.1/nmdc_wfmgas-11-jfrsvn56.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6w77qa31", + "name": "nmdc_wfmgas-11-jfrsvn56.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 16819849, + "md5_checksum": "dd7429c2a60c6cb2db73d1695f6919f9", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfmgas-11-jfrsvn56.1/nmdc_wfmgas-11-jfrsvn56.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dq1yw319", + "name": "nmdc_wfmgas-11-jfrsvn56.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 16997209, + "md5_checksum": "3429e3ac4cbaf8f6ad4c74b68c15c08f", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfmgas-11-jfrsvn56.1/nmdc_wfmgas-11-jfrsvn56.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wfxn1v20", + "name": "nmdc_wfmgas-11-jfrsvn56.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfmgas-11-jfrsvn56.1/nmdc_wfmgas-11-jfrsvn56.1_pairedMapped_sorted.bam", + 
"type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zv66za24", + "name": "nmdc_wfrbt-11-wcrqqa38.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 3508, + "md5_checksum": "130ee7559789726a2cadccd3126dacad", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-cnj6at15", + "name": "nmdc_wfrbt-11-wcrqqa38.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 798264, + "md5_checksum": "c955eae73afbfe1ad4c4eb2eac51f3f3", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ff14qt45", + "name": "nmdc_wfrbt-11-wcrqqa38.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 234834, + "md5_checksum": "7ccb4ee5a0728322154b29a79d13c842", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-tn4pqq71", + "name": "nmdc_wfrbt-11-wcrqqa38.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 2231971137, + "md5_checksum": "8b88e19f3d4f22c8bb71f66e7aec6dba", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": 
"nmdc:dobj-11-4cajbc27", + "name": "nmdc_wfrbt-11-wcrqqa38.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 257151, + "md5_checksum": "35a0d72edac6c5e7f9c8ddf86c5534e0", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-dhmhbp81", + "name": "nmdc_wfrbt-11-wcrqqa38.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 2341088, + "md5_checksum": "f808a89810cdb2a911a5b5388b70ce94", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xegn7n45", + "name": "nmdc_wfrbt-11-wcrqqa38.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 1782429285, + "md5_checksum": "dfc90170aa038c2425702be223cb2f23", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t8y3xh58", + "name": "nmdc_wfrbt-11-wcrqqa38.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 661482, + "md5_checksum": "84255d3bab9ea79151db5ad7bcbc677c", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xmqsbx35", + "name": 
"nmdc_wfrbt-11-wcrqqa38.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-kgxpef29", + "file_size_bytes": 4020719, + "md5_checksum": "1c8339d96884c4a408de7804e00490d1", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-kgxpef29/nmdc:wfrbt-11-wcrqqa38.1/nmdc_wfrbt-11-wcrqqa38.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-jfrsvn56.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-kgxpef29", + "started_at_time": "2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "was_informed_by": "nmdc:omprc-11-kgxpef29", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-4d553h76" + ], + "has_output": [ + "nmdc:dobj-11-7jttmj78", + "nmdc:dobj-11-4rvvn578", + "nmdc:dobj-11-6w77qa31", + "nmdc:dobj-11-dq1yw319", + "nmdc:dobj-11-wfxn1v20" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-kgxpef29" + ], + "version": "v1.0.2", + "asm_score": 4.409, + "scaffolds": 197600, + "scaf_logsum": 130537, + "scaf_powsum": 13961, + "scaf_max": 69027, + "scaf_bp": 85732440, + "scaf_n50": 62435, + "scaf_n90": 168596, + "scaf_l50": 404, + "scaf_l90": 286, + "scaf_n_gt50k": 1, + "scaf_l_gt50k": 69027, + "scaf_pct_gt50k": 0.080514446, + "contigs": 197669, + "contig_bp": 85731750, + "ctg_n50": 62467, + "ctg_l50": 404, + "ctg_n90": 168661, + "ctg_l90": 286, + "ctg_logsum": 130142, + "ctg_powsum": 13918, + "ctg_max": 69027, + "gap_pct": 0.0008, + "gc_std": 0.09749, + "gc_avg": 0.62891 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-kgxpef29", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a 
hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-tpk9x619" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127654" + ], + "has_output": [ + "nmdc:dobj-11-qjrhfc82" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_40_50", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-3xsr9d33.1", + "name": "Read QC Activity for nmdc:omprc-11-kgxpef29", + "started_at_time": "2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "was_informed_by": "nmdc:omprc-11-kgxpef29", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-qjrhfc82" + ], + "has_output": [ + "nmdc:dobj-11-4d553h76", + "nmdc:dobj-11-fg7s9m57" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-kgxpef29" + ], + "version": "v1.0.2", + "input_read_count": 30951192, + "output_read_count": 30289044, + "input_read_bases": 4673629992, + "output_read_bases": 4526478748 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-wcrqqa38.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-kgxpef29", + "started_at_time": "2021-10-11T02:23:29Z", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "was_informed_by": "nmdc:omprc-11-kgxpef29", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-4d553h76" + ], + "has_output": [ + "nmdc:dobj-11-zv66za24", + "nmdc:dobj-11-cnj6at15", + "nmdc:dobj-11-ff14qt45", + 
"nmdc:dobj-11-tn4pqq71", + "nmdc:dobj-11-4cajbc27", + "nmdc:dobj-11-dhmhbp81", + "nmdc:dobj-11-xegn7n45", + "nmdc:dobj-11-t8y3xh58", + "nmdc:dobj-11-xmqsbx35" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-kgxpef29" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-zqgnfj09", + "name": "10533.3.165334.ACGATGA-GTCATCG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2411560282, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q3wm2a75", + "name": "nmdc_wfrqc-11-9z2pkg86.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-qrsway30", + "file_size_bytes": 2195848744, + "md5_checksum": "cec95659bb04ae095f51821ddaa9fa59", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrqc-11-9z2pkg86.1/nmdc_wfrqc-11-9z2pkg86.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f21fgz27", + "name": "nmdc_wfrqc-11-9z2pkg86.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-qrsway30", + "file_size_bytes": 284, + "md5_checksum": "7b4f365bbe942a523890abf13d1b6436", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrqc-11-9z2pkg86.1/nmdc_wfrqc-11-9z2pkg86.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-fz30g584", + "name": "nmdc_wfmgas-11-9frn1687.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-qrsway30", + "file_size_bytes": 79955448, + "md5_checksum": "6bfa068f1a9d661e232ec2f5df989247", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfmgas-11-9frn1687.1/nmdc_wfmgas-11-9frn1687.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-hgnt5k80", + "name": 
"nmdc_wfmgas-11-9frn1687.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-qrsway30", + "file_size_bytes": 79445257, + "md5_checksum": "95c0276e82c5ba591d0a42c19eb2c935", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfmgas-11-9frn1687.1/nmdc_wfmgas-11-9frn1687.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-z2vsrg57", + "name": "nmdc_wfmgas-11-9frn1687.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-qrsway30", + "file_size_bytes": 14401352, + "md5_checksum": "d964370faf79eecc541529827ee32d86", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfmgas-11-9frn1687.1/nmdc_wfmgas-11-9frn1687.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9d6a3m20", + "name": "nmdc_wfmgas-11-9frn1687.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-qrsway30", + "file_size_bytes": 14542912, + "md5_checksum": "18b0b976c325ecde83d5cc082a4debb6", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfmgas-11-9frn1687.1/nmdc_wfmgas-11-9frn1687.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t20v4989", + "name": "nmdc_wfmgas-11-9frn1687.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-qrsway30", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfmgas-11-9frn1687.1/nmdc_wfmgas-11-9frn1687.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-benqhc17", + "name": "nmdc_wfrbt-11-cynv2854.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-qrsway30", + 
"file_size_bytes": 2418, + "md5_checksum": "ccbe419157d8286626330fd0eb0dd0e0", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-12bbfj05", + "name": "nmdc_wfrbt-11-cynv2854.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-qrsway30", + "file_size_bytes": 759212, + "md5_checksum": "92ab65cdaca3367552e03d895123e04f", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4drz1t62", + "name": "nmdc_wfrbt-11-cynv2854.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-qrsway30", + "file_size_bytes": 231563, + "md5_checksum": "0b3ff6503723d6ea9b84552f68ed4270", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ta5vzc39", + "name": "nmdc_wfrbt-11-cynv2854.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-qrsway30", + "file_size_bytes": 1950007455, + "md5_checksum": "8e5ad12b7fa8873463088d7bf361f7c5", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-h41ewj22", + "name": "nmdc_wfrbt-11-cynv2854.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-qrsway30", + "file_size_bytes": 255724, + "md5_checksum": 
"a3255df52cd6150f03bbf7cbd655ec76", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-gzdnsd26", + "name": "nmdc_wfrbt-11-cynv2854.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-qrsway30", + "file_size_bytes": 2337553, + "md5_checksum": "a25a5d7e399624e5e5735b65a9dd322a", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rhdgkn39", + "name": "nmdc_wfrbt-11-cynv2854.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-qrsway30", + "file_size_bytes": 1555636513, + "md5_checksum": "dd953aebfd5cf624a5ffa8c6d6b64b08", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-mhxeh779", + "name": "nmdc_wfrbt-11-cynv2854.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-qrsway30", + "file_size_bytes": 647090, + "md5_checksum": "96f47f6cd2350fb1c7c7b746d2e9d811", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zj55cb78", + "name": "nmdc_wfrbt-11-cynv2854.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-qrsway30", + "file_size_bytes": 3939982, + "md5_checksum": "ae369194e4b24e137fc23da0412277a6", + 
"data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-qrsway30/nmdc:wfrbt-11-cynv2854.1/nmdc_wfrbt-11-cynv2854.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-9frn1687.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-qrsway30", + "started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "was_informed_by": "nmdc:omprc-11-qrsway30", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-q3wm2a75" + ], + "has_output": [ + "nmdc:dobj-11-fz30g584", + "nmdc:dobj-11-hgnt5k80", + "nmdc:dobj-11-z2vsrg57", + "nmdc:dobj-11-9d6a3m20", + "nmdc:dobj-11-t20v4989" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-qrsway30" + ], + "version": "v1.0.2", + "asm_score": 2.914, + "scaffolds": 169419, + "scaf_logsum": 99077, + "scaf_powsum": 10508, + "scaf_max": 9079, + "scaf_bp": 72512268, + "scaf_n50": 54616, + "scaf_n90": 144376, + "scaf_l50": 399, + "scaf_l90": 286, + "contigs": 169495, + "contig_bp": 72511508, + "ctg_n50": 54638, + "ctg_l50": 399, + "ctg_n90": 144448, + "ctg_l90": 286, + "ctg_logsum": 98556, + "ctg_powsum": 10453, + "ctg_max": 9079, + "gap_pct": 0.00105, + "gc_std": 0.09653, + "gc_avg": 0.62989 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-qrsway30", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-mmr87q87" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127656" + ], + "has_output": [ + "nmdc:dobj-11-zqgnfj09" + ], + "mod_date": "2021-06-18", 
+ "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_30_40", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-9z2pkg86.1", + "name": "Read QC Activity for nmdc:omprc-11-qrsway30", + "started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "was_informed_by": "nmdc:omprc-11-qrsway30", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-zqgnfj09" + ], + "has_output": [ + "nmdc:dobj-11-q3wm2a75", + "nmdc:dobj-11-f21fgz27" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-qrsway30" + ], + "version": "v1.0.2", + "input_read_count": 27317020, + "output_read_count": 26481746, + "input_read_bases": 4124870020, + "output_read_bases": 3960490395 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-cynv2854.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-qrsway30", + "started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-10-11T03:58:56+00:00", + "was_informed_by": "nmdc:omprc-11-qrsway30", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-q3wm2a75" + ], + "has_output": [ + "nmdc:dobj-11-benqhc17", + "nmdc:dobj-11-12bbfj05", + "nmdc:dobj-11-4drz1t62", + "nmdc:dobj-11-ta5vzc39", + "nmdc:dobj-11-h41ewj22", + "nmdc:dobj-11-gzdnsd26", + "nmdc:dobj-11-rhdgkn39", + "nmdc:dobj-11-mhxeh779", + "nmdc:dobj-11-zj55cb78" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-qrsway30" + ], + "version": "v1.0.2" + } + ] 
+ }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-hkern724", + "name": "10533.2.165322.GTGAGCT-AAGCTCA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2103957707, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ct1wrn31", + "name": "nmdc_wfrqc-11-d8yhrn23.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-nry91b19", + "file_size_bytes": 1856919615, + "md5_checksum": "2791a196017767af3b5b21a3029799c0", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrqc-11-d8yhrn23.1/nmdc_wfrqc-11-d8yhrn23.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wt8c7q67", + "name": "nmdc_wfrqc-11-d8yhrn23.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-nry91b19", + "file_size_bytes": 283, + "md5_checksum": "92cb49efbff5d5977e00dbad1c4d0d9f", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrqc-11-d8yhrn23.1/nmdc_wfrqc-11-d8yhrn23.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-demhf496", + "name": "nmdc_wfmgas-11-zdhfwc82.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-nry91b19", + "file_size_bytes": 97040152, + "md5_checksum": "9314dac9240604dbb7285bf97081173c", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfmgas-11-zdhfwc82.1/nmdc_wfmgas-11-zdhfwc82.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-r36r3895", + "name": "nmdc_wfmgas-11-zdhfwc82.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-nry91b19", + "file_size_bytes": 96496564, + "md5_checksum": "179b4bf74a83de503faac445e808a23a", + "data_object_type": "Assembly Scaffolds", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfmgas-11-zdhfwc82.1/nmdc_wfmgas-11-zdhfwc82.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3cnark60", + "name": "nmdc_wfmgas-11-zdhfwc82.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-nry91b19", + "file_size_bytes": 15372010, + "md5_checksum": "98dd31846cf6c856e7f6b532a2ca8c25", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfmgas-11-zdhfwc82.1/nmdc_wfmgas-11-zdhfwc82.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-0skqf746", + "name": "nmdc_wfmgas-11-zdhfwc82.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-nry91b19", + "file_size_bytes": 15510419, + "md5_checksum": "12f0641dcc109eebce41a65b5be5ac03", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfmgas-11-zdhfwc82.1/nmdc_wfmgas-11-zdhfwc82.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4713r752", + "name": "nmdc_wfmgas-11-zdhfwc82.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-nry91b19", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfmgas-11-zdhfwc82.1/nmdc_wfmgas-11-zdhfwc82.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ks12xn40", + "name": "nmdc_wfrbt-11-aer0h346.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 1199, + "md5_checksum": "53ee263960c39126e039656a121deb96", + "data_object_type": "GOTTCHA2 Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wdbrt838", + "name": "nmdc_wfrbt-11-aer0h346.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 703299, + "md5_checksum": "2781b9269b8e24f49a1a301d44d0e3d5", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-empreg95", + "name": "nmdc_wfrbt-11-aer0h346.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 229311, + "md5_checksum": "0ed808b8ce29d39c3b555e7d5bf4c274", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xz0bje42", + "name": "nmdc_wfrbt-11-aer0h346.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 1642196063, + "md5_checksum": "a7d8f038b87bd28843e30c5dd115704b", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-xj9m3980", + "name": "nmdc_wfrbt-11-aer0h346.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 254418, + "md5_checksum": "b4cbc81c986c67c1037c8b7280924683", + "data_object_type": "Centrifuge Classification Report", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vaaa4v87", + "name": "nmdc_wfrbt-11-aer0h346.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 2333132, + "md5_checksum": "e0c61a191258597984a05d86eaf4d71f", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q8j6f398", + "name": "nmdc_wfrbt-11-aer0h346.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 1309125719, + "md5_checksum": "e1cbcfa86444a4ff4e992bcb6653d18f", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-paj2dc93", + "name": "nmdc_wfrbt-11-aer0h346.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 639737, + "md5_checksum": "d2e10038a40e81e81ba94f75ed1ec52c", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-023xz213", + "name": "nmdc_wfrbt-11-aer0h346.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-nry91b19", + "file_size_bytes": 3988966, + "md5_checksum": "ddba84cd45462d3a55df4ac62bb4eeb8", + "data_object_type": "Kraken2 Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-nry91b19/nmdc:wfrbt-11-aer0h346.1/nmdc_wfrbt-11-aer0h346.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-zdhfwc82.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-nry91b19", + "started_at_time": "2021-10-11T02:27:15Z", + "ended_at_time": "2021-10-11T03:57:48+00:00", + "was_informed_by": "nmdc:omprc-11-nry91b19", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-ct1wrn31" + ], + "has_output": [ + "nmdc:dobj-11-demhf496", + "nmdc:dobj-11-r36r3895", + "nmdc:dobj-11-3cnark60", + "nmdc:dobj-11-0skqf746", + "nmdc:dobj-11-4713r752" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-nry91b19" + ], + "version": "v1.0.2", + "asm_score": 4.164, + "scaffolds": 180310, + "scaf_logsum": 193641, + "scaf_powsum": 20844, + "scaf_max": 29106, + "scaf_bp": 88912475, + "scaf_n50": 51225, + "scaf_n90": 148971, + "scaf_l50": 493, + "scaf_l90": 292, + "contigs": 180439, + "contig_bp": 88911005, + "ctg_n50": 51430, + "ctg_l50": 492, + "ctg_n90": 149085, + "ctg_l90": 292, + "ctg_logsum": 192880, + "ctg_powsum": 20759, + "ctg_max": 29106, + "gap_pct": 0.00165, + "gc_std": 0.109, + "gc_avg": 0.62144 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-nry91b19", + "name": "Riverbed sediment microbial communities from areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", + "description": "Riverbed sediment samples were collected from an area with no vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-jqzk1523" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127651" + ], + "has_output": [ + "nmdc:dobj-11-hkern724" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from 
areas with no vegetation in Columbia River, Washington, USA - GW-RW N3_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-d8yhrn23.1", + "name": "Read QC Activity for nmdc:omprc-11-nry91b19", + "started_at_time": "2021-10-11T02:27:15Z", + "ended_at_time": "2021-10-11T03:57:48+00:00", + "was_informed_by": "nmdc:omprc-11-nry91b19", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-hkern724" + ], + "has_output": [ + "nmdc:dobj-11-ct1wrn31", + "nmdc:dobj-11-wt8c7q67" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-nry91b19" + ], + "version": "v1.0.2", + "input_read_count": 23728904, + "output_read_count": 22416634, + "input_read_bases": 3583064504, + "output_read_bases": 3352071049 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-aer0h346.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-nry91b19", + "started_at_time": "2021-10-11T02:27:15Z", + "ended_at_time": "2021-10-11T03:57:48+00:00", + "was_informed_by": "nmdc:omprc-11-nry91b19", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-ct1wrn31" + ], + "has_output": [ + "nmdc:dobj-11-ks12xn40", + "nmdc:dobj-11-wdbrt838", + "nmdc:dobj-11-empreg95", + "nmdc:dobj-11-xz0bje42", + "nmdc:dobj-11-xj9m3980", + "nmdc:dobj-11-vaaa4v87", + "nmdc:dobj-11-q8j6f398", + "nmdc:dobj-11-paj2dc93", + "nmdc:dobj-11-023xz213" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-nry91b19" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-9wykaf95", + 
"name": "10533.2.165322.GTCTCCT-AAGGAGA.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 2116898122, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5y2r1q08", + "name": "nmdc_wfrqc-11-07sbpc11.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 1880069213, + "md5_checksum": "04b9014981f7035c39bd7f870613ed93", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrqc-11-07sbpc11.1/nmdc_wfrqc-11-07sbpc11.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-rmj8h545", + "name": "nmdc_wfrqc-11-07sbpc11.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 286, + "md5_checksum": "b66266969ab3df4c1cb2b16c1fa7d098", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrqc-11-07sbpc11.1/nmdc_wfrqc-11-07sbpc11.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ac2g5s07", + "name": "nmdc_wfmgas-11-26yar996.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 94543096, + "md5_checksum": "256042019c134613a715e6dec66b9974", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfmgas-11-26yar996.1/nmdc_wfmgas-11-26yar996.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-jw9zxt64", + "name": "nmdc_wfmgas-11-26yar996.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 93992610, + "md5_checksum": "fb74cf9f23d49c8d2ae9bc85cf284b4b", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfmgas-11-26yar996.1/nmdc_wfmgas-11-26yar996.1_scaffolds.fna", 
+ "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q8kxes45", + "name": "nmdc_wfmgas-11-26yar996.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 15571972, + "md5_checksum": "94ebb48b227eb863fb8cc1f6002ba50c", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfmgas-11-26yar996.1/nmdc_wfmgas-11-26yar996.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-8n8jvw40", + "name": "nmdc_wfmgas-11-26yar996.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 15719536, + "md5_checksum": "a5d821dadb25d08c7e296bf3aded2274", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfmgas-11-26yar996.1/nmdc_wfmgas-11-26yar996.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-4ftjpd69", + "name": "nmdc_wfmgas-11-26yar996.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfmgas-11-26yar996.1/nmdc_wfmgas-11-26yar996.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b8ngh713", + "name": "nmdc_wfrbt-11-z3n95784.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 2178, + "md5_checksum": "46371c7bc8259e459f975f915aaac26f", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-vn3mzy43", + "name": 
"nmdc_wfrbt-11-z3n95784.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 697690, + "md5_checksum": "5dd9bc51105920f3f629e8106235af3b", + "data_object_type": "GOTTCHA2 Report Full", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-v058ey31", + "name": "nmdc_wfrbt-11-z3n95784.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 231103, + "md5_checksum": "1879e0e9af6d568ac9c7ffdb47fc7f12", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q3h1h761", + "name": "nmdc_wfrbt-11-z3n95784.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 1676897166, + "md5_checksum": "e3f410adc2347396abfdec2a848000d9", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-wa26qc82", + "name": "nmdc_wfrbt-11-z3n95784.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 253692, + "md5_checksum": "ed6c4f17d6ae759487164ca8ed5edf45", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nrz8g574", + "name": 
"nmdc_wfrbt-11-z3n95784.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 2329422, + "md5_checksum": "6d54f73f251de1bd5c4ca8665f098ac0", + "data_object_type": "Centrifuge Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-6s2jaq18", + "name": "nmdc_wfrbt-11-z3n95784.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 1336793184, + "md5_checksum": "1d4f5a605d4549801fda16da567efe56", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c3y44f77", + "name": "nmdc_wfrbt-11-z3n95784.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 632192, + "md5_checksum": "8bb5c66575c7c953719ae9947600ad49", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-yw6rax13", + "name": "nmdc_wfrbt-11-z3n95784.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-0n8y1d07", + "file_size_bytes": 3946317, + "md5_checksum": "157f7672690ba8207808cc4386ff10a4", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-0n8y1d07/nmdc:wfrbt-11-z3n95784.1/nmdc_wfrbt-11-z3n95784.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-26yar996.1", + "name": "Metagenome Assembly Activity 
for nmdc:omprc-11-0n8y1d07", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T03:21:25+00:00", + "was_informed_by": "nmdc:omprc-11-0n8y1d07", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-5y2r1q08" + ], + "has_output": [ + "nmdc:dobj-11-ac2g5s07", + "nmdc:dobj-11-jw9zxt64", + "nmdc:dobj-11-q8kxes45", + "nmdc:dobj-11-8n8jvw40", + "nmdc:dobj-11-4ftjpd69" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-0n8y1d07" + ], + "version": "v1.0.2", + "asm_score": 3.393, + "scaffolds": 182865, + "scaf_logsum": 171254, + "scaf_powsum": 18458, + "scaf_max": 16317, + "scaf_bp": 86363345, + "scaf_n50": 53484, + "scaf_n90": 154812, + "scaf_l50": 457, + "scaf_l90": 289, + "contigs": 182939, + "contig_bp": 86362605, + "ctg_n50": 53760, + "ctg_l50": 456, + "ctg_n90": 154881, + "ctg_l90": 289, + "ctg_logsum": 170806, + "ctg_powsum": 18408, + "ctg_max": 16317, + "gap_pct": 0.00086, + "gc_std": 0.09607, + "gc_avg": 0.63666 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-0n8y1d07", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-a7fxtx60" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127655" + ], + "has_output": [ + "nmdc:dobj-11-9wykaf95" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S3_20_30", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + 
"processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": "nmdc:wfrqc-11-07sbpc11.1", + "name": "Read QC Activity for nmdc:omprc-11-0n8y1d07", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T03:21:25+00:00", + "was_informed_by": "nmdc:omprc-11-0n8y1d07", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-9wykaf95" + ], + "has_output": [ + "nmdc:dobj-11-5y2r1q08", + "nmdc:dobj-11-rmj8h545" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-0n8y1d07" + ], + "version": "v1.0.2", + "input_read_count": 23985924, + "output_read_count": 22751496, + "input_read_bases": 3621874524, + "output_read_bases": 3400452550 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-z3n95784.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-0n8y1d07", + "started_at_time": "2021-10-11T02:23:42Z", + "ended_at_time": "2021-10-11T03:21:25+00:00", + "was_informed_by": "nmdc:omprc-11-0n8y1d07", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-5y2r1q08" + ], + "has_output": [ + "nmdc:dobj-11-b8ngh713", + "nmdc:dobj-11-vn3mzy43", + "nmdc:dobj-11-v058ey31", + "nmdc:dobj-11-q3h1h761", + "nmdc:dobj-11-wa26qc82", + "nmdc:dobj-11-nrz8g574", + "nmdc:dobj-11-6s2jaq18", + "nmdc:dobj-11-c3y44f77", + "nmdc:dobj-11-yw6rax13" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-0n8y1d07" + ], + "version": "v1.0.2" + } + ] + }, + { + "data_object_set": [ + { + "id": "nmdc:dobj-11-r0ygq360", + "name": "10533.2.165322.CCTTCCT-AAGGAAG.fastq.gz", + "description": "Raw sequencer read data", + "file_size_bytes": 1827996307, + "data_object_type": "Metagenome Raw Reads", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-b63t0y87", + 
"name": "nmdc_wfrqc-11-m18pd088.1_filtered.fastq.gz", + "description": "Filtered Reads for nmdc:omprc-11-p1735e67", + "file_size_bytes": 1661017378, + "md5_checksum": "8eec0e9c14abb418b906504d1675ecc5", + "data_object_type": "Filtered Sequencing Reads", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrqc-11-m18pd088.1/nmdc_wfrqc-11-m18pd088.1_filtered.fastq.gz", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-5erdny64", + "name": "nmdc_wfrqc-11-m18pd088.1_filterStats.txt", + "description": "Filtered Stats for nmdc:omprc-11-p1735e67", + "file_size_bytes": 286, + "md5_checksum": "5d07358bbc48f25e157ffc91ea7ae3e0", + "data_object_type": "QC Statistics", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrqc-11-m18pd088.1/nmdc_wfrqc-11-m18pd088.1_filterStats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-c86x5c83", + "name": "nmdc_wfmgas-11-n8rdwv90.1_contigs.fna", + "description": "Assembled contigs fasta for nmdc:omprc-11-p1735e67", + "file_size_bytes": 19015894, + "md5_checksum": "cccaaca66bf7c4d0fe36aa8d4297bd67", + "data_object_type": "Assembly Contigs", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfmgas-11-n8rdwv90.1/nmdc_wfmgas-11-n8rdwv90.1_contigs.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ybsg8010", + "name": "nmdc_wfmgas-11-n8rdwv90.1_scaffolds.fna", + "description": "Assembled scaffold fasta for nmdc:omprc-11-p1735e67", + "file_size_bytes": 18869172, + "md5_checksum": "614ba4fef96c6a8885d40f8dd99b27cc", + "data_object_type": "Assembly Scaffolds", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfmgas-11-n8rdwv90.1/nmdc_wfmgas-11-n8rdwv90.1_scaffolds.fna", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-q2y9nw08", + "name": "nmdc_wfmgas-11-n8rdwv90.1_covstats.txt", + "description": "Metagenome Contig Coverage Stats for nmdc:omprc-11-p1735e67", + "file_size_bytes": 4117727, + 
"md5_checksum": "73841dc769254f008fb28cc5853f43f6", + "data_object_type": "Assembly Coverage Stats", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfmgas-11-n8rdwv90.1/nmdc_wfmgas-11-n8rdwv90.1_covstats.txt", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-ekej0s30", + "name": "nmdc_wfmgas-11-n8rdwv90.1_assembly.agp", + "description": "Assembled AGP file for nmdc:omprc-11-p1735e67", + "file_size_bytes": 4138391, + "md5_checksum": "650908be526ecaa40c7be3026654a068", + "data_object_type": "Assembly AGP", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfmgas-11-n8rdwv90.1/nmdc_wfmgas-11-n8rdwv90.1_assembly.agp", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-nmfbbv83", + "name": "nmdc_wfmgas-11-n8rdwv90.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-p1735e67", + "file_size_bytes": 0, + "md5_checksum": "d41d8cd98f00b204e9800998ecf8427e", + "data_object_type": "Assembly Coverage BAM", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfmgas-11-n8rdwv90.1/nmdc_wfmgas-11-n8rdwv90.1_pairedMapped_sorted.bam", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-9wvf0h80", + "name": "nmdc_wfrbt-11-z1ybct38.1_gottcha2_report.tsv", + "description": "Gottcha2 TSV report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 3812, + "md5_checksum": "fbbad3e21108a372e3d53c9ee8fc3cd5", + "data_object_type": "GOTTCHA2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_gottcha2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-t83ze308", + "name": "nmdc_wfrbt-11-z1ybct38.1_gottcha2_report_full.tsv", + "description": "Gottcha2 full TSV report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 857087, + "md5_checksum": "dbf03e26f7e1529762830161fe1f1906", + "data_object_type": "GOTTCHA2 Report Full", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_gottcha2_report_full.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-7bf4mb91", + "name": "nmdc_wfrbt-11-z1ybct38.1_gottcha2_krona.html", + "description": "Gottcha2 Krona HTML report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 235621, + "md5_checksum": "284ce1b28b8964cb525025d678277dba", + "data_object_type": "GOTTCHA2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_gottcha2_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-zx14g348", + "name": "nmdc_wfrbt-11-z1ybct38.1_centrifuge_classification.tsv", + "description": "Centrifuge classification TSV report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 1437707313, + "md5_checksum": "a379527f61806391e42b3512146013a8", + "data_object_type": "Centrifuge Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_centrifuge_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-z1qw6j28", + "name": "nmdc_wfrbt-11-z1ybct38.1_centrifuge_report.tsv", + "description": "Centrifuge TSV report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 255105, + "md5_checksum": "3659ac6c99dea0fb1385c58eac8b1335", + "data_object_type": "Centrifuge Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_centrifuge_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-n5h02k27", + "name": "nmdc_wfrbt-11-z1ybct38.1_centrifuge_krona.html", + "description": "Centrifuge Krona HTML report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 2327985, + "md5_checksum": "3219058371bf2f8081b2dd2b434ec145", + "data_object_type": "Centrifuge Krona Plot", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_centrifuge_krona.html", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-46b3v052", + "name": "nmdc_wfrbt-11-z1ybct38.1_kraken2_classification.tsv", + "description": "Kraken classification TSV report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 1164013677, + "md5_checksum": "be29ebcd7358653afec7381f9ca43431", + "data_object_type": "Kraken2 Taxonomic Classification", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_kraken2_classification.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-f8p94069", + "name": "nmdc_wfrbt-11-z1ybct38.1_kraken2_report.tsv", + "description": "Kraken2 TSV report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 638368, + "md5_checksum": "a9e6ab6db23ddce02317e3e21ea3f618", + "data_object_type": "Kraken2 Classification Report", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_kraken2_report.tsv", + "type": "nmdc:DataObject" + }, + { + "id": "nmdc:dobj-11-3xwemf74", + "name": "nmdc_wfrbt-11-z1ybct38.1_kraken2_krona.html", + "description": "Kraken2 Krona HTML report for nmdc:omprc-11-p1735e67", + "file_size_bytes": 3982485, + "md5_checksum": "4c1aae1a46e51359f9146e48fff0e7f0", + "data_object_type": "Kraken2 Krona Plot", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-p1735e67/nmdc:wfrbt-11-z1ybct38.1/nmdc_wfrbt-11-z1ybct38.1_kraken2_krona.html", + "type": "nmdc:DataObject" + } + ], + "metagenome_assembly_set": [ + { + "id": "nmdc:wfmgas-11-n8rdwv90.1", + "name": "Metagenome Assembly Activity for nmdc:omprc-11-p1735e67", + "started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-11-13T18:52:13+00:00", + "was_informed_by": "nmdc:omprc-11-p1735e67", + "execution_resource": "NERSC-Cori", + "git_url": 
"https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-b63t0y87" + ], + "has_output": [ + "nmdc:dobj-11-c86x5c83", + "nmdc:dobj-11-ybsg8010", + "nmdc:dobj-11-q2y9nw08", + "nmdc:dobj-11-ekej0s30", + "nmdc:dobj-11-nmfbbv83" + ], + "type": "nmdc:MetagenomeAssembly", + "part_of": [ + "nmdc:omprc-11-p1735e67" + ], + "version": "v1.0.2", + "asm_score": 13.921, + "scaffolds": 48925, + "scaf_logsum": 9156.336, + "scaf_powsum": 1101.795, + "scaf_max": 58655, + "scaf_bp": 16964029, + "scaf_n50": 19539, + "scaf_n90": 43028, + "scaf_l50": 309, + "scaf_l90": 281, + "scaf_n_gt50k": 1, + "scaf_l_gt50k": 58655, + "scaf_pct_gt50k": 0.34576103, + "contigs": 48932, + "contig_bp": 16963869, + "ctg_n50": 19544, + "ctg_l50": 309, + "ctg_n90": 43034, + "ctg_l90": 281, + "ctg_logsum": 9125.582, + "ctg_powsum": 1096.518, + "ctg_max": 58655, + "gap_pct": 0.00094, + "gc_std": 0.10928, + "gc_avg": 0.57867 + } + ], + "omics_processing_set": [ + { + "id": "nmdc:omprc-11-p1735e67", + "name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", + "description": "Riverbed sediment samples were collected from an area with a lot of surrounding vegetation in a hyporheic zone (subsurface groundwater - surface water mixing zone)", + "has_input": [ + "nmdc:bsm-11-k4wa0808" + ], + "add_date": "2016-01-11", + "gold_sequencing_project_identifiers": [ + "gold:Gp0127653" + ], + "has_output": [ + "nmdc:dobj-11-r0ygq360" + ], + "mod_date": "2021-06-15", + "ncbi_project_name": "Riverbed sediment microbial communities from areas with vegetation nearby in Columbia River, Washington, USA - GW-RW S2_0_10", + "omics_type": { + "has_raw_value": "Metagenome" + }, + "part_of": [ + "nmdc:sty-11-aygzgv51" + ], + "principal_investigator": { + "has_raw_value": "James Stegen" + }, + "processing_institution": "JGI", + "type": "nmdc:OmicsProcessing" + } + ], + "read_qc_analysis_activity_set": [ + { + "id": 
"nmdc:wfrqc-11-m18pd088.1", + "name": "Read QC Activity for nmdc:omprc-11-p1735e67", + "started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-11-13T18:52:13+00:00", + "was_informed_by": "nmdc:omprc-11-p1735e67", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-r0ygq360" + ], + "has_output": [ + "nmdc:dobj-11-b63t0y87", + "nmdc:dobj-11-5erdny64" + ], + "type": "nmdc:ReadQcAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-p1735e67" + ], + "version": "v1.0.2", + "input_read_count": 20780788, + "output_read_count": 19516330, + "input_read_bases": 3137898988, + "output_read_bases": 2918466866 + } + ], + "read_based_taxonomy_analysis_activity_set": [ + { + "id": "nmdc:wfrbt-11-z1ybct38.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:omprc-11-p1735e67", + "started_at_time": "2021-10-11T02:23:35Z", + "ended_at_time": "2021-11-13T18:52:13+00:00", + "was_informed_by": "nmdc:omprc-11-p1735e67", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-b63t0y87" + ], + "has_output": [ + "nmdc:dobj-11-9wvf0h80", + "nmdc:dobj-11-t83ze308", + "nmdc:dobj-11-7bf4mb91", + "nmdc:dobj-11-zx14g348", + "nmdc:dobj-11-z1qw6j28", + "nmdc:dobj-11-n5h02k27", + "nmdc:dobj-11-46b3v052", + "nmdc:dobj-11-f8p94069", + "nmdc:dobj-11-3xwemf74" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysisActivity", + "part_of": [ + "nmdc:omprc-11-p1735e67" + ], + "version": "v1.0.2" + } + ] + } +] \ No newline at end of file From c13e93f25e27850717fb7e8d5cd21c45340587a0 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 20 Nov 2023 14:07:57 -0800 Subject: [PATCH 76/91] add delete method --- nmdc_automation/re_iding/re_id_file_operations.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/nmdc_automation/re_iding/re_id_file_operations.py b/nmdc_automation/re_iding/re_id_file_operations.py 
index a82181e5..3e54e72c 100644 --- a/nmdc_automation/re_iding/re_id_file_operations.py +++ b/nmdc_automation/re_iding/re_id_file_operations.py @@ -1,4 +1,4 @@ -import subprocess +import shutil import gzip import os import json @@ -41,7 +41,19 @@ def read_json_file(filename): data = json.load(json_file) return data +def delete_directory(path): + """Deletes the directory at the specified path along with all its contents. + Args: + path (str): Full path of the directory to be deleted. + """ + if os.path.exists(path) and os.path.isdir(path): + shutil.rmtree(path) + print(f"Directory '{path}' has been deleted.") + else: + print(f"Directory '{path}' does not exist or is not a directory.") + + def rewrite_id(src, dst, old_id, new_id, prefix=None): """ Rewrite lines in a file, replacing occurrences of an old ID with a new ID. From 722f6ce342e7dedac57baee303c69a9e495df64d Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 20 Nov 2023 14:43:02 -0800 Subject: [PATCH 77/91] changed versions --- ...c:sty-11-aygzgv51_re_ided_record_dump.json | 200 +++++++++--------- 1 file changed, 100 insertions(+), 100 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_re_ided_record_dump.json b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_re_ided_record_dump.json index 06d35da9..d054d989 100644 --- a/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_re_ided_record_dump.json +++ b/nmdc_automation/re_iding/scripts/data/nmdc:sty-11-aygzgv51_re_ided_record_dump.json @@ -193,7 +193,7 @@ "part_of": [ "nmdc:omprc-11-bn8jcq58" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 6.577, "scaffolds": 169645, "scaf_logsum": 215363, @@ -271,7 +271,7 @@ "part_of": [ "nmdc:omprc-11-bn8jcq58" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 32238374, "output_read_count": 30774080, "input_read_bases": 4867994474, @@ -503,7 +503,7 @@ "part_of": [ "nmdc:omprc-11-zp2ar437" ], - "version": "v1.0.2", + "version": "1.0.2", 
"asm_score": 5.224, "scaffolds": 116565, "scaf_logsum": 182081, @@ -578,7 +578,7 @@ "part_of": [ "nmdc:omprc-11-zp2ar437" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 22183982, "output_read_count": 20195754, "input_read_bases": 3349781282, @@ -810,7 +810,7 @@ "part_of": [ "nmdc:omprc-11-wepaa271" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.087, "scaffolds": 322890, "scaf_logsum": 491574, @@ -888,7 +888,7 @@ "part_of": [ "nmdc:omprc-11-wepaa271" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 35064492, "output_read_count": 33873238, "input_read_bases": 5294738292, @@ -1120,7 +1120,7 @@ "part_of": [ "nmdc:omprc-11-hymrq852" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 12.582, "scaffolds": 525116, "scaf_logsum": 725191, @@ -1198,7 +1198,7 @@ "part_of": [ "nmdc:omprc-11-hymrq852" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 67696542, "output_read_count": 67147510, "input_read_bases": 10222177842, @@ -1430,7 +1430,7 @@ "part_of": [ "nmdc:omprc-11-yt8css91" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 17.061, "scaffolds": 116033, "scaf_logsum": 196103, @@ -1508,7 +1508,7 @@ "part_of": [ "nmdc:omprc-11-yt8css91" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 19416222, "output_read_count": 18855352, "input_read_bases": 2931849522, @@ -1740,7 +1740,7 @@ "part_of": [ "nmdc:omprc-11-hgehsc37" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.21, "scaffolds": 78311, "scaf_logsum": 60806, @@ -1815,7 +1815,7 @@ "part_of": [ "nmdc:omprc-11-hgehsc37" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 19058974, "output_read_count": 17338778, "input_read_bases": 2877905074, @@ -2047,7 +2047,7 @@ "part_of": [ "nmdc:omprc-11-7vsv7h78" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 7.785, "scaffolds": 383447, "scaf_logsum": 496628, @@ -2125,7 +2125,7 @@ "part_of": [ "nmdc:omprc-11-7vsv7h78" ], - "version": 
"v1.0.2", + "version": "1.0.2", "input_read_count": 51286688, "output_read_count": 48276864, "input_read_bases": 7744289888, @@ -2357,7 +2357,7 @@ "part_of": [ "nmdc:omprc-11-5r54nt37" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.626, "scaffolds": 118391, "scaf_logsum": 70885, @@ -2432,7 +2432,7 @@ "part_of": [ "nmdc:omprc-11-5r54nt37" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 23705118, "output_read_count": 22801896, "input_read_bases": 3579472818, @@ -2664,7 +2664,7 @@ "part_of": [ "nmdc:omprc-11-76ebsj44" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.923, "scaffolds": 299890, "scaf_logsum": 453436, @@ -2739,7 +2739,7 @@ "part_of": [ "nmdc:omprc-11-76ebsj44" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 26227312, "output_read_count": 25182244, "input_read_bases": 3960324112, @@ -2971,7 +2971,7 @@ "part_of": [ "nmdc:omprc-11-s6wqag22" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 7.629, "scaffolds": 105366, "scaf_logsum": 63657, @@ -3046,7 +3046,7 @@ "part_of": [ "nmdc:omprc-11-s6wqag22" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 24223170, "output_read_count": 22768968, "input_read_bases": 3657698670, @@ -3278,7 +3278,7 @@ "part_of": [ "nmdc:omprc-11-x0es2p18" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 5.95, "scaffolds": 190940, "scaf_logsum": 174680, @@ -3353,7 +3353,7 @@ "part_of": [ "nmdc:omprc-11-x0es2p18" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 25674112, "output_read_count": 22503352, "input_read_bases": 3876790912, @@ -3585,7 +3585,7 @@ "part_of": [ "nmdc:omprc-11-1nvcer55" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.305, "scaffolds": 208427, "scaf_logsum": 212917, @@ -3660,7 +3660,7 @@ "part_of": [ "nmdc:omprc-11-1nvcer55" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 23886420, "output_read_count": 22738452, "input_read_bases": 3606849420, @@ -3892,7 
+3892,7 @@ "part_of": [ "nmdc:omprc-11-b051xn44" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.319, "scaffolds": 157774, "scaf_logsum": 111226, @@ -3967,7 +3967,7 @@ "part_of": [ "nmdc:omprc-11-b051xn44" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 31715882, "output_read_count": 30212248, "input_read_bases": 4789098182, @@ -4199,7 +4199,7 @@ "part_of": [ "nmdc:omprc-11-k8kt2j31" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.117, "scaffolds": 237183, "scaf_logsum": 307525, @@ -4274,7 +4274,7 @@ "part_of": [ "nmdc:omprc-11-k8kt2j31" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 26419652, "output_read_count": 25434840, "input_read_bases": 3989367452, @@ -4506,7 +4506,7 @@ "part_of": [ "nmdc:omprc-11-9pbab972" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 6.312, "scaffolds": 127272, "scaf_logsum": 77428, @@ -4581,7 +4581,7 @@ "part_of": [ "nmdc:omprc-11-9pbab972" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 28569382, "output_read_count": 26868700, "input_read_bases": 4313976682, @@ -4813,7 +4813,7 @@ "part_of": [ "nmdc:omprc-11-0g415160" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.48, "scaffolds": 272628, "scaf_logsum": 380592, @@ -4888,7 +4888,7 @@ "part_of": [ "nmdc:omprc-11-0g415160" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 23291434, "output_read_count": 22556158, "input_read_bases": 3517006534, @@ -5120,7 +5120,7 @@ "part_of": [ "nmdc:omprc-11-z5qv0f24" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.807, "scaffolds": 51180, "scaf_logsum": 6368.36, @@ -5195,7 +5195,7 @@ "part_of": [ "nmdc:omprc-11-z5qv0f24" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 20505370, "output_read_count": 19995028, "input_read_bases": 3096310870, @@ -5427,7 +5427,7 @@ "part_of": [ "nmdc:omprc-11-8qms8262" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 5.986, "scaffolds": 
132455, "scaf_logsum": 81839, @@ -5502,7 +5502,7 @@ "part_of": [ "nmdc:omprc-11-8qms8262" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 27906294, "output_read_count": 26116440, "input_read_bases": 4213850394, @@ -5734,7 +5734,7 @@ "part_of": [ "nmdc:omprc-11-k675bw84" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.618, "scaffolds": 95584, "scaf_logsum": 36615, @@ -5809,7 +5809,7 @@ "part_of": [ "nmdc:omprc-11-k675bw84" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 31642056, "output_read_count": 29115818, "input_read_bases": 4777950456, @@ -6041,7 +6041,7 @@ "part_of": [ "nmdc:omprc-11-mbv2jc69" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 5.751, "scaffolds": 175734, "scaf_logsum": 142614, @@ -6116,7 +6116,7 @@ "part_of": [ "nmdc:omprc-11-mbv2jc69" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 29872658, "output_read_count": 27896694, "input_read_bases": 4510771358, @@ -6348,7 +6348,7 @@ "part_of": [ "nmdc:omprc-11-kc23zq65" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.934, "scaffolds": 206599, "scaf_logsum": 270403, @@ -6423,7 +6423,7 @@ "part_of": [ "nmdc:omprc-11-kc23zq65" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 25320866, "output_read_count": 24600396, "input_read_bases": 3823450766, @@ -6655,7 +6655,7 @@ "part_of": [ "nmdc:omprc-11-c8dzx197" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 5.062, "scaffolds": 214737, "scaf_logsum": 272416, @@ -6730,7 +6730,7 @@ "part_of": [ "nmdc:omprc-11-c8dzx197" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 24239336, "output_read_count": 19917090, "input_read_bases": 3660139736, @@ -6962,7 +6962,7 @@ "part_of": [ "nmdc:omprc-11-tgxmb243" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 6.89, "scaffolds": 169622, "scaf_logsum": 141966, @@ -7037,7 +7037,7 @@ "part_of": [ "nmdc:omprc-11-tgxmb243" ], - "version": "v1.0.2", + "version": "1.0.2", 
"input_read_count": 21721428, "output_read_count": 19723416, "input_read_bases": 3279935628, @@ -7269,7 +7269,7 @@ "part_of": [ "nmdc:omprc-11-t0xjjc50" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 12.57, "scaffolds": 152330, "scaf_logsum": 274450, @@ -7347,7 +7347,7 @@ "part_of": [ "nmdc:omprc-11-t0xjjc50" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 36554212, "output_read_count": 33663942, "input_read_bases": 5519686012, @@ -7579,7 +7579,7 @@ "part_of": [ "nmdc:omprc-11-1avd3d16" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 18.19, "scaffolds": 139236, "scaf_logsum": 272657, @@ -7657,7 +7657,7 @@ "part_of": [ "nmdc:omprc-11-1avd3d16" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 26546332, "output_read_count": 25776010, "input_read_bases": 4008496132, @@ -7889,7 +7889,7 @@ "part_of": [ "nmdc:omprc-11-hk1bje46" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 6.419, "scaffolds": 89660, "scaf_logsum": 161291, @@ -7964,7 +7964,7 @@ "part_of": [ "nmdc:omprc-11-hk1bje46" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 17796788, "output_read_count": 16817496, "input_read_bases": 2687314988, @@ -8196,7 +8196,7 @@ "part_of": [ "nmdc:omprc-11-qtje8r57" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.588, "scaffolds": 120242, "scaf_logsum": 112140, @@ -8271,7 +8271,7 @@ "part_of": [ "nmdc:omprc-11-qtje8r57" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 22298982, "output_read_count": 20445042, "input_read_bases": 3367146282, @@ -8503,7 +8503,7 @@ "part_of": [ "nmdc:omprc-11-7ey2jr63" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 10.939, "scaffolds": 186895, "scaf_logsum": 337025, @@ -8581,7 +8581,7 @@ "part_of": [ "nmdc:omprc-11-7ey2jr63" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 39069214, "output_read_count": 37037822, "input_read_bases": 5899451314, @@ -8813,7 +8813,7 @@ "part_of": [ 
"nmdc:omprc-11-qngh7497" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 13.853, "scaffolds": 543003, "scaf_logsum": 442802, @@ -8891,7 +8891,7 @@ "part_of": [ "nmdc:omprc-11-qngh7497" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 65434428, "output_read_count": 64887080, "input_read_bases": 9880598628, @@ -9123,7 +9123,7 @@ "part_of": [ "nmdc:omprc-11-jk7zjz92" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.718, "scaffolds": 80703, "scaf_logsum": 116377, @@ -9198,7 +9198,7 @@ "part_of": [ "nmdc:omprc-11-jk7zjz92" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 18827380, "output_read_count": 16749572, "input_read_bases": 2842934380, @@ -9430,7 +9430,7 @@ "part_of": [ "nmdc:omprc-11-2jt0jk84" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 5.768, "scaffolds": 351728, "scaf_logsum": 429769, @@ -9505,7 +9505,7 @@ "part_of": [ "nmdc:omprc-11-2jt0jk84" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 50719572, "output_read_count": 47896142, "input_read_bases": 7658655372, @@ -9737,7 +9737,7 @@ "part_of": [ "nmdc:omprc-11-hqmmwn16" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.733, "scaffolds": 114011, "scaf_logsum": 152336, @@ -9812,7 +9812,7 @@ "part_of": [ "nmdc:omprc-11-hqmmwn16" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 20957834, "output_read_count": 20454422, "input_read_bases": 3164632934, @@ -10044,7 +10044,7 @@ "part_of": [ "nmdc:omprc-11-qsxwf517" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 13.127, "scaffolds": 220853, "scaf_logsum": 448446, @@ -10122,7 +10122,7 @@ "part_of": [ "nmdc:omprc-11-qsxwf517" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 34522052, "output_read_count": 33454554, "input_read_bases": 5212829852, @@ -10354,7 +10354,7 @@ "part_of": [ "nmdc:omprc-11-932jcd76" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 5.471, "scaffolds": 106645, 
"scaf_logsum": 42987, @@ -10429,7 +10429,7 @@ "part_of": [ "nmdc:omprc-11-932jcd76" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 28754670, "output_read_count": 27981268, "input_read_bases": 4341955170, @@ -10661,7 +10661,7 @@ "part_of": [ "nmdc:omprc-11-p0jdew93" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.367, "scaffolds": 191777, "scaf_logsum": 225846, @@ -10736,7 +10736,7 @@ "part_of": [ "nmdc:omprc-11-p0jdew93" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 24261468, "output_read_count": 22362924, "input_read_bases": 3663481668, @@ -10968,7 +10968,7 @@ "part_of": [ "nmdc:omprc-11-dtsr6z90" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.329, "scaffolds": 208793, "scaf_logsum": 260132, @@ -11043,7 +11043,7 @@ "part_of": [ "nmdc:omprc-11-dtsr6z90" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 25305566, "output_read_count": 23508042, "input_read_bases": 3821140466, @@ -11276,7 +11276,7 @@ "part_of": [ "nmdc:omprc-11-hwadfm25" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.712, "scaffolds": 46121, "scaf_logsum": 38062, @@ -11351,7 +11351,7 @@ "part_of": [ "nmdc:omprc-11-hwadfm25" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 11431762, "output_read_count": 8322164, "input_read_bases": 1726196062, @@ -11583,7 +11583,7 @@ "part_of": [ "nmdc:omprc-11-vnnn4722" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.397, "scaffolds": 212379, "scaf_logsum": 318786, @@ -11658,7 +11658,7 @@ "part_of": [ "nmdc:omprc-11-vnnn4722" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 23535784, "output_read_count": 20011156, "input_read_bases": 3553903384, @@ -11890,7 +11890,7 @@ "part_of": [ "nmdc:omprc-11-p21wp875" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 7.947, "scaffolds": 103181, "scaf_logsum": 50816, @@ -11965,7 +11965,7 @@ "part_of": [ "nmdc:omprc-11-p21wp875" ], - "version": "v1.0.2", + 
"version": "1.0.2", "input_read_count": 28024960, "output_read_count": 27378404, "input_read_bases": 4231768960, @@ -12197,7 +12197,7 @@ "part_of": [ "nmdc:omprc-11-vs67yj43" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 17.863, "scaffolds": 81627, "scaf_logsum": 20954, @@ -12275,7 +12275,7 @@ "part_of": [ "nmdc:omprc-11-vs67yj43" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 27835800, "output_read_count": 25862834, "input_read_bases": 4203205800, @@ -12507,7 +12507,7 @@ "part_of": [ "nmdc:omprc-11-nhf5m035" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.996, "scaffolds": 147272, "scaf_logsum": 91521, @@ -12582,7 +12582,7 @@ "part_of": [ "nmdc:omprc-11-nhf5m035" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 28064750, "output_read_count": 26438892, "input_read_bases": 4237777250, @@ -12814,7 +12814,7 @@ "part_of": [ "nmdc:omprc-11-w3v30q48" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 14.664, "scaffolds": 97316, "scaf_logsum": 37899, @@ -12892,7 +12892,7 @@ "part_of": [ "nmdc:omprc-11-w3v30q48" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 24906858, "output_read_count": 24128544, "input_read_bases": 3760935558, @@ -13124,7 +13124,7 @@ "part_of": [ "nmdc:omprc-11-vykcbs96" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 2.823, "scaffolds": 106821, "scaf_logsum": 65979, @@ -13199,7 +13199,7 @@ "part_of": [ "nmdc:omprc-11-vykcbs96" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 24139032, "output_read_count": 23262948, "input_read_bases": 3644993832, @@ -13431,7 +13431,7 @@ "part_of": [ "nmdc:omprc-11-dw7shd52" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.279, "scaffolds": 189880, "scaf_logsum": 158661, @@ -13506,7 +13506,7 @@ "part_of": [ "nmdc:omprc-11-dw7shd52" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 24889788, "output_read_count": 23803802, "input_read_bases": 3758357988, @@ 
-13738,7 +13738,7 @@ "part_of": [ "nmdc:omprc-11-j43hz774" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.266, "scaffolds": 216046, "scaf_logsum": 294510, @@ -13813,7 +13813,7 @@ "part_of": [ "nmdc:omprc-11-j43hz774" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 26604768, "output_read_count": 24717950, "input_read_bases": 4017319968, @@ -14045,7 +14045,7 @@ "part_of": [ "nmdc:omprc-11-kgxpef29" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.409, "scaffolds": 197600, "scaf_logsum": 130537, @@ -14123,7 +14123,7 @@ "part_of": [ "nmdc:omprc-11-kgxpef29" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 30951192, "output_read_count": 30289044, "input_read_bases": 4673629992, @@ -14355,7 +14355,7 @@ "part_of": [ "nmdc:omprc-11-qrsway30" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 2.914, "scaffolds": 169419, "scaf_logsum": 99077, @@ -14430,7 +14430,7 @@ "part_of": [ "nmdc:omprc-11-qrsway30" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 27317020, "output_read_count": 26481746, "input_read_bases": 4124870020, @@ -14662,7 +14662,7 @@ "part_of": [ "nmdc:omprc-11-nry91b19" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 4.164, "scaffolds": 180310, "scaf_logsum": 193641, @@ -14737,7 +14737,7 @@ "part_of": [ "nmdc:omprc-11-nry91b19" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 23728904, "output_read_count": 22416634, "input_read_bases": 3583064504, @@ -14969,7 +14969,7 @@ "part_of": [ "nmdc:omprc-11-0n8y1d07" ], - "version": "v1.0.2", + "version": "1.0.2", "asm_score": 3.393, "scaffolds": 182865, "scaf_logsum": 171254, @@ -15044,7 +15044,7 @@ "part_of": [ "nmdc:omprc-11-0n8y1d07" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 23985924, "output_read_count": 22751496, "input_read_bases": 3621874524, @@ -15276,7 +15276,7 @@ "part_of": [ "nmdc:omprc-11-p1735e67" ], - "version": "v1.0.2", + "version": "1.0.2", 
"asm_score": 13.921, "scaffolds": 48925, "scaf_logsum": 9156.336, @@ -15354,7 +15354,7 @@ "part_of": [ "nmdc:omprc-11-p1735e67" ], - "version": "v1.0.2", + "version": "1.0.2", "input_read_count": 20780788, "output_read_count": 19516330, "input_read_bases": 3137898988, From a06695dd88b617c661cafc0edaa847548201792b Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Mon, 20 Nov 2023 14:43:18 -0800 Subject: [PATCH 78/91] changed versions --- configs/re_iding_worklfows.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/re_iding_worklfows.yaml b/configs/re_iding_worklfows.yaml index 3745401b..a763e3e0 100644 --- a/configs/re_iding_worklfows.yaml +++ b/configs/re_iding_worklfows.yaml @@ -3,7 +3,7 @@ Workflows: Type: nmdc:ReadQcAnalysisActivity Enabled: True Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.2 + Version: 1.0.2 WDL: rqcfilter.wdl Collection: read_qc_analysis_activity_set ActivityRange: ReadQcAnalysisActivity @@ -44,7 +44,7 @@ Workflows: Type: nmdc:MetagenomeAssembly Enabled: True Git_repo: https://github.com/microbiomedata/metaAssembly - Version: v1.0.2 + Version: 1.0.2 WDL: jgi_assembly.wdl Collection: metagenome_assembly_set ActivityRange: MetagenomeAssembly From db5581ab00f55b6cda2e7175ec07618d69677b9e Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Tue, 21 Nov 2023 12:01:15 -0800 Subject: [PATCH 79/91] exclude omics_processing_set from ingest --- nmdc_automation/re_iding/scripts/re_id_tool.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 49e946f8..1f808039 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -180,7 +180,6 @@ def process_records(ctx, dryrun, study_id, data_dir): config = ctx.obj['site_config'] api_client = NmdcRuntimeApi(config) - # Get Database dump file paths and the data directory 
db_infile, db_outfile = _get_database_paths(study_id, dryrun) data_dir = _get_data_dir(data_dir, dryrun) @@ -233,14 +232,17 @@ def ingest_records(ctx, reid_records_file): start_time = time.time() logging.info(f"Submitting re id'd records from : {reid_records_file}") - config = Config(ctx.obj['site_config']) - api_client = NmdcRuntimeUserApi(username=config.napa_username, password=config.napa_password, - base_url=config.napa_base_url) + # Get API client + config = ctx.obj['site_config'] + api_client = NmdcRuntimeApi(config) with open(reid_records_file, "r") as f: db_records = json.load(f) for record in db_records: + time.sleep(3) + if 'omics_processing_set' in record: + del record['omics_processing_set'] resp = api_client.post_objects(record) logger.info(f"{record} posted, got response: {resp}") From 35cae661ab139c0ab2af31d2ea7ad8a4b08071fc Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 21 Nov 2023 14:59:38 -0800 Subject: [PATCH 80/91] add changesheet-only logic to re-id-ing ingest_records --- nmdc_automation/api/nmdcapi.py | 11 +- nmdc_automation/re_iding/changesheets.py | 87 +++++++++++++++ .../dryrun_changesheet-20231121-142444.tsv | 3 + ...1-aygzgv51_changesheet-20231121-142542.tsv | 101 ++++++++++++++++++ .../re_iding/scripts/re_id_tool.py | 72 ++++++++++--- 5 files changed, 258 insertions(+), 16 deletions(-) create mode 100644 nmdc_automation/re_iding/changesheets.py create mode 100644 nmdc_automation/re_iding/changesheets_output/dryrun_changesheet-20231121-142444.tsv create mode 100644 nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet-20231121-142542.tsv diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 0927c23a..2c1e5031 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -320,10 +320,13 @@ class NmdcRuntimeUserApi: """ Basic Runtime API Client with user/password authentication. 
""" - def __init__(self, username: str, password: str, base_url: str): - self.username = username - self.password = password - self.base_url = base_url + def __init__(self, site_configuration): + self.config = Config(site_configuration) + + # TODO: remove hard-coded values here + self.username = self.config.napa_username + self.password = self.config.napa_password + self.base_url = self.config.napa_base_url self.headers = {} self.token_response = None self.refresh_token_after = None diff --git a/nmdc_automation/re_iding/changesheets.py b/nmdc_automation/re_iding/changesheets.py new file mode 100644 index 00000000..df962c40 --- /dev/null +++ b/nmdc_automation/re_iding/changesheets.py @@ -0,0 +1,87 @@ +# nmdc_automation/re_iding/changesheets.py +""" +changesheets.py: Provides data classes for creating changesheets for NMDC +database objects. +""" + +import logging +import time +from dataclasses import dataclass, field +from pathlib import Path +import requests +from typing import Any, ClassVar, Dict, Optional + + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s %(levelname)s %(" "message)s" +) + +CHANGESHEETS_DIR = Path(__file__).parent.absolute().joinpath("changesheets_output") + + +@dataclass +class ChangesheetLineItem: + """ + A line item in a changesheet + """ + + id: str + action: str + attribute: str + value: str + + @property + def line(self) -> str: + """ + Return the line item as a tab-separated string + """ + cleaned_value = self.value.replace("\n", " ").replace("\t", " ").strip() + return f"{self.id}\t{self.action}\t{self.attribute}\t{cleaned_value}" + + +@dataclass +class Changesheet: + """ + A changesheet + """ + + name: str + line_items: list = field(default_factory=list) + header: ClassVar[str] = "id\taction\tattribute\tvalue" + output_dir: Optional[Path] = None + + def __post_init__(self): + self.line_items = [] + if self.output_dir is None: + self.output_dir = CHANGESHEETS_DIR + self.output_dir.mkdir(parents=True, exist_ok=True) + 
self.output_filename_root: str = f"{self.name}-{time.strftime('%Y%m%d-%H%M%S')}" + self.output_filename: str = f"{self.output_filename_root}.tsv" + self.output_filepath: Path = self.output_dir.joinpath(self.output_filename) + + def validate_changesheet(self, base_url: str) -> bool: + """ + Validate the changesheet + :return: None + """ + logging.info(f"Validating changesheet {self.output_filepath}") + url = f"{base_url}metadata/changesheets:validate" + logging.info(f"Posting to {url}") + resp = requests.post( + url, + files={"uploaded_file": open(self.output_filepath, "rb")}, + ) + if not resp.ok: + logging.error(f"Changesheet validation failed: {resp.text}") + return resp.ok + + def write_changesheet(self) -> None: + """ + Write the changesheet to a file + :return: None + """ + with open(self.output_filepath, "w") as f: + logging.info(f"Writing changesheet to {self.output_filepath}") + f.write(self.header + "\n") + for line_item in self.line_items: + f.write(line_item.line + "\n") diff --git a/nmdc_automation/re_iding/changesheets_output/dryrun_changesheet-20231121-142444.tsv b/nmdc_automation/re_iding/changesheets_output/dryrun_changesheet-20231121-142444.tsv new file mode 100644 index 00000000..69b360ec --- /dev/null +++ b/nmdc_automation/re_iding/changesheets_output/dryrun_changesheet-20231121-142444.tsv @@ -0,0 +1,3 @@ +id action attribute value +nmdc:omprc-11-bn8jcq58 remove item has_output jgi:55d740280d8785342fcf7e39| +nmdc:omprc-11-bn8jcq58 insert has_output nmdc:dobj-11-k7vny888| diff --git a/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet-20231121-142542.tsv b/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet-20231121-142542.tsv new file mode 100644 index 00000000..5c7dccb0 --- /dev/null +++ b/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet-20231121-142542.tsv @@ -0,0 +1,101 @@ +id action attribute value +nmdc:omprc-11-bn8jcq58 remove item has_output 
jgi:55d740280d8785342fcf7e39| +nmdc:omprc-11-bn8jcq58 insert has_output nmdc:dobj-11-xkx6jy64| +nmdc:omprc-11-zp2ar437 remove item has_output jgi:55d817fc0d8785342fcf8274| +nmdc:omprc-11-zp2ar437 insert has_output nmdc:dobj-11-26xaf448| +nmdc:omprc-11-wepaa271 remove item has_output jgi:55d740240d8785342fcf7e37| +nmdc:omprc-11-wepaa271 insert has_output nmdc:dobj-11-ee1cn292| +nmdc:omprc-11-hymrq852 remove item has_output jgi:55a9cb010d87852b21508920| +nmdc:omprc-11-hymrq852 insert has_output nmdc:dobj-11-kt904y47| +nmdc:omprc-11-yt8css91 remove item has_output jgi:55d818010d8785342fcf8278| +nmdc:omprc-11-yt8css91 insert has_output nmdc:dobj-11-1qqy8057| +nmdc:omprc-11-hgehsc37 remove item has_output jgi:55d817f20d8785342fcf826c| +nmdc:omprc-11-hgehsc37 insert has_output nmdc:dobj-11-d8vd2744| +nmdc:omprc-11-7vsv7h78 remove item has_output jgi:55f23d790d8785306f96497e| +nmdc:omprc-11-7vsv7h78 insert has_output nmdc:dobj-11-xhvk5873| +nmdc:omprc-11-5r54nt37 remove item has_output jgi:574fde547ded5e3df1ee13fa| +nmdc:omprc-11-5r54nt37 insert has_output nmdc:dobj-11-ym2gpg27| +nmdc:omprc-11-76ebsj44 remove item has_output jgi:574fde787ded5e3df1ee1416| +nmdc:omprc-11-76ebsj44 insert has_output nmdc:dobj-11-mgpw8m22| +nmdc:omprc-11-s6wqag22 remove item has_output jgi:574fde7b7ded5e3df1ee1418| +nmdc:omprc-11-s6wqag22 insert has_output nmdc:dobj-11-vkxna711| +nmdc:omprc-11-x0es2p18 remove item has_output jgi:574fde577ded5e3df1ee13fc| +nmdc:omprc-11-x0es2p18 insert has_output nmdc:dobj-11-38txm578| +nmdc:omprc-11-1nvcer55 remove item has_output jgi:574fde587ded5e3df1ee13fd| +nmdc:omprc-11-1nvcer55 insert has_output nmdc:dobj-11-h7t4fx79| +nmdc:omprc-11-b051xn44 remove item has_output jgi:574fe09a7ded5e3df1ee1485| +nmdc:omprc-11-b051xn44 insert has_output nmdc:dobj-11-zc62cx90| +nmdc:omprc-11-k8kt2j31 remove item has_output jgi:574fde5b7ded5e3df1ee13ff| +nmdc:omprc-11-k8kt2j31 insert has_output nmdc:dobj-11-ba1s7a39| +nmdc:omprc-11-9pbab972 remove item has_output 
jgi:574fde7c7ded5e3df1ee1419| +nmdc:omprc-11-9pbab972 insert has_output nmdc:dobj-11-s1m79s13| +nmdc:omprc-11-0g415160 remove item has_output jgi:574fde5e7ded5e3df1ee1401| +nmdc:omprc-11-0g415160 insert has_output nmdc:dobj-11-br8p6y08| +nmdc:omprc-11-z5qv0f24 remove item has_output jgi:574fe0967ded5e3df1ee1482| +nmdc:omprc-11-z5qv0f24 insert has_output nmdc:dobj-11-pk311p17| +nmdc:omprc-11-8qms8262 remove item has_output jgi:574fde807ded5e3df1ee141b| +nmdc:omprc-11-8qms8262 insert has_output nmdc:dobj-11-rbvvgr79| +nmdc:omprc-11-k675bw84 remove item has_output jgi:574fe09f7ded5e3df1ee1489| +nmdc:omprc-11-k675bw84 insert has_output nmdc:dobj-11-ezyfxj50| +nmdc:omprc-11-mbv2jc69 remove item has_output jgi:574fe09c7ded5e3df1ee1487| +nmdc:omprc-11-mbv2jc69 insert has_output nmdc:dobj-11-y27yqy58| +nmdc:omprc-11-kc23zq65 remove item has_output jgi:574fde607ded5e3df1ee1403| +nmdc:omprc-11-kc23zq65 insert has_output nmdc:dobj-11-mmsvm346| +nmdc:omprc-11-c8dzx197 remove item has_output jgi:574fde647ded5e3df1ee1406| +nmdc:omprc-11-c8dzx197 insert has_output nmdc:dobj-11-0vpq2471| +nmdc:omprc-11-tgxmb243 remove item has_output jgi:574fde837ded5e3df1ee141d| +nmdc:omprc-11-tgxmb243 insert has_output nmdc:dobj-11-assd7y33| +nmdc:omprc-11-t0xjjc50 remove item has_output jgi:55d7402a0d8785342fcf7e3b| +nmdc:omprc-11-t0xjjc50 insert has_output nmdc:dobj-11-1v0g7c04| +nmdc:omprc-11-1avd3d16 remove item has_output jgi:55d7402c0d8785342fcf7e3e| +nmdc:omprc-11-1avd3d16 insert has_output nmdc:dobj-11-k5qf8f61| +nmdc:omprc-11-hk1bje46 remove item has_output jgi:55d817f70d8785342fcf8270| +nmdc:omprc-11-hk1bje46 insert has_output nmdc:dobj-11-wvje1j80| +nmdc:omprc-11-qtje8r57 remove item has_output jgi:55d817fa0d8785342fcf8272| +nmdc:omprc-11-qtje8r57 insert has_output nmdc:dobj-11-sp1dx351| +nmdc:omprc-11-7ey2jr63 remove item has_output jgi:55d740220d8785342fcf7e35| +nmdc:omprc-11-7ey2jr63 insert has_output nmdc:dobj-11-h4fha619| +nmdc:omprc-11-qngh7497 remove item has_output 
jgi:55a9caff0d87852b2150891e| +nmdc:omprc-11-qngh7497 insert has_output nmdc:dobj-11-p9rq4261| +nmdc:omprc-11-jk7zjz92 remove item has_output jgi:55d817f30d8785342fcf826d| +nmdc:omprc-11-jk7zjz92 insert has_output nmdc:dobj-11-phjw3w62| +nmdc:omprc-11-2jt0jk84 remove item has_output jgi:55f23d820d8785306f964980| +nmdc:omprc-11-2jt0jk84 insert has_output nmdc:dobj-11-7pwab132| +nmdc:omprc-11-hqmmwn16 remove item has_output jgi:55d817fe0d8785342fcf8276| +nmdc:omprc-11-hqmmwn16 insert has_output nmdc:dobj-11-7g54r371| +nmdc:omprc-11-qsxwf517 remove item has_output jgi:55d7402b0d8785342fcf7e3c| +nmdc:omprc-11-qsxwf517 insert has_output nmdc:dobj-11-zx1hkd49| +nmdc:omprc-11-932jcd76 remove item has_output jgi:574fe0a17ded5e3df1ee148a| +nmdc:omprc-11-932jcd76 insert has_output nmdc:dobj-11-zb61bz69| +nmdc:omprc-11-p0jdew93 remove item has_output jgi:574fde697ded5e3df1ee140a| +nmdc:omprc-11-p0jdew93 insert has_output nmdc:dobj-11-r2hspr31| +nmdc:omprc-11-dtsr6z90 remove item has_output jgi:574fde6c7ded5e3df1ee140c| +nmdc:omprc-11-dtsr6z90 insert has_output nmdc:dobj-11-dvy2av42| +nmdc:omprc-11-hwadfm25 remove item has_output jgi:574fe0a87ded5e3df1ee148e| +nmdc:omprc-11-hwadfm25 insert has_output nmdc:dobj-11-h1bew282| +nmdc:omprc-11-vnnn4722 remove item has_output jgi:574fde667ded5e3df1ee1407| +nmdc:omprc-11-vnnn4722 insert has_output nmdc:dobj-11-5mt10622| +nmdc:omprc-11-p21wp875 remove item has_output jgi:574fe0a67ded5e3df1ee148d| +nmdc:omprc-11-p21wp875 insert has_output nmdc:dobj-11-ggn5ha23| +nmdc:omprc-11-vs67yj43 remove item has_output jgi:574fde867ded5e3df1ee1420| +nmdc:omprc-11-vs67yj43 insert has_output nmdc:dobj-11-1jxf0m20| +nmdc:omprc-11-nhf5m035 remove item has_output jgi:574fe0ac7ded5e3df1ee1491| +nmdc:omprc-11-nhf5m035 insert has_output nmdc:dobj-11-aw4wks31| +nmdc:omprc-11-w3v30q48 remove item has_output jgi:574fde8a7ded5e3df1ee1422| +nmdc:omprc-11-w3v30q48 insert has_output nmdc:dobj-11-a52d6695| +nmdc:omprc-11-vykcbs96 remove item has_output 
jgi:574fde6e7ded5e3df1ee140d| +nmdc:omprc-11-vykcbs96 insert has_output nmdc:dobj-11-yjmvgb38| +nmdc:omprc-11-dw7shd52 remove item has_output jgi:574fde8c7ded5e3df1ee1424| +nmdc:omprc-11-dw7shd52 insert has_output nmdc:dobj-11-pr8zqm21| +nmdc:omprc-11-j43hz774 remove item has_output jgi:574fe0af7ded5e3df1ee1493| +nmdc:omprc-11-j43hz774 insert has_output nmdc:dobj-11-srrrsz66| +nmdc:omprc-11-kgxpef29 remove item has_output jgi:574fe0b17ded5e3df1ee1494| +nmdc:omprc-11-kgxpef29 insert has_output nmdc:dobj-11-qjrhfc82| +nmdc:omprc-11-qrsway30 remove item has_output jgi:574fe0b47ded5e3df1ee1496| +nmdc:omprc-11-qrsway30 insert has_output nmdc:dobj-11-zqgnfj09| +nmdc:omprc-11-nry91b19 remove item has_output jgi:574fde907ded5e3df1ee1426| +nmdc:omprc-11-nry91b19 insert has_output nmdc:dobj-11-hkern724| +nmdc:omprc-11-0n8y1d07 remove item has_output jgi:574fde947ded5e3df1ee1429| +nmdc:omprc-11-0n8y1d07 insert has_output nmdc:dobj-11-9wykaf95| +nmdc:omprc-11-p1735e67 remove item has_output jgi:574fde937ded5e3df1ee1428| +nmdc:omprc-11-p1735e67 insert has_output nmdc:dobj-11-r0ygq360| diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 1f808039..2eca95ba 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -16,6 +16,7 @@ from nmdc_automation.config import Config import nmdc_schema.nmdc as nmdc from nmdc_automation.re_iding.base import ReIdTool +from nmdc_automation.re_iding.changesheets import Changesheet, ChangesheetLineItem from nmdc_automation.re_iding.db_utils import get_omics_processing_id # Defaults @@ -69,9 +70,8 @@ def extract_records(ctx, study_id): logging.info(f"Extracting workflow records for study_id: {study_id}") logging.info(f"study_id: {study_id}") - config = Config(ctx.obj['site_config']) - api_client = NmdcRuntimeUserApi(username=config.napa_username, password=config.napa_password, - base_url=config.napa_base_url) + config = 
ctx.obj['site_config'] + api_client = NmdcRuntimeUserApi(config) # 1. Retrieve all OmicsProcessing records for the updated NMDC study ID omics_processing_records = api_client.get_omics_processing_records_for_nmdc_study( @@ -224,28 +224,76 @@ def process_records(ctx, dryrun, study_id, data_dir): @cli.command() @click.argument('reid_records_file', type=click.Path(exists=True)) +@click.option('--changesheet_only', is_flag=True, default=False,) @click.pass_context -def ingest_records(ctx, reid_records_file): +def ingest_records(ctx, reid_records_file, changesheet_only): """ - Read in json dump of re_id'd records and submit them to the /v1/workflows/activities endpoint + Read in json dump of re_id'd records and: + submit them to the + /v1/workflows/activities endpoint """ start_time = time.time() logging.info(f"Submitting re id'd records from : {reid_records_file}") + reid_records_filename = Path(reid_records_file).name + reid_base_name = reid_records_filename.split("_")[0] - # Get API client + # Get API client(s) config = ctx.obj['site_config'] api_client = NmdcRuntimeApi(config) + api_user_client = NmdcRuntimeUserApi(config) with open(reid_records_file, "r") as f: db_records = json.load(f) - + + changesheet = Changesheet(name=f"{reid_base_name}_changesheet") for record in db_records: time.sleep(3) - if 'omics_processing_set' in record: - del record['omics_processing_set'] - resp = api_client.post_objects(record) - - logger.info(f"{record} posted, got response: {resp}") + # remove the omics_processing_set and use it to generate + # changes to omics_processing has_output + omics_processing_set = record.pop("omics_processing_set") + for omics_processing_record in omics_processing_set: + omics_processing_id = omics_processing_record["id"] + logging.info(f"omics_processing_id: {omics_processing_id}") + # find legacy has_output and create change to remove it + # need to strip the nmdc: prefix for the objects endpoint + trimmed_omics_processing_id = 
omics_processing_id.split(":")[1] + resp = api_user_client.request( + "GET", f"objects/{trimmed_omics_processing_id}" + ) + legacy_omics_processing_record = resp.json() + # delete legacy has_output + change = ChangesheetLineItem( + id=omics_processing_id, action="remove item", + attribute="has_output", + value="|".join(legacy_omics_processing_record["has_output"]) + "|", ) + changesheet.line_items.append(change) + logging.info(f"changes: {change}") + + + + # insert new has_output + change = ChangesheetLineItem( + id=omics_processing_id, action="insert", + attribute="has_output", + value="|".join(omics_processing_record["has_output"]) + "|", ) + changesheet.line_items.append(change) + logging.info(f"changes: {change}") + + # submit the record to the workflows endpoint + if not changesheet_only: + resp = api_client.post_objects(record) + logger.info(f"{record} posted, got response: {resp}") + else: + logger.info(f"changesheet_only is True, skipping ingest") + + changesheet.write_changesheet() + logging.info(f"changesheet written to {changesheet.output_filepath}") + if changesheet.validate_changesheet(api_client.config.napa_base_url): + logging.info(f"changesheet validated") + else: + logging.info(f"changesheet validation failed") + + def _get_data_dir(data_dir, dryrun): """ From c76f63b867cd43117662d0cd56b4cb1c25e34a57 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 29 Nov 2023 09:15:19 -0800 Subject: [PATCH 81/91] split changesheet to test api --- ...1-aygzgv51_changesheet-20231121-142542.tsv | 52 +------------------ .../nmdc:sty-11-aygzgv51_changesheet_pt2.tsv | 52 +++++++++++++++++++ 2 files changed, 53 insertions(+), 51 deletions(-) create mode 100644 nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet_pt2.tsv diff --git a/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet-20231121-142542.tsv b/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet-20231121-142542.tsv index 
5c7dccb0..5f7a9c5f 100644 --- a/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet-20231121-142542.tsv +++ b/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet-20231121-142542.tsv @@ -48,54 +48,4 @@ nmdc:omprc-11-tgxmb243 insert has_output nmdc:dobj-11-assd7y33| nmdc:omprc-11-t0xjjc50 remove item has_output jgi:55d7402a0d8785342fcf7e3b| nmdc:omprc-11-t0xjjc50 insert has_output nmdc:dobj-11-1v0g7c04| nmdc:omprc-11-1avd3d16 remove item has_output jgi:55d7402c0d8785342fcf7e3e| -nmdc:omprc-11-1avd3d16 insert has_output nmdc:dobj-11-k5qf8f61| -nmdc:omprc-11-hk1bje46 remove item has_output jgi:55d817f70d8785342fcf8270| -nmdc:omprc-11-hk1bje46 insert has_output nmdc:dobj-11-wvje1j80| -nmdc:omprc-11-qtje8r57 remove item has_output jgi:55d817fa0d8785342fcf8272| -nmdc:omprc-11-qtje8r57 insert has_output nmdc:dobj-11-sp1dx351| -nmdc:omprc-11-7ey2jr63 remove item has_output jgi:55d740220d8785342fcf7e35| -nmdc:omprc-11-7ey2jr63 insert has_output nmdc:dobj-11-h4fha619| -nmdc:omprc-11-qngh7497 remove item has_output jgi:55a9caff0d87852b2150891e| -nmdc:omprc-11-qngh7497 insert has_output nmdc:dobj-11-p9rq4261| -nmdc:omprc-11-jk7zjz92 remove item has_output jgi:55d817f30d8785342fcf826d| -nmdc:omprc-11-jk7zjz92 insert has_output nmdc:dobj-11-phjw3w62| -nmdc:omprc-11-2jt0jk84 remove item has_output jgi:55f23d820d8785306f964980| -nmdc:omprc-11-2jt0jk84 insert has_output nmdc:dobj-11-7pwab132| -nmdc:omprc-11-hqmmwn16 remove item has_output jgi:55d817fe0d8785342fcf8276| -nmdc:omprc-11-hqmmwn16 insert has_output nmdc:dobj-11-7g54r371| -nmdc:omprc-11-qsxwf517 remove item has_output jgi:55d7402b0d8785342fcf7e3c| -nmdc:omprc-11-qsxwf517 insert has_output nmdc:dobj-11-zx1hkd49| -nmdc:omprc-11-932jcd76 remove item has_output jgi:574fe0a17ded5e3df1ee148a| -nmdc:omprc-11-932jcd76 insert has_output nmdc:dobj-11-zb61bz69| -nmdc:omprc-11-p0jdew93 remove item has_output jgi:574fde697ded5e3df1ee140a| -nmdc:omprc-11-p0jdew93 insert has_output 
nmdc:dobj-11-r2hspr31| -nmdc:omprc-11-dtsr6z90 remove item has_output jgi:574fde6c7ded5e3df1ee140c| -nmdc:omprc-11-dtsr6z90 insert has_output nmdc:dobj-11-dvy2av42| -nmdc:omprc-11-hwadfm25 remove item has_output jgi:574fe0a87ded5e3df1ee148e| -nmdc:omprc-11-hwadfm25 insert has_output nmdc:dobj-11-h1bew282| -nmdc:omprc-11-vnnn4722 remove item has_output jgi:574fde667ded5e3df1ee1407| -nmdc:omprc-11-vnnn4722 insert has_output nmdc:dobj-11-5mt10622| -nmdc:omprc-11-p21wp875 remove item has_output jgi:574fe0a67ded5e3df1ee148d| -nmdc:omprc-11-p21wp875 insert has_output nmdc:dobj-11-ggn5ha23| -nmdc:omprc-11-vs67yj43 remove item has_output jgi:574fde867ded5e3df1ee1420| -nmdc:omprc-11-vs67yj43 insert has_output nmdc:dobj-11-1jxf0m20| -nmdc:omprc-11-nhf5m035 remove item has_output jgi:574fe0ac7ded5e3df1ee1491| -nmdc:omprc-11-nhf5m035 insert has_output nmdc:dobj-11-aw4wks31| -nmdc:omprc-11-w3v30q48 remove item has_output jgi:574fde8a7ded5e3df1ee1422| -nmdc:omprc-11-w3v30q48 insert has_output nmdc:dobj-11-a52d6695| -nmdc:omprc-11-vykcbs96 remove item has_output jgi:574fde6e7ded5e3df1ee140d| -nmdc:omprc-11-vykcbs96 insert has_output nmdc:dobj-11-yjmvgb38| -nmdc:omprc-11-dw7shd52 remove item has_output jgi:574fde8c7ded5e3df1ee1424| -nmdc:omprc-11-dw7shd52 insert has_output nmdc:dobj-11-pr8zqm21| -nmdc:omprc-11-j43hz774 remove item has_output jgi:574fe0af7ded5e3df1ee1493| -nmdc:omprc-11-j43hz774 insert has_output nmdc:dobj-11-srrrsz66| -nmdc:omprc-11-kgxpef29 remove item has_output jgi:574fe0b17ded5e3df1ee1494| -nmdc:omprc-11-kgxpef29 insert has_output nmdc:dobj-11-qjrhfc82| -nmdc:omprc-11-qrsway30 remove item has_output jgi:574fe0b47ded5e3df1ee1496| -nmdc:omprc-11-qrsway30 insert has_output nmdc:dobj-11-zqgnfj09| -nmdc:omprc-11-nry91b19 remove item has_output jgi:574fde907ded5e3df1ee1426| -nmdc:omprc-11-nry91b19 insert has_output nmdc:dobj-11-hkern724| -nmdc:omprc-11-0n8y1d07 remove item has_output jgi:574fde947ded5e3df1ee1429| -nmdc:omprc-11-0n8y1d07 insert has_output 
nmdc:dobj-11-9wykaf95| -nmdc:omprc-11-p1735e67 remove item has_output jgi:574fde937ded5e3df1ee1428| -nmdc:omprc-11-p1735e67 insert has_output nmdc:dobj-11-r0ygq360| + diff --git a/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet_pt2.tsv b/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet_pt2.tsv new file mode 100644 index 00000000..b0be76c3 --- /dev/null +++ b/nmdc_automation/re_iding/changesheets_output/nmdc:sty-11-aygzgv51_changesheet_pt2.tsv @@ -0,0 +1,52 @@ +id action attribute value +nmdc:omprc-11-1avd3d16 insert has_output nmdc:dobj-11-k5qf8f61| +nmdc:omprc-11-hk1bje46 remove item has_output jgi:55d817f70d8785342fcf8270| +nmdc:omprc-11-hk1bje46 insert has_output nmdc:dobj-11-wvje1j80| +nmdc:omprc-11-qtje8r57 remove item has_output jgi:55d817fa0d8785342fcf8272| +nmdc:omprc-11-qtje8r57 insert has_output nmdc:dobj-11-sp1dx351| +nmdc:omprc-11-7ey2jr63 remove item has_output jgi:55d740220d8785342fcf7e35| +nmdc:omprc-11-7ey2jr63 insert has_output nmdc:dobj-11-h4fha619| +nmdc:omprc-11-qngh7497 remove item has_output jgi:55a9caff0d87852b2150891e| +nmdc:omprc-11-qngh7497 insert has_output nmdc:dobj-11-p9rq4261| +nmdc:omprc-11-jk7zjz92 remove item has_output jgi:55d817f30d8785342fcf826d| +nmdc:omprc-11-jk7zjz92 insert has_output nmdc:dobj-11-phjw3w62| +nmdc:omprc-11-2jt0jk84 remove item has_output jgi:55f23d820d8785306f964980| +nmdc:omprc-11-2jt0jk84 insert has_output nmdc:dobj-11-7pwab132| +nmdc:omprc-11-hqmmwn16 remove item has_output jgi:55d817fe0d8785342fcf8276| +nmdc:omprc-11-hqmmwn16 insert has_output nmdc:dobj-11-7g54r371| +nmdc:omprc-11-qsxwf517 remove item has_output jgi:55d7402b0d8785342fcf7e3c| +nmdc:omprc-11-qsxwf517 insert has_output nmdc:dobj-11-zx1hkd49| +nmdc:omprc-11-932jcd76 remove item has_output jgi:574fe0a17ded5e3df1ee148a| +nmdc:omprc-11-932jcd76 insert has_output nmdc:dobj-11-zb61bz69| +nmdc:omprc-11-p0jdew93 remove item has_output jgi:574fde697ded5e3df1ee140a| +nmdc:omprc-11-p0jdew93 
insert has_output nmdc:dobj-11-r2hspr31| +nmdc:omprc-11-dtsr6z90 remove item has_output jgi:574fde6c7ded5e3df1ee140c| +nmdc:omprc-11-dtsr6z90 insert has_output nmdc:dobj-11-dvy2av42| +nmdc:omprc-11-hwadfm25 remove item has_output jgi:574fe0a87ded5e3df1ee148e| +nmdc:omprc-11-hwadfm25 insert has_output nmdc:dobj-11-h1bew282| +nmdc:omprc-11-vnnn4722 remove item has_output jgi:574fde667ded5e3df1ee1407| +nmdc:omprc-11-vnnn4722 insert has_output nmdc:dobj-11-5mt10622| +nmdc:omprc-11-p21wp875 remove item has_output jgi:574fe0a67ded5e3df1ee148d| +nmdc:omprc-11-p21wp875 insert has_output nmdc:dobj-11-ggn5ha23| +nmdc:omprc-11-vs67yj43 remove item has_output jgi:574fde867ded5e3df1ee1420| +nmdc:omprc-11-vs67yj43 insert has_output nmdc:dobj-11-1jxf0m20| +nmdc:omprc-11-nhf5m035 remove item has_output jgi:574fe0ac7ded5e3df1ee1491| +nmdc:omprc-11-nhf5m035 insert has_output nmdc:dobj-11-aw4wks31| +nmdc:omprc-11-w3v30q48 remove item has_output jgi:574fde8a7ded5e3df1ee1422| +nmdc:omprc-11-w3v30q48 insert has_output nmdc:dobj-11-a52d6695| +nmdc:omprc-11-vykcbs96 remove item has_output jgi:574fde6e7ded5e3df1ee140d| +nmdc:omprc-11-vykcbs96 insert has_output nmdc:dobj-11-yjmvgb38| +nmdc:omprc-11-dw7shd52 remove item has_output jgi:574fde8c7ded5e3df1ee1424| +nmdc:omprc-11-dw7shd52 insert has_output nmdc:dobj-11-pr8zqm21| +nmdc:omprc-11-j43hz774 remove item has_output jgi:574fe0af7ded5e3df1ee1493| +nmdc:omprc-11-j43hz774 insert has_output nmdc:dobj-11-srrrsz66| +nmdc:omprc-11-kgxpef29 remove item has_output jgi:574fe0b17ded5e3df1ee1494| +nmdc:omprc-11-kgxpef29 insert has_output nmdc:dobj-11-qjrhfc82| +nmdc:omprc-11-qrsway30 remove item has_output jgi:574fe0b47ded5e3df1ee1496| +nmdc:omprc-11-qrsway30 insert has_output nmdc:dobj-11-zqgnfj09| +nmdc:omprc-11-nry91b19 remove item has_output jgi:574fde907ded5e3df1ee1426| +nmdc:omprc-11-nry91b19 insert has_output nmdc:dobj-11-hkern724| +nmdc:omprc-11-0n8y1d07 remove item has_output jgi:574fde947ded5e3df1ee1429| +nmdc:omprc-11-0n8y1d07 insert 
has_output nmdc:dobj-11-9wykaf95| +nmdc:omprc-11-p1735e67 remove item has_output jgi:574fde937ded5e3df1ee1428| +nmdc:omprc-11-p1735e67 insert has_output nmdc:dobj-11-r0ygq360| \ No newline at end of file From b4de0b5b8c4fbb67a8d88c2426ddf3506e0cd715 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 7 Dec 2023 11:29:21 -0800 Subject: [PATCH 82/91] added function to delete old records from db --- .../re_iding/scripts/re_id_tool.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 2eca95ba..3d501808 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -294,6 +294,68 @@ def ingest_records(ctx, reid_records_file, changesheet_only): logging.info(f"changesheet validation failed") +@cli.command() +@click.argument('old_records_file', type=click.Path(exists=True)) +@click.pass_context +def delete_old_records(ctx, old_records_file): + """ + Read in json dump of old records and: + delete them using + /queries/run endpoint + """ + + logging.info(f"Deleting old objects found in : {old_records_file}") + old_records_filename = Path(old_records_file).name + old_base_name = old_records_filename.split("_")[0] + + # Get API client(s) + config = ctx.obj['site_config'] + api_user_client = NmdcRuntimeUserApi(config) + + #get old db records + with open(old_records_file, "r") as f: + old_db_records = json.load(f) + + #set list to capture annotation genes for agg set + gene_id_list = [] + + for record in old_db_records: + for set_name, object_record in record.items(): + if set_name == "omics_processing_set": + continue + if isinstance(object_record, list): + for item in object_record: + if "id" in item: + if set_name == "metagenome_annotation_activity_set": + gene_id_list.append(item["id"]) + delete_query = { + "delete": set_name, + "deletes": [{"q": {"id": item['id']}, "limit": 1}] + } + 
logging.info(f"Running query: {delete_query}, deleting {set_name} with id: {item['id']}") + + run_query_response = api_user_client.delete_query(delete_query) + + logging.info(f"Deleting query posted with response: {run_query_response}") + + for annotation_id in gene_id_list: + logging.info(f"Deleting functional aggregate record with id: {annotation_id}") + delete_query_agg = { + "delete": "functional_annotation_agg", + "deletes": [{"q": {"id": annotation_id}}] + } + + run_query_agg_response = api_user_client.delete_query(delete_query_agg) + + logging.info(f"Response for deleting function annotation agg record returned: {run_query_agg_response}") + + + + + + + + def _get_data_dir(data_dir, dryrun): """ From 9fb3f274aefec3c9fb24ecbc1e9ad414e211e408 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 7 Dec 2023 11:29:59 -0800 Subject: [PATCH 83/91] added method for /qeuries/run --- nmdc_automation/api/nmdcapi.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 2c1e5031..29861d8c 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -433,6 +433,23 @@ def get_data_object_by_id(self, data_object_id: str): ) data_object_record = response.json() return data_object_record + + def run_query(self, query: dict): + """ + Function to run a query using the Microbiome Data API. + + Parameters: + query (dict): The query to be run in JSON format. + + Returns: + dict: The response from the API. 
+ """ + + self.ensure_token() + url = "https://api.microbiomedata.org/queries/run" + + response = requests.post(url, json=query, headers=self.headers) + return response.json() def jprint(obj): print(json.dumps(obj, indent=2)) From d09597dff30ad0b4028c515633a2b81b820f7acc Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 7 Dec 2023 11:39:43 -0800 Subject: [PATCH 84/91] fixed query key for deleting functional agg record --- .../re_iding/scripts/re_id_tool.py | 179 ++++++++++-------- 1 file changed, 97 insertions(+), 82 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 3d501808..cb7dda3e 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -32,31 +32,34 @@ logging.basicConfig( level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - handlers=[ - logging.StreamHandler() - ] + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[logging.StreamHandler()], ) logger = logging.getLogger(__name__) - @click.group() -@click.option("--site-config", type=click.Path(exists=True), - default=NAPA_CONFIG,) +@click.option( + "--site-config", + type=click.Path(exists=True), + default=NAPA_CONFIG, +) @click.pass_context def cli(ctx, site_config): """ NMDC re-ID tool """ ctx.ensure_object(dict) - ctx.obj['site_config'] = site_config + ctx.obj["site_config"] = site_config @cli.command() -@click.option('--study_id', default=STUDY_ID, - help=f'Optional updated study ID. Default: {STUDY_ID}') +@click.option( + "--study_id", + default=STUDY_ID, + help=f"Optional updated study ID. 
Default: {STUDY_ID}", +) @click.pass_context def extract_records(ctx, study_id): """ @@ -70,7 +73,7 @@ def extract_records(ctx, study_id): logging.info(f"Extracting workflow records for study_id: {study_id}") logging.info(f"study_id: {study_id}") - config = ctx.obj['site_config'] + config = ctx.obj["site_config"] api_client = NmdcRuntimeUserApi(config) # 1. Retrieve all OmicsProcessing records for the updated NMDC study ID @@ -85,30 +88,29 @@ def extract_records(ctx, study_id): # 2. For each OmicsProcessing record, find the legacy identifier: for omics_processing_record in omics_processing_records: db = nmdc.Database() - logging.info( - f"omics_processing_record: " - f"{omics_processing_record['id']}" - ) + logging.info(f"omics_processing_record: " f"{omics_processing_record['id']}") legacy_id = _get_legacy_id(omics_processing_record) logging.info(f"legacy_id: {legacy_id}") - if (omics_processing_record["omics_type"]["has_raw_value"] != - "Metagenome"): + if omics_processing_record["omics_type"]["has_raw_value"] != "Metagenome": logging.info( f"omics_processing_record {omics_processing_record['id']} " f"is not a Metagenome" - ) + ) continue db.omics_processing_set.append(omics_processing_record) for data_object_id in omics_processing_record["has_output"]: - data_object_record = api_client.get_data_object_by_id( - data_object_id - ) + data_object_record = api_client.get_data_object_by_id(data_object_id) db.data_object_set.append(data_object_record) # downstream workflow activity sets - (read_qc_records, readbased_records, metagenome_assembly_records, - metagenome_annotation_records, mags_records) = [], [], [], [], [] + ( + read_qc_records, + readbased_records, + metagenome_assembly_records, + metagenome_annotation_records, + mags_records, + ) = ([], [], [], [], []) downstream_workflow_activity_sets = { "read_qc_analysis_activity_set": read_qc_records, @@ -118,9 +120,7 @@ def extract_records(ctx, study_id): "mags_activity_set": mags_records, } for set_name, records 
in downstream_workflow_activity_sets.items(): - records = api_client.get_workflow_activity_informed_by( - set_name, legacy_id - ) + records = api_client.get_workflow_activity_informed_by(set_name, legacy_id) db.__setattr__(set_name, records) # Add the data objects referenced by the `has_output` property for record in records: @@ -132,13 +132,11 @@ def extract_records(ctx, study_id): logging.info( f"data_object_record: " f"{data_object_record['id']}, {data_object_record['description']}" - ) + ) db.data_object_set.append(data_object_record) # Search for orphaned data objects with the legacy ID in the description - orphaned_data_objects = api_client.get_data_objects_by_description( - legacy_id - ) + orphaned_data_objects = api_client.get_data_objects_by_description(legacy_id) # check that we don't already have the data object in the set for data_object in orphaned_data_objects: if data_object["id"] not in [d["id"] for d in db.data_object_set]: @@ -146,7 +144,7 @@ def extract_records(ctx, study_id): logging.info( f"Added orphaned data object: " f"{data_object['id']}, {data_object['description']}" - ) + ) retrieved_databases.append(db) @@ -157,12 +155,22 @@ def extract_records(ctx, study_id): @cli.command() -@click.option('--dryrun / --no-dryrun', is_flag=True, default=True, - help='Dryrun mode: use local data dir and do not save results') -@click.option('--study_id', default=STUDY_ID, - help=f'Optional updated study ID. Default: {STUDY_ID}') -@click.option('--data_dir', default=BASE_DATAFILE_DIR, - help=f'Optional base datafile directory. Default: {BASE_DATAFILE_DIR}') +@click.option( + "--dryrun / --no-dryrun", + is_flag=True, + default=True, + help="Dryrun mode: use local data dir and do not save results", +) +@click.option( + "--study_id", + default=STUDY_ID, + help=f"Optional updated study ID. Default: {STUDY_ID}", +) +@click.option( + "--data_dir", + default=BASE_DATAFILE_DIR, + help=f"Optional base datafile directory. 
Default: {BASE_DATAFILE_DIR}", +) @click.pass_context def process_records(ctx, dryrun, study_id, data_dir): """ @@ -177,7 +185,7 @@ def process_records(ctx, dryrun, study_id, data_dir): logging.info("Running in dryrun mode") # Get API client - config = ctx.obj['site_config'] + config = ctx.obj["site_config"] api_client = NmdcRuntimeApi(config) # Get Database dump file paths and the data directory @@ -188,7 +196,6 @@ def process_records(ctx, dryrun, study_id, data_dir): # Initialize re-ID tool reid_tool = ReIdTool(api_client, data_dir) - # Read extracted DB records logging.info(f"Using db_infile: {db_infile}") with open(db_infile, "r") as f: @@ -211,20 +218,24 @@ def process_records(ctx, dryrun, study_id, data_dir): # update Metagenome Assembly new_db = reid_tool.update_metagenome_assembly_set(db_record, new_db) # update Read Based Taxonomy Analysis - new_db = reid_tool.update_read_based_taxonomy_analysis_activity_set(db_record, new_db) + new_db = reid_tool.update_read_based_taxonomy_analysis_activity_set( + db_record, new_db + ) re_ided_db_records.append(new_db) - - json_data = json.loads(json_dumper.dumps(re_ided_db_records, - inject_type=False)) + json_data = json.loads(json_dumper.dumps(re_ided_db_records, inject_type=False)) with open(db_outfile, "w") as f: f.write(json.dumps(json_data, indent=4)) - + @cli.command() -@click.argument('reid_records_file', type=click.Path(exists=True)) -@click.option('--changesheet_only', is_flag=True, default=False,) +@click.argument("reid_records_file", type=click.Path(exists=True)) +@click.option( + "--changesheet_only", + is_flag=True, + default=False, +) @click.pass_context def ingest_records(ctx, reid_records_file, changesheet_only): """ @@ -238,10 +249,10 @@ def ingest_records(ctx, reid_records_file, changesheet_only): reid_base_name = reid_records_filename.split("_")[0] # Get API client(s) - config = ctx.obj['site_config'] + config = ctx.obj["site_config"] api_client = NmdcRuntimeApi(config) api_user_client = 
NmdcRuntimeUserApi(config) - + with open(reid_records_file, "r") as f: db_records = json.load(f) @@ -263,19 +274,21 @@ def ingest_records(ctx, reid_records_file, changesheet_only): legacy_omics_processing_record = resp.json() # delete legacy has_output change = ChangesheetLineItem( - id=omics_processing_id, action="remove item", + id=omics_processing_id, + action="remove item", attribute="has_output", - value="|".join(legacy_omics_processing_record["has_output"]) + "|", ) + value="|".join(legacy_omics_processing_record["has_output"]) + "|", + ) changesheet.line_items.append(change) logging.info(f"changes: {change}") - - # insert new has_output change = ChangesheetLineItem( - id=omics_processing_id, action="insert", + id=omics_processing_id, + action="insert", attribute="has_output", - value="|".join(omics_processing_record["has_output"]) + "|", ) + value="|".join(omics_processing_record["has_output"]) + "|", + ) changesheet.line_items.append(change) logging.info(f"changes: {change}") @@ -295,7 +308,7 @@ def ingest_records(ctx, reid_records_file, changesheet_only): @cli.command() -@click.argument('old_records_file', type=click.Path(exists=True)) +@click.argument("old_records_file", type=click.Path(exists=True)) @click.pass_context def delete_old_records(ctx, old_records_file): """ @@ -303,22 +316,22 @@ def delete_old_records(ctx, old_records_file): delete them using /queries/run endpoint """ - + logging.info(f"Deleting old objects found in : {old_records_file}") old_records_filename = Path(old_records_file).name old_base_name = old_records_filename.split("_")[0] # Get API client(s) - config = ctx.obj['site_config'] + config = ctx.obj["site_config"] api_user_client = NmdcRuntimeUserApi(config) - - #get old db records + + # get old db records with open(old_records_file, "r") as f: old_db_records = json.load(f) - - #set list to capture annotation genes for agg set + + # set list to capture annotation genes for agg set gene_id_list = [] - + for record in old_db_records: 
for set_name, object_record in record.items(): if set_name == "omics_processing_set": @@ -330,32 +343,31 @@ def delete_old_records(ctx, old_records_file): gene_id_list.append(item["id"]) delete_query = { "delete": set_name, - "deletes": [{"q": {"id": item['id']}, "limit": 1}] - } - logging.info(f"Running query: {delete_query}, deleting {set_name} with id: {item['id']}") - + "deletes": [{"q": {"id": item["id"]}, "limit": 1}], + } + logging.info( + f"Running query: {delete_query}, deleting {set_name} with id: {item['id']}" + ) + run_query_response = api_user_client.delete_query(delete_query) - - logging.info(f"Deleting query posted with response: {run_query_response}") - + + logging.info( + f"Deleting query posted with response: {run_query_response}" + ) + for annotation_id in gene_id_list: logging.info(f"Deleting functional aggregate record with id: {annotation_id}") delete_query_agg = { "delete": "functional_annotation_agg", - "deletes": [{"q": {"id": annotation_id}}] - } - + "deletes": [{"q": {"metagenome_annotation_id": annotation_id}}], + } + run_query_agg_response = api_user_client.delete_query(delete_query_agg) - - logging.info(f"Response for deleting function annotation agg record returned: {run_query_agg_response}") - - - - - - - + logging.info( + f"Response for deleting function annotation agg record returned: {run_query_agg_response}" + ) + def _get_data_dir(data_dir, dryrun): """ @@ -369,6 +381,7 @@ def _get_data_dir(data_dir, dryrun): logging.info(f"Using datafile_dir: {data_dir}") return data_dir + def _get_database_paths(study_id, dryrun): """ Return the paths to the input and output data files @@ -383,6 +396,7 @@ def _get_database_paths(study_id, dryrun): db_outfile = DATA_DIR.joinpath(f"{study_id}{db_outfile_suffix}") return db_infile, db_outfile + def _get_legacy_id(omics_processing_record: dict) -> str: """ Get the legacy ID for the given OmicsProcessing record. 
@@ -407,5 +421,6 @@ def _get_legacy_id(omics_processing_record: dict) -> str: legacy_id = legacy_ids[0] return legacy_id -if __name__ == '__main__': + +if __name__ == "__main__": cli(obj={}) From e4fd0272f776753742f6299705e54544b766410f Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 7 Dec 2023 11:53:26 -0800 Subject: [PATCH 85/91] added try-except block to catch errors --- .../re_iding/scripts/re_id_tool.py | 52 ++++++++++++------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index cb7dda3e..8a9c1498 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -9,7 +9,7 @@ from pathlib import Path import json import click - +import requests from linkml_runtime.dumpers import json_dumper from nmdc_automation.api import NmdcRuntimeApi, NmdcRuntimeUserApi @@ -345,28 +345,42 @@ def delete_old_records(ctx, old_records_file): "delete": set_name, "deletes": [{"q": {"id": item["id"]}, "limit": 1}], } - logging.info( - f"Running query: {delete_query}, deleting {set_name} with id: {item['id']}" - ) - - run_query_response = api_user_client.delete_query(delete_query) - - logging.info( - f"Deleting query posted with response: {run_query_response}" - ) + try: + logging.info( + f"Running query: {delete_query}, deleting {set_name} with id: {item['id']}" + ) + + run_query_response = api_user_client.delete_query( + delete_query + ) + + logging.info( + f"Deleting query posted with response: {run_query_response}" + ) + except: + logging.info( + f"An error occured while running: {delete_query}, response retutrned: {e}" + ) for annotation_id in gene_id_list: - logging.info(f"Deleting functional aggregate record with id: {annotation_id}") - delete_query_agg = { - "delete": "functional_annotation_agg", - "deletes": [{"q": {"metagenome_annotation_id": annotation_id}}], - } + try: + logging.info( + f"Deleting 
functional aggregate record with id: {annotation_id}" + ) + delete_query_agg = { + "delete": "functional_annotation_agg", + "deletes": [{"q": {"metagenome_annotation_id": annotation_id}}], + } - run_query_agg_response = api_user_client.delete_query(delete_query_agg) + run_query_agg_response = api_user_client.delete_query(delete_query_agg) - logging.info( - f"Response for deleting function annotation agg record returned: {run_query_agg_response}" - ) + logging.info( + f"Response for deleting functional annotation agg record returned: {run_query_agg_response}" + ) + except requests.exceptions.RequestException as e: + logging.error( + f"An error occurred while deleting annotation id {annotation_id}: {e}" + ) def _get_data_dir(data_dir, dryrun): From afb0a740b873464c7da53d52457d4e68078b9b25 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Tue, 19 Dec 2023 12:15:46 -0800 Subject: [PATCH 86/91] fixed functional annotation agg --- nmdc_automation/api/nmdcapi.py | 4 ++-- nmdc_automation/re_iding/scripts/re_id_tool.py | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 29861d8c..aa386997 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -446,8 +446,8 @@ def run_query(self, query: dict): """ self.ensure_token() - url = "https://api.microbiomedata.org/queries/run" - + url = "https://api.microbiomedata.org/queries:run" + response = requests.post(url, json=query, headers=self.headers) return response.json() diff --git a/nmdc_automation/re_iding/scripts/re_id_tool.py b/nmdc_automation/re_iding/scripts/re_id_tool.py index 8a9c1498..f39d83d9 100755 --- a/nmdc_automation/re_iding/scripts/re_id_tool.py +++ b/nmdc_automation/re_iding/scripts/re_id_tool.py @@ -331,7 +331,6 @@ def delete_old_records(ctx, old_records_file): # set list to capture annotation genes for agg set gene_id_list = [] - for record in old_db_records: for set_name, object_record in 
record.items(): if set_name == "omics_processing_set": @@ -350,14 +349,14 @@ def delete_old_records(ctx, old_records_file): f"Running query: {delete_query}, deleting {set_name} with id: {item['id']}" ) - run_query_response = api_user_client.delete_query( + run_query_response = api_user_client.run_query( delete_query ) logging.info( f"Deleting query posted with response: {run_query_response}" ) - except: + except requests.exceptions.RequestException as e: logging.info( f"An error occured while running: {delete_query}, response retutrned: {e}" ) @@ -369,10 +368,10 @@ def delete_old_records(ctx, old_records_file): ) delete_query_agg = { "delete": "functional_annotation_agg", - "deletes": [{"q": {"metagenome_annotation_id": annotation_id}}], + "deletes": [{"q": {"metagenome_annotation_id": annotation_id}, "limit": 1}], } - run_query_agg_response = api_user_client.delete_query(delete_query_agg) + run_query_agg_response = api_user_client.run_query(delete_query_agg) logging.info( f"Response for deleting functional annotation agg record returned: {run_query_agg_response}" From 65fa67ea55ba3db2ba8d78eb20e34d1edb3e5c41 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 9 Jan 2024 07:59:21 -0800 Subject: [PATCH 87/91] update make test to use poetry remove max_page_size args from test --- Makefile | 2 +- tests/test_nmdcapi.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 51500800..a1dce99a 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,3 @@ test: - PYTHONPATH=$(shell pwd) pytest --cov-report term --cov=nmdc_automation ./tests + poetry run pytest --cov-report term --cov=nmdc_automation ./tests diff --git a/tests/test_nmdcapi.py b/tests/test_nmdcapi.py index a5525c34..2d93ceeb 100644 --- a/tests/test_nmdcapi.py +++ b/tests/test_nmdcapi.py @@ -58,15 +58,15 @@ def test_list_funcs(mock_api, requests_mock): # TODO: ccheck the full url requests_mock.get("http://localhost/jobs", json=mock_resp) - resp = 
n.list_jobs(filt="a=b", max_page_size=10) + resp = n.list_jobs(filt="a=b") assert resp is not None requests_mock.get("http://localhost/operations", json=[]) - resp = n.list_ops(filt="a=b", max_page_size=10) + resp = n.list_ops(filt="a=b") assert resp is not None requests_mock.get("http://localhost/objects", json=[]) - resp = n.list_objs(filt="a=b", max_page_size=10) + resp = n.list_objs(filt="a=b") assert resp is not None From e1161d8fb5899cd334c8accdf6cd2cdc4dc77df5 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 9 Jan 2024 08:07:24 -0800 Subject: [PATCH 88/91] poetry add flake8 --- poetry.lock | 1039 +++++++++++++++++------------------------------- pyproject.toml | 2 + 2 files changed, 375 insertions(+), 666 deletions(-) diff --git a/poetry.lock b/poetry.lock index 05fa8c4a..08a4c1ea 100644 --- a/poetry.lock +++ b/poetry.lock @@ -42,21 +42,22 @@ test = ["dateparser (==1.*)", "pre-commit", "pytest", "pytest-cov", "pytest-mock [[package]] name = "attrs" -version = "23.1.0" +version = "23.2.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" files = [ - {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, - {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, + {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, + {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, ] [package.extras] cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]", "pre-commit"] +dev = ["attrs[tests]", "pre-commit"] docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", 
"pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] +tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] [[package]] name = "cachetools" @@ -228,73 +229,6 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -[[package]] -name = "coverage" -version = "7.3.2" -description = "Code coverage measurement for Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "coverage-7.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d872145f3a3231a5f20fd48500274d7df222e291d90baa2026cc5152b7ce86bf"}, - {file = "coverage-7.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:310b3bb9c91ea66d59c53fa4989f57d2436e08f18fb2f421a1b0b6b8cc7fffda"}, - {file = "coverage-7.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f47d39359e2c3779c5331fc740cf4bce6d9d680a7b4b4ead97056a0ae07cb49a"}, - {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa72dbaf2c2068404b9870d93436e6d23addd8bbe9295f49cbca83f6e278179c"}, - {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:beaa5c1b4777f03fc63dfd2a6bd820f73f036bfb10e925fce067b00a340d0f3f"}, - {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dbc1b46b92186cc8074fee9d9fbb97a9dd06c6cbbef391c2f59d80eabdf0faa6"}, - {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:315a989e861031334d7bee1f9113c8770472db2ac484e5b8c3173428360a9148"}, - {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d1bc430677773397f64a5c88cb522ea43175ff16f8bfcc89d467d974cb2274f9"}, - {file = "coverage-7.3.2-cp310-cp310-win32.whl", hash = 
"sha256:a889ae02f43aa45032afe364c8ae84ad3c54828c2faa44f3bfcafecb5c96b02f"}, - {file = "coverage-7.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c0ba320de3fb8c6ec16e0be17ee1d3d69adcda99406c43c0409cb5c41788a611"}, - {file = "coverage-7.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ac8c802fa29843a72d32ec56d0ca792ad15a302b28ca6203389afe21f8fa062c"}, - {file = "coverage-7.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:89a937174104339e3a3ffcf9f446c00e3a806c28b1841c63edb2b369310fd074"}, - {file = "coverage-7.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e267e9e2b574a176ddb983399dec325a80dbe161f1a32715c780b5d14b5f583a"}, - {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2443cbda35df0d35dcfb9bf8f3c02c57c1d6111169e3c85fc1fcc05e0c9f39a3"}, - {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4175e10cc8dda0265653e8714b3174430b07c1dca8957f4966cbd6c2b1b8065a"}, - {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf38419fb1a347aaf63481c00f0bdc86889d9fbf3f25109cf96c26b403fda1"}, - {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5c913b556a116b8d5f6ef834038ba983834d887d82187c8f73dec21049abd65c"}, - {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1981f785239e4e39e6444c63a98da3a1db8e971cb9ceb50a945ba6296b43f312"}, - {file = "coverage-7.3.2-cp311-cp311-win32.whl", hash = "sha256:43668cabd5ca8258f5954f27a3aaf78757e6acf13c17604d89648ecc0cc66640"}, - {file = "coverage-7.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10c39c0452bf6e694511c901426d6b5ac005acc0f78ff265dbe36bf81f808a2"}, - {file = "coverage-7.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4cbae1051ab791debecc4a5dcc4a1ff45fc27b91b9aee165c8a27514dd160836"}, - {file = 
"coverage-7.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12d15ab5833a997716d76f2ac1e4b4d536814fc213c85ca72756c19e5a6b3d63"}, - {file = "coverage-7.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c7bba973ebee5e56fe9251300c00f1579652587a9f4a5ed8404b15a0471f216"}, - {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe494faa90ce6381770746077243231e0b83ff3f17069d748f645617cefe19d4"}, - {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6e9589bd04d0461a417562649522575d8752904d35c12907d8c9dfeba588faf"}, - {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d51ac2a26f71da1b57f2dc81d0e108b6ab177e7d30e774db90675467c847bbdf"}, - {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:99b89d9f76070237975b315b3d5f4d6956ae354a4c92ac2388a5695516e47c84"}, - {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fa28e909776dc69efb6ed975a63691bc8172b64ff357e663a1bb06ff3c9b589a"}, - {file = "coverage-7.3.2-cp312-cp312-win32.whl", hash = "sha256:289fe43bf45a575e3ab10b26d7b6f2ddb9ee2dba447499f5401cfb5ecb8196bb"}, - {file = "coverage-7.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:7dbc3ed60e8659bc59b6b304b43ff9c3ed858da2839c78b804973f613d3e92ed"}, - {file = "coverage-7.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f94b734214ea6a36fe16e96a70d941af80ff3bfd716c141300d95ebc85339738"}, - {file = "coverage-7.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:af3d828d2c1cbae52d34bdbb22fcd94d1ce715d95f1a012354a75e5913f1bda2"}, - {file = "coverage-7.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630b13e3036e13c7adc480ca42fa7afc2a5d938081d28e20903cf7fd687872e2"}, - {file = 
"coverage-7.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9eacf273e885b02a0273bb3a2170f30e2d53a6d53b72dbe02d6701b5296101c"}, - {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8f17966e861ff97305e0801134e69db33b143bbfb36436efb9cfff6ec7b2fd9"}, - {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b4275802d16882cf9c8b3d057a0839acb07ee9379fa2749eca54efbce1535b82"}, - {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:72c0cfa5250f483181e677ebc97133ea1ab3eb68645e494775deb6a7f6f83901"}, - {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb536f0dcd14149425996821a168f6e269d7dcd2c273a8bff8201e79f5104e76"}, - {file = "coverage-7.3.2-cp38-cp38-win32.whl", hash = "sha256:307adb8bd3abe389a471e649038a71b4eb13bfd6b7dd9a129fa856f5c695cf92"}, - {file = "coverage-7.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:88ed2c30a49ea81ea3b7f172e0269c182a44c236eb394718f976239892c0a27a"}, - {file = "coverage-7.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b631c92dfe601adf8f5ebc7fc13ced6bb6e9609b19d9a8cd59fa47c4186ad1ce"}, - {file = "coverage-7.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d3d9df4051c4a7d13036524b66ecf7a7537d14c18a384043f30a303b146164e9"}, - {file = "coverage-7.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f7363d3b6a1119ef05015959ca24a9afc0ea8a02c687fe7e2d557705375c01f"}, - {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f11cc3c967a09d3695d2a6f03fb3e6236622b93be7a4b5dc09166a861be6d25"}, - {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:149de1d2401ae4655c436a3dced6dd153f4c3309f599c3d4bd97ab172eaf02d9"}, - {file = 
"coverage-7.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3a4006916aa6fee7cd38db3bfc95aa9c54ebb4ffbfc47c677c8bba949ceba0a6"}, - {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9028a3871280110d6e1aa2df1afd5ef003bab5fb1ef421d6dc748ae1c8ef2ebc"}, - {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9f805d62aec8eb92bab5b61c0f07329275b6f41c97d80e847b03eb894f38d083"}, - {file = "coverage-7.3.2-cp39-cp39-win32.whl", hash = "sha256:d1c88ec1a7ff4ebca0219f5b1ef863451d828cccf889c173e1253aa84b1e07ce"}, - {file = "coverage-7.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4767da59464bb593c07afceaddea61b154136300881844768037fd5e859353f"}, - {file = "coverage-7.3.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:ae97af89f0fbf373400970c0a21eef5aa941ffeed90aee43650b81f7d7f47637"}, - {file = "coverage-7.3.2.tar.gz", hash = "sha256:be32ad29341b0170e795ca590e1c07e81fc061cb5b10c74ce7203491484404ef"}, -] - -[package.dependencies] -tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} - -[package.extras] -toml = ["tomli"] - [[package]] name = "curies" version = "0.7.4" @@ -356,13 +290,13 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] [[package]] name = "distlib" -version = "0.3.7" +version = "0.3.8" description = "Distribution utilities" optional = false python-versions = "*" files = [ - {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, - {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, ] [[package]] @@ -427,19 +361,19 @@ typing = ["typing-extensions 
(>=4.8)"] [[package]] name = "flake8" -version = "6.1.0" +version = "7.0.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" files = [ - {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, - {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, + {file = "flake8-7.0.0-py2.py3-none-any.whl", hash = "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3"}, + {file = "flake8-7.0.0.tar.gz", hash = "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132"}, ] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" pycodestyle = ">=2.11.0,<2.12.0" -pyflakes = ">=3.1.0,<3.2.0" +pyflakes = ">=3.2.0,<3.3.0" [[package]] name = "fqdn" @@ -470,72 +404,73 @@ test = ["coverage", "mock (>=4)", "pytest (>=7)", "pytest-cov", "pytest-mock (>= [[package]] name = "greenlet" -version = "3.0.1" +version = "3.0.3" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.7" files = [ - {file = "greenlet-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f89e21afe925fcfa655965ca8ea10f24773a1791400989ff32f467badfe4a064"}, - {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28e89e232c7593d33cac35425b58950789962011cc274aa43ef8865f2e11f46d"}, - {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8ba29306c5de7717b5761b9ea74f9c72b9e2b834e24aa984da99cbfc70157fd"}, - {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19bbdf1cce0346ef7341705d71e2ecf6f41a35c311137f29b8a2dc2341374565"}, - {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599daf06ea59bfedbec564b1692b0166a0045f32b6f0933b0dd4df59a854caf2"}, - {file = 
"greenlet-3.0.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b641161c302efbb860ae6b081f406839a8b7d5573f20a455539823802c655f63"}, - {file = "greenlet-3.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d57e20ba591727da0c230ab2c3f200ac9d6d333860d85348816e1dca4cc4792e"}, - {file = "greenlet-3.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5805e71e5b570d490938d55552f5a9e10f477c19400c38bf1d5190d760691846"}, - {file = "greenlet-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:52e93b28db27ae7d208748f45d2db8a7b6a380e0d703f099c949d0f0d80b70e9"}, - {file = "greenlet-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f7bfb769f7efa0eefcd039dd19d843a4fbfbac52f1878b1da2ed5793ec9b1a65"}, - {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91e6c7db42638dc45cf2e13c73be16bf83179f7859b07cfc139518941320be96"}, - {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1757936efea16e3f03db20efd0cd50a1c86b06734f9f7338a90c4ba85ec2ad5a"}, - {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19075157a10055759066854a973b3d1325d964d498a805bb68a1f9af4aaef8ec"}, - {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9d21aaa84557d64209af04ff48e0ad5e28c5cca67ce43444e939579d085da72"}, - {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2847e5d7beedb8d614186962c3d774d40d3374d580d2cbdab7f184580a39d234"}, - {file = "greenlet-3.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:97e7ac860d64e2dcba5c5944cfc8fa9ea185cd84061c623536154d5a89237884"}, - {file = "greenlet-3.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b2c02d2ad98116e914d4f3155ffc905fd0c025d901ead3f6ed07385e19122c94"}, - {file = "greenlet-3.0.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:22f79120a24aeeae2b4471c711dcf4f8c736a2bb2fabad2a67ac9a55ea72523c"}, - {file = "greenlet-3.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:100f78a29707ca1525ea47388cec8a049405147719f47ebf3895e7509c6446aa"}, - {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60d5772e8195f4e9ebf74046a9121bbb90090f6550f81d8956a05387ba139353"}, - {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:daa7197b43c707462f06d2c693ffdbb5991cbb8b80b5b984007de431493a319c"}, - {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea6b8aa9e08eea388c5f7a276fabb1d4b6b9d6e4ceb12cc477c3d352001768a9"}, - {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d11ebbd679e927593978aa44c10fc2092bc454b7d13fdc958d3e9d508aba7d0"}, - {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dbd4c177afb8a8d9ba348d925b0b67246147af806f0b104af4d24f144d461cd5"}, - {file = "greenlet-3.0.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20107edf7c2c3644c67c12205dc60b1bb11d26b2610b276f97d666110d1b511d"}, - {file = "greenlet-3.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8bef097455dea90ffe855286926ae02d8faa335ed8e4067326257cb571fc1445"}, - {file = "greenlet-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:b2d3337dcfaa99698aa2377c81c9ca72fcd89c07e7eb62ece3f23a3fe89b2ce4"}, - {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80ac992f25d10aaebe1ee15df45ca0d7571d0f70b645c08ec68733fb7a020206"}, - {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:337322096d92808f76ad26061a8f5fccb22b0809bea39212cd6c406f6a7060d2"}, - {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:b9934adbd0f6e476f0ecff3c94626529f344f57b38c9a541f87098710b18af0a"}, - {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc4d815b794fd8868c4d67602692c21bf5293a75e4b607bb92a11e821e2b859a"}, - {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41bdeeb552d814bcd7fb52172b304898a35818107cc8778b5101423c9017b3de"}, - {file = "greenlet-3.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6e6061bf1e9565c29002e3c601cf68569c450be7fc3f7336671af7ddb4657166"}, - {file = "greenlet-3.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fa24255ae3c0ab67e613556375a4341af04a084bd58764731972bcbc8baeba36"}, - {file = "greenlet-3.0.1-cp37-cp37m-win32.whl", hash = "sha256:b489c36d1327868d207002391f662a1d163bdc8daf10ab2e5f6e41b9b96de3b1"}, - {file = "greenlet-3.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f33f3258aae89da191c6ebaa3bc517c6c4cbc9b9f689e5d8452f7aedbb913fa8"}, - {file = "greenlet-3.0.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:d2905ce1df400360463c772b55d8e2518d0e488a87cdea13dd2c71dcb2a1fa16"}, - {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a02d259510b3630f330c86557331a3b0e0c79dac3d166e449a39363beaae174"}, - {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55d62807f1c5a1682075c62436702aaba941daa316e9161e4b6ccebbbf38bda3"}, - {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3fcc780ae8edbb1d050d920ab44790201f027d59fdbd21362340a85c79066a74"}, - {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eddd98afc726f8aee1948858aed9e6feeb1758889dfd869072d4465973f6bfd"}, - {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eabe7090db68c981fca689299c2d116400b553f4b713266b130cfc9e2aa9c5a9"}, - {file = 
"greenlet-3.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f2f6d303f3dee132b322a14cd8765287b8f86cdc10d2cb6a6fae234ea488888e"}, - {file = "greenlet-3.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d923ff276f1c1f9680d32832f8d6c040fe9306cbfb5d161b0911e9634be9ef0a"}, - {file = "greenlet-3.0.1-cp38-cp38-win32.whl", hash = "sha256:0b6f9f8ca7093fd4433472fd99b5650f8a26dcd8ba410e14094c1e44cd3ceddd"}, - {file = "greenlet-3.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:990066bff27c4fcf3b69382b86f4c99b3652bab2a7e685d968cd4d0cfc6f67c6"}, - {file = "greenlet-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ce85c43ae54845272f6f9cd8320d034d7a946e9773c693b27d620edec825e376"}, - {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89ee2e967bd7ff85d84a2de09df10e021c9b38c7d91dead95b406ed6350c6997"}, - {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87c8ceb0cf8a5a51b8008b643844b7f4a8264a2c13fcbcd8a8316161725383fe"}, - {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6a8c9d4f8692917a3dc7eb25a6fb337bff86909febe2f793ec1928cd97bedfc"}, - {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fbc5b8f3dfe24784cee8ce0be3da2d8a79e46a276593db6868382d9c50d97b1"}, - {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85d2b77e7c9382f004b41d9c72c85537fac834fb141b0296942d52bf03fe4a3d"}, - {file = "greenlet-3.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:696d8e7d82398e810f2b3622b24e87906763b6ebfd90e361e88eb85b0e554dc8"}, - {file = "greenlet-3.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:329c5a2e5a0ee942f2992c5e3ff40be03e75f745f48847f118a3cfece7a28546"}, - {file = "greenlet-3.0.1-cp39-cp39-win32.whl", hash = "sha256:cf868e08690cb89360eebc73ba4be7fb461cfbc6168dd88e2fbbe6f31812cd57"}, - {file = 
"greenlet-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:ac4a39d1abae48184d420aa8e5e63efd1b75c8444dd95daa3e03f6c6310e9619"}, - {file = "greenlet-3.0.1.tar.gz", hash = "sha256:816bd9488a94cba78d93e1abb58000e8266fa9cc2aa9ccdd6eb0696acb24005b"}, + {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, + {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, + {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, + {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, + {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, + {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, + {file = "greenlet-3.0.3-cp37-cp37m-macosx_11_0_universal2.whl", hash = "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6"}, + {file = 
"greenlet-3.0.3-cp37-cp37m-win32.whl", hash = "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d"}, + {file = "greenlet-3.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67"}, + {file = "greenlet-3.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da"}, + {file = "greenlet-3.0.3-cp38-cp38-win32.whl", hash = "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3"}, + {file = "greenlet-3.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf"}, + {file = "greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53"}, + {file = 
"greenlet-3.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113"}, + {file = "greenlet-3.0.3-cp39-cp39-win32.whl", hash = "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e"}, + {file = "greenlet-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067"}, + {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, ] [package.extras] -docs = ["Sphinx"] +docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] [[package]] @@ -726,13 +661,13 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "jsonschema-specifications" -version = "2023.11.2" +version = "2023.12.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.8" files = [ - {file = 
"jsonschema_specifications-2023.11.2-py3-none-any.whl", hash = "sha256:e74ba7c0a65e8cb49dc26837d6cfe576557084a8b423ed16a420984228104f93"}, - {file = "jsonschema_specifications-2023.11.2.tar.gz", hash = "sha256:9472fc4fea474cd74bea4a2b190daeccb5a9e4db2ea80efcf7a1b582fc9a81b8"}, + {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, + {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, ] [package.dependencies] @@ -740,13 +675,13 @@ referencing = ">=0.31.0" [[package]] name = "linkml" -version = "1.6.3" +version = "1.6.7" description = "Linked Open Data Modeling Language" optional = false python-versions = ">=3.8.1,<4.0.0" files = [ - {file = "linkml-1.6.3-py3-none-any.whl", hash = "sha256:2bcfa9e35b3e0d868f396fa48ede1b9e3c6016df956dd5ca92d70ff621ffc77e"}, - {file = "linkml-1.6.3.tar.gz", hash = "sha256:2692243c9fb1c262ea83b725c70d6f537844ae7a9405101c94bccf65def6d952"}, + {file = "linkml-1.6.7-py3-none-any.whl", hash = "sha256:e7c936d1025891138e6b2876ed1b388fa8495281fc152f873ddc9ee60a58950d"}, + {file = "linkml-1.6.7.tar.gz", hash = "sha256:fd7d55a3a1d894a4a5de25336bc70eaeb6e40582ff497da03b51589219c73c64"}, ] [package.dependencies] @@ -795,13 +730,13 @@ linkml-runtime = ">=1.1.6" [[package]] name = "linkml-runtime" -version = "1.6.2" +version = "1.6.3" description = "Runtime environment for LinkML, the Linked open data modeling language" optional = false python-versions = ">=3.7.6,<4.0.0" files = [ - {file = "linkml_runtime-1.6.2-py3-none-any.whl", hash = "sha256:d9a915faf0ee8c8749f42d38394a06d37f016e155e077ff9cb5c5a21f24e19d6"}, - {file = "linkml_runtime-1.6.2.tar.gz", hash = "sha256:bcbeff96d24433276755db85375e3dff0c9af0fd04f8a05f7ccd8a669a9e9877"}, + {file = "linkml_runtime-1.6.3-py3-none-any.whl", hash = "sha256:08f616302ce493be775104c87bd5a2ec0eb2c67624e611a3f7e28c978d08a4d1"}, + {file = 
"linkml_runtime-1.6.3.tar.gz", hash = "sha256:88e3d0b776055723d187128c03527145dc1ffdc2d2a69e89f8aabce203d418a3"}, ] [package.dependencies] @@ -899,21 +834,6 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] -[[package]] -name = "mongomock" -version = "4.1.2" -description = "Fake pymongo stub for testing simple MongoDB-dependent code" -optional = false -python-versions = "*" -files = [ - {file = "mongomock-4.1.2-py2.py3-none-any.whl", hash = "sha256:08a24938a05c80c69b6b8b19a09888d38d8c6e7328547f94d46cadb7f47209f2"}, - {file = "mongomock-4.1.2.tar.gz", hash = "sha256:f06cd62afb8ae3ef63ba31349abd220a657ef0dd4f0243a29587c5213f931b7d"}, -] - -[package.dependencies] -packaging = "*" -sentinels = "*" - [[package]] name = "nmdc-schema" version = "7.8.0" @@ -929,51 +849,6 @@ files = [ linkml = ">=1.5.6,<2.0.0" linkml-runtime = ">=1.5.4,<2.0.0" -[[package]] -name = "numpy" -version = "1.26.2" -description = "Fundamental package for array computing in Python" -optional = false -python-versions = ">=3.9" -files = [ - {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"}, - {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"}, - {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"}, - {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"}, - {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"}, - {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"}, - {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"}, - {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"}, - {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"}, - {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"}, - {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"}, - {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"}, - {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"}, - {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"}, - {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"}, - {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"}, - {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"}, - {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"}, - {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"}, - {file = 
"numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"}, - {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"}, - {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"}, - {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"}, - {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"}, - {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"}, - {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"}, - {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"}, - {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"}, - {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"}, - {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"}, - {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"}, - {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"}, - {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"}, - {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"}, - {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"}, - {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"}, -] - [[package]] name = "openpyxl" version = "3.1.2" @@ -1013,74 +888,6 @@ files = [ {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] -[[package]] -name = "pandas" -version = "2.1.3" -description = "Powerful data structures for data analysis, time series, and statistics" -optional = false -python-versions = ">=3.9" -files = [ - {file = "pandas-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acf08a73b5022b479c1be155d4988b72f3020f308f7a87c527702c5f8966d34f"}, - {file = "pandas-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cc4469ff0cf9aa3a005870cb49ab8969942b7156e0a46cc3f5abd6b11051dfb"}, - {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35172bff95f598cc5866c047f43c7f4df2c893acd8e10e6653a4b792ed7f19bb"}, - {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59dfe0e65a2f3988e940224e2a70932edc964df79f3356e5f2997c7d63e758b4"}, - {file = "pandas-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0296a66200dee556850d99b24c54c7dfa53a3264b1ca6f440e42bad424caea03"}, - {file = "pandas-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:465571472267a2d6e00657900afadbe6097c8e1dc43746917db4dfc862e8863e"}, - {file = "pandas-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04d4c58e1f112a74689da707be31cf689db086949c71828ef5da86727cfe3f82"}, - {file = 
"pandas-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fa2ad4ff196768ae63a33f8062e6838efed3a319cf938fdf8b95e956c813042"}, - {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4441ac94a2a2613e3982e502ccec3bdedefe871e8cea54b8775992485c5660ef"}, - {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ded6ff28abbf0ea7689f251754d3789e1edb0c4d0d91028f0b980598418a58"}, - {file = "pandas-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca5680368a5139d4920ae3dc993eb5106d49f814ff24018b64d8850a52c6ed2"}, - {file = "pandas-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:de21e12bf1511190fc1e9ebc067f14ca09fccfb189a813b38d63211d54832f5f"}, - {file = "pandas-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a5d53c725832e5f1645e7674989f4c106e4b7249c1d57549023ed5462d73b140"}, - {file = "pandas-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7cf4cf26042476e39394f1f86868d25b265ff787c9b2f0d367280f11afbdee6d"}, - {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72c84ec1b1d8e5efcbff5312abe92bfb9d5b558f11e0cf077f5496c4f4a3c99e"}, - {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f539e113739a3e0cc15176bf1231a553db0239bfa47a2c870283fd93ba4f683"}, - {file = "pandas-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc77309da3b55732059e484a1efc0897f6149183c522390772d3561f9bf96c00"}, - {file = "pandas-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:08637041279b8981a062899da0ef47828df52a1838204d2b3761fbd3e9fcb549"}, - {file = "pandas-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b99c4e51ef2ed98f69099c72c75ec904dd610eb41a32847c4fcbc1a975f2d2b8"}, - {file = "pandas-2.1.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f7ea8ae8004de0381a2376662c0505bb0a4f679f4c61fbfd122aa3d1b0e5f09d"}, - {file = 
"pandas-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd76d67ca2d48f56e2db45833cf9d58f548f97f61eecd3fdc74268417632b8a"}, - {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1329dbe93a880a3d7893149979caa82d6ba64a25e471682637f846d9dbc10dd2"}, - {file = "pandas-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:321ecdb117bf0f16c339cc6d5c9a06063854f12d4d9bc422a84bb2ed3207380a"}, - {file = "pandas-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:11a771450f36cebf2a4c9dbd3a19dfa8c46c4b905a3ea09dc8e556626060fe71"}, - {file = "pandas-2.1.3.tar.gz", hash = "sha256:22929f84bca106921917eb73c1521317ddd0a4c71b395bcf767a106e3494209f"}, -] - -[package.dependencies] -numpy = [ - {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, -] -python-dateutil = ">=2.8.2" -pytz = ">=2020.1" -tzdata = ">=2022.1" - -[package.extras] -all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] -aws = ["s3fs (>=2022.05.0)"] -clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] -compression = ["zstandard (>=0.17.0)"] -computation = ["scipy (>=1.8.1)", "xarray 
(>=2022.03.0)"] -consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] -feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2022.05.0)"] -gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] -hdf5 = ["tables (>=3.7.0)"] -html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] -mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] -parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] -plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] -spss = ["pyreadstat (>=1.1.5)"] -sql-other = ["SQLAlchemy (>=1.4.36)"] -test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.8.0)"] - [[package]] name = "parse" version = "1.20.0" @@ -1094,13 +901,13 @@ files = [ [[package]] name = "platformdirs" -version = "4.0.0" +version = "4.1.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "platformdirs-4.0.0-py3-none-any.whl", hash = "sha256:118c954d7e949b35437270383a3f2531e99dd93cf7ce4dc8340d3356d30f173b"}, - {file = "platformdirs-4.0.0.tar.gz", hash = "sha256:cb633b2bcf10c51af60beb0ab06d2f1d69064b43abf4c185ca6b28865f3f9731"}, + {file = "platformdirs-4.1.0-py3-none-any.whl", hash = "sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380"}, + {file = "platformdirs-4.1.0.tar.gz", hash = "sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420"}, ] [package.extras] @@ -1152,13 +959,13 @@ requests = ">=2.28.1,<3.0.0" [[package]] name = "prefixmaps" -version = "0.2.0" +version = "0.2.1" description = "A python library for retrieving semantic prefix maps" optional = false python-versions = ">=3.9,<4.0" files = [ - {file = "prefixmaps-0.2.0-py3-none-any.whl", hash = "sha256:a0f87077ac1651f1b386c178b80cd2edc294a6b7b628b30143ac98a1dae6f0b7"}, - {file = "prefixmaps-0.2.0.tar.gz", hash = "sha256:0e3fdabf90b991e1f44a2b455e9042f6ba0e498fa7a769ffd8a6ad248f43af0c"}, + {file = "prefixmaps-0.2.1-py3-none-any.whl", hash = "sha256:0d204caeb4c368cb7a981de98117665fd8885ba76f058714f27f5963e620d5a1"}, + {file = "prefixmaps-0.2.1.tar.gz", hash = "sha256:8aed88739f900e6c0df0ada061b90223c79c29ca074f5ae6d3de423aef8fa3cf"}, ] [package.dependencies] @@ -1185,18 +992,18 @@ files = [ [[package]] name = "pydantic" -version = "2.5.2" +version = "2.5.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-2.5.2-py3-none-any.whl", hash = "sha256:80c50fb8e3dcecfddae1adbcc00ec5822918490c99ab31f6cf6140ca1c1429f0"}, - {file = "pydantic-2.5.2.tar.gz", hash = "sha256:ff177ba64c6faf73d7afa2e8cad38fd456c0dbe01c9954e71038001cd15a6edd"}, + {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"}, + {file = 
"pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.14.5" +pydantic-core = "2.14.6" typing-extensions = ">=4.6.1" [package.extras] @@ -1204,116 +1011,116 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.14.5" +version = "2.14.6" description = "" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic_core-2.14.5-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:7e88f5696153dc516ba6e79f82cc4747e87027205f0e02390c21f7cb3bd8abfd"}, - {file = "pydantic_core-2.14.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4641e8ad4efb697f38a9b64ca0523b557c7931c5f84e0fd377a9a3b05121f0de"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:774de879d212db5ce02dfbf5b0da9a0ea386aeba12b0b95674a4ce0593df3d07"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ebb4e035e28f49b6f1a7032920bb9a0c064aedbbabe52c543343d39341a5b2a3"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b53e9ad053cd064f7e473a5f29b37fc4cc9dc6d35f341e6afc0155ea257fc911"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aa1768c151cf562a9992462239dfc356b3d1037cc5a3ac829bb7f3bda7cc1f9"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eac5c82fc632c599f4639a5886f96867ffced74458c7db61bc9a66ccb8ee3113"}, - {file = "pydantic_core-2.14.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2ae91f50ccc5810b2f1b6b858257c9ad2e08da70bf890dee02de1775a387c66"}, - {file = "pydantic_core-2.14.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6b9ff467ffbab9110e80e8c8de3bcfce8e8b0fd5661ac44a09ae5901668ba997"}, - {file = 
"pydantic_core-2.14.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:61ea96a78378e3bd5a0be99b0e5ed00057b71f66115f5404d0dae4819f495093"}, - {file = "pydantic_core-2.14.5-cp310-none-win32.whl", hash = "sha256:bb4c2eda937a5e74c38a41b33d8c77220380a388d689bcdb9b187cf6224c9720"}, - {file = "pydantic_core-2.14.5-cp310-none-win_amd64.whl", hash = "sha256:b7851992faf25eac90bfcb7bfd19e1f5ffa00afd57daec8a0042e63c74a4551b"}, - {file = "pydantic_core-2.14.5-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:4e40f2bd0d57dac3feb3a3aed50f17d83436c9e6b09b16af271b6230a2915459"}, - {file = "pydantic_core-2.14.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ab1cdb0f14dc161ebc268c09db04d2c9e6f70027f3b42446fa11c153521c0e88"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aae7ea3a1c5bb40c93cad361b3e869b180ac174656120c42b9fadebf685d121b"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:60b7607753ba62cf0739177913b858140f11b8af72f22860c28eabb2f0a61937"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2248485b0322c75aee7565d95ad0e16f1c67403a470d02f94da7344184be770f"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:823fcc638f67035137a5cd3f1584a4542d35a951c3cc68c6ead1df7dac825c26"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96581cfefa9123accc465a5fd0cc833ac4d75d55cc30b633b402e00e7ced00a6"}, - {file = "pydantic_core-2.14.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a33324437018bf6ba1bb0f921788788641439e0ed654b233285b9c69704c27b4"}, - {file = "pydantic_core-2.14.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9bd18fee0923ca10f9a3ff67d4851c9d3e22b7bc63d1eddc12f439f436f2aada"}, - {file = 
"pydantic_core-2.14.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:853a2295c00f1d4429db4c0fb9475958543ee80cfd310814b5c0ef502de24dda"}, - {file = "pydantic_core-2.14.5-cp311-none-win32.whl", hash = "sha256:cb774298da62aea5c80a89bd58c40205ab4c2abf4834453b5de207d59d2e1651"}, - {file = "pydantic_core-2.14.5-cp311-none-win_amd64.whl", hash = "sha256:e87fc540c6cac7f29ede02e0f989d4233f88ad439c5cdee56f693cc9c1c78077"}, - {file = "pydantic_core-2.14.5-cp311-none-win_arm64.whl", hash = "sha256:57d52fa717ff445cb0a5ab5237db502e6be50809b43a596fb569630c665abddf"}, - {file = "pydantic_core-2.14.5-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:e60f112ac88db9261ad3a52032ea46388378034f3279c643499edb982536a093"}, - {file = "pydantic_core-2.14.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6e227c40c02fd873c2a73a98c1280c10315cbebe26734c196ef4514776120aeb"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0cbc7fff06a90bbd875cc201f94ef0ee3929dfbd5c55a06674b60857b8b85ed"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:103ef8d5b58596a731b690112819501ba1db7a36f4ee99f7892c40da02c3e189"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c949f04ecad823f81b1ba94e7d189d9dfb81edbb94ed3f8acfce41e682e48cef"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1452a1acdf914d194159439eb21e56b89aa903f2e1c65c60b9d874f9b950e5d"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb4679d4c2b089e5ef89756bc73e1926745e995d76e11925e3e96a76d5fa51fc"}, - {file = "pydantic_core-2.14.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf9d3fe53b1ee360e2421be95e62ca9b3296bf3f2fb2d3b83ca49ad3f925835e"}, - {file = "pydantic_core-2.14.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:70f4b4851dbb500129681d04cc955be2a90b2248d69273a787dda120d5cf1f69"}, - {file = "pydantic_core-2.14.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:59986de5710ad9613ff61dd9b02bdd2f615f1a7052304b79cc8fa2eb4e336d2d"}, - {file = "pydantic_core-2.14.5-cp312-none-win32.whl", hash = "sha256:699156034181e2ce106c89ddb4b6504c30db8caa86e0c30de47b3e0654543260"}, - {file = "pydantic_core-2.14.5-cp312-none-win_amd64.whl", hash = "sha256:5baab5455c7a538ac7e8bf1feec4278a66436197592a9bed538160a2e7d11e36"}, - {file = "pydantic_core-2.14.5-cp312-none-win_arm64.whl", hash = "sha256:e47e9a08bcc04d20975b6434cc50bf82665fbc751bcce739d04a3120428f3e27"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:af36f36538418f3806048f3b242a1777e2540ff9efaa667c27da63d2749dbce0"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:45e95333b8418ded64745f14574aa9bfc212cb4fbeed7a687b0c6e53b5e188cd"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e47a76848f92529879ecfc417ff88a2806438f57be4a6a8bf2961e8f9ca9ec7"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d81e6987b27bc7d101c8597e1cd2bcaa2fee5e8e0f356735c7ed34368c471550"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34708cc82c330e303f4ce87758828ef6e457681b58ce0e921b6e97937dd1e2a3"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:652c1988019752138b974c28f43751528116bcceadad85f33a258869e641d753"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e4d090e73e0725b2904fdbdd8d73b8802ddd691ef9254577b708d413bf3006e"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5c7d5b5005f177764e96bd584d7bf28d6e26e96f2a541fdddb934c486e36fd59"}, - 
{file = "pydantic_core-2.14.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a71891847f0a73b1b9eb86d089baee301477abef45f7eaf303495cd1473613e4"}, - {file = "pydantic_core-2.14.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a717aef6971208f0851a2420b075338e33083111d92041157bbe0e2713b37325"}, - {file = "pydantic_core-2.14.5-cp37-none-win32.whl", hash = "sha256:de790a3b5aa2124b8b78ae5faa033937a72da8efe74b9231698b5a1dd9be3405"}, - {file = "pydantic_core-2.14.5-cp37-none-win_amd64.whl", hash = "sha256:6c327e9cd849b564b234da821236e6bcbe4f359a42ee05050dc79d8ed2a91588"}, - {file = "pydantic_core-2.14.5-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:ef98ca7d5995a82f43ec0ab39c4caf6a9b994cb0b53648ff61716370eadc43cf"}, - {file = "pydantic_core-2.14.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6eae413494a1c3f89055da7a5515f32e05ebc1a234c27674a6956755fb2236f"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcf4e6d85614f7a4956c2de5a56531f44efb973d2fe4a444d7251df5d5c4dcfd"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6637560562134b0e17de333d18e69e312e0458ee4455bdad12c37100b7cad706"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:77fa384d8e118b3077cccfcaf91bf83c31fe4dc850b5e6ee3dc14dc3d61bdba1"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16e29bad40bcf97aac682a58861249ca9dcc57c3f6be22f506501833ddb8939c"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531f4b4252fac6ca476fbe0e6f60f16f5b65d3e6b583bc4d87645e4e5ddde331"}, - {file = "pydantic_core-2.14.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:074f3d86f081ce61414d2dc44901f4f83617329c6f3ab49d2bc6c96948b2c26b"}, - {file = "pydantic_core-2.14.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:c2adbe22ab4babbca99c75c5d07aaf74f43c3195384ec07ccbd2f9e3bddaecec"}, - {file = "pydantic_core-2.14.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0f6116a558fd06d1b7c2902d1c4cf64a5bd49d67c3540e61eccca93f41418124"}, - {file = "pydantic_core-2.14.5-cp38-none-win32.whl", hash = "sha256:fe0a5a1025eb797752136ac8b4fa21aa891e3d74fd340f864ff982d649691867"}, - {file = "pydantic_core-2.14.5-cp38-none-win_amd64.whl", hash = "sha256:079206491c435b60778cf2b0ee5fd645e61ffd6e70c47806c9ed51fc75af078d"}, - {file = "pydantic_core-2.14.5-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:a6a16f4a527aae4f49c875da3cdc9508ac7eef26e7977952608610104244e1b7"}, - {file = "pydantic_core-2.14.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:abf058be9517dc877227ec3223f0300034bd0e9f53aebd63cf4456c8cb1e0863"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49b08aae5013640a3bfa25a8eebbd95638ec3f4b2eaf6ed82cf0c7047133f03b"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c2d97e906b4ff36eb464d52a3bc7d720bd6261f64bc4bcdbcd2c557c02081ed2"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3128e0bbc8c091ec4375a1828d6118bc20404883169ac95ffa8d983b293611e6"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88e74ab0cdd84ad0614e2750f903bb0d610cc8af2cc17f72c28163acfcf372a4"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c339dabd8ee15f8259ee0f202679b6324926e5bc9e9a40bf981ce77c038553db"}, - {file = "pydantic_core-2.14.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3387277f1bf659caf1724e1afe8ee7dbc9952a82d90f858ebb931880216ea955"}, - {file = "pydantic_core-2.14.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ba6b6b3846cfc10fdb4c971980a954e49d447cd215ed5a77ec8190bc93dd7bc5"}, - 
{file = "pydantic_core-2.14.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ca61d858e4107ce5e1330a74724fe757fc7135190eb5ce5c9d0191729f033209"}, - {file = "pydantic_core-2.14.5-cp39-none-win32.whl", hash = "sha256:ec1e72d6412f7126eb7b2e3bfca42b15e6e389e1bc88ea0069d0cc1742f477c6"}, - {file = "pydantic_core-2.14.5-cp39-none-win_amd64.whl", hash = "sha256:c0b97ec434041827935044bbbe52b03d6018c2897349670ff8fe11ed24d1d4ab"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:79e0a2cdbdc7af3f4aee3210b1172ab53d7ddb6a2d8c24119b5706e622b346d0"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:678265f7b14e138d9a541ddabbe033012a2953315739f8cfa6d754cc8063e8ca"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95b15e855ae44f0c6341ceb74df61b606e11f1087e87dcb7482377374aac6abe"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09b0e985fbaf13e6b06a56d21694d12ebca6ce5414b9211edf6f17738d82b0f8"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3ad873900297bb36e4b6b3f7029d88ff9829ecdc15d5cf20161775ce12306f8a"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:2d0ae0d8670164e10accbeb31d5ad45adb71292032d0fdb9079912907f0085f4"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d37f8ec982ead9ba0a22a996129594938138a1503237b87318392a48882d50b7"}, - {file = "pydantic_core-2.14.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:35613015f0ba7e14c29ac6c2483a657ec740e5ac5758d993fdd5870b07a61d8b"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ab4ea451082e684198636565224bbb179575efc1658c48281b2c866bfd4ddf04"}, - {file = 
"pydantic_core-2.14.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ce601907e99ea5b4adb807ded3570ea62186b17f88e271569144e8cca4409c7"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb2ed8b3fe4bf4506d6dab3b93b83bbc22237e230cba03866d561c3577517d18"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:70f947628e074bb2526ba1b151cee10e4c3b9670af4dbb4d73bc8a89445916b5"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4bc536201426451f06f044dfbf341c09f540b4ebdb9fd8d2c6164d733de5e634"}, - {file = "pydantic_core-2.14.5-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f4791cf0f8c3104ac668797d8c514afb3431bc3305f5638add0ba1a5a37e0d88"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:038c9f763e650712b899f983076ce783175397c848da04985658e7628cbe873b"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:27548e16c79702f1e03f5628589c6057c9ae17c95b4c449de3c66b589ead0520"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c97bee68898f3f4344eb02fec316db93d9700fb1e6a5b760ffa20d71d9a46ce3"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b759b77f5337b4ea024f03abc6464c9f35d9718de01cfe6bae9f2e139c397e"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:439c9afe34638ace43a49bf72d201e0ffc1a800295bed8420c2a9ca8d5e3dbb3"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ba39688799094c75ea8a16a6b544eb57b5b0f3328697084f3f2790892510d144"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:ccd4d5702bb90b84df13bd491be8d900b92016c5a455b7e14630ad7449eb03f8"}, - {file = "pydantic_core-2.14.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:81982d78a45d1e5396819bbb4ece1fadfe5f079335dd28c4ab3427cd95389944"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:7f8210297b04e53bc3da35db08b7302a6a1f4889c79173af69b72ec9754796b8"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:8c8a8812fe6f43a3a5b054af6ac2d7b8605c7bcab2804a8a7d68b53f3cd86e00"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:206ed23aecd67c71daf5c02c3cd19c0501b01ef3cbf7782db9e4e051426b3d0d"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2027d05c8aebe61d898d4cffd774840a9cb82ed356ba47a90d99ad768f39789"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40180930807ce806aa71eda5a5a5447abb6b6a3c0b4b3b1b1962651906484d68"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:615a0a4bff11c45eb3c1996ceed5bdaa2f7b432425253a7c2eed33bb86d80abc"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5e412d717366e0677ef767eac93566582518fe8be923361a5c204c1a62eaafe"}, - {file = "pydantic_core-2.14.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:513b07e99c0a267b1d954243845d8a833758a6726a3b5d8948306e3fe14675e3"}, - {file = "pydantic_core-2.14.5.tar.gz", hash = "sha256:6d30226dfc816dd0fdf120cae611dd2215117e4f9b124af8c60ab9093b6e8e71"}, + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"}, + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"}, + {file = 
"pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"}, + {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"}, + {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"}, + {file = 
"pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"}, + {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"}, + {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"}, + {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"}, + {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"}, + {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"}, + {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = 
"sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"}, + {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"}, + {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = 
"sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"}, + {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"}, + {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"}, + 
{file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"}, + {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"}, + {file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"}, + {file = 
"pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"}, + {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"}, ] [package.dependencies] @@ -1321,13 +1128,13 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pyflakes" -version = "3.1.0" +version = "3.2.0" description = "passive checker of Python programs" optional = false python-versions = ">=3.8" files = [ - {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, - {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, + {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, + {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, ] [[package]] @@ -1447,20 +1254,6 @@ snappy = ["python-snappy"] test = ["pytest (>=7)"] zstd = ["zstandard"] -[[package]] -name = "pymongo-inmemory" -version = "0.4.0" -description = "A mongo mocking library 
with an ephemeral MongoDB running in memory." -optional = false -python-versions = ">=3.9,<4.0" -files = [ - {file = "pymongo_inmemory-0.4.0-py3-none-any.whl", hash = "sha256:cf248c7ebcc9d36ae149cbe1111c87a6b8b72e6be5f89dcb27a3bab2bb7f26a2"}, - {file = "pymongo_inmemory-0.4.0.tar.gz", hash = "sha256:f5828ead0b59850f5464d635811d4819700799d62dcf6e089877eab1df5f1a3e"}, -] - -[package.dependencies] -pymongo = "*" - [[package]] name = "pyparsing" version = "3.1.1" @@ -1537,13 +1330,13 @@ shexjsg = ">=0.8.1" [[package]] name = "pytest" -version = "7.4.3" +version = "7.4.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, - {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, ] [package.dependencies] @@ -1557,41 +1350,6 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] -[[package]] -name = "pytest-cov" -version = "4.1.0" -description = "Pytest plugin for measuring coverage." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, - {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, -] - -[package.dependencies] -coverage = {version = ">=5.2.1", extras = ["toml"]} -pytest = ">=4.6" - -[package.extras] -testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] - -[[package]] -name = "pytest-local-badge" -version = "1.0.3" -description = "Generate local badges (shields) reporting your test suite status." -optional = false -python-versions = ">=3.7" -files = [ - {file = "pytest-local-badge-1.0.3.tar.gz", hash = "sha256:e13274cc9cadf91a1e3fa290d0bbbd371a365f9d719411e475596f722c2725a5"}, - {file = "pytest_local_badge-1.0.3-py3-none-any.whl", hash = "sha256:e14dc79922598e5b27ef4398a3537535ad115df2229ebba0259f6c6d92c6047b"}, -] - -[package.dependencies] -pytest = ">=6.1.0" - -[package.extras] -develop = ["black (>=22.12.0)", "build", "flake8-bugbear", "flake8-comprehensions", "flake8-import-order", "flake8-print", "pep8-naming", "pytest (>=7.1.0,<8)", "pytest-cov", "pytest-mock"] - [[package]] name = "pytest-logging" version = "2015.11.4" @@ -1785,13 +1543,13 @@ rdflib-jsonld = "0.6.1" [[package]] name = "referencing" -version = "0.31.1" +version = "0.32.1" description = "JSON Referencing + Python" optional = false python-versions = ">=3.8" files = [ - {file = "referencing-0.31.1-py3-none-any.whl", hash = "sha256:c19c4d006f1757e3dd75c4f784d38f8698d87b649c54f9ace14e5e8c9667c01d"}, - {file = "referencing-0.31.1.tar.gz", hash = "sha256:81a1471c68c9d5e3831c30ad1dd9815c45b558e596653db751a2bfdd17b3b9ec"}, + {file = "referencing-0.32.1-py3-none-any.whl", hash = "sha256:7e4dc12271d8e15612bfe35792f5ea1c40970dadf8624602e33db2758f7ee554"}, + {file = "referencing-0.32.1.tar.gz", hash = 
"sha256:3c57da0513e9563eb7e203ebe9bb3a1b509b042016433bd1e45a2853466c3dd3"}, ] [package.dependencies] @@ -1819,25 +1577,6 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] -[[package]] -name = "requests-mock" -version = "1.11.0" -description = "Mock out responses from the requests package" -optional = false -python-versions = "*" -files = [ - {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, - {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, -] - -[package.dependencies] -requests = ">=2.3,<3" -six = "*" - -[package.extras] -fixture = ["fixtures"] -test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"] - [[package]] name = "rfc3339-validator" version = "0.1.4" @@ -1865,110 +1604,110 @@ files = [ [[package]] name = "rpds-py" -version = "0.13.2" +version = "0.16.2" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.8" files = [ - {file = "rpds_py-0.13.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:1ceebd0ae4f3e9b2b6b553b51971921853ae4eebf3f54086be0565d59291e53d"}, - {file = "rpds_py-0.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:46e1ed994a0920f350a4547a38471217eb86f57377e9314fbaaa329b71b7dfe3"}, - {file = "rpds_py-0.13.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee353bb51f648924926ed05e0122b6a0b1ae709396a80eb583449d5d477fcdf7"}, - {file = "rpds_py-0.13.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:530190eb0cd778363bbb7596612ded0bb9fef662daa98e9d92a0419ab27ae914"}, - {file = "rpds_py-0.13.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d311e44dd16d2434d5506d57ef4d7036544fc3c25c14b6992ef41f541b10fb"}, - {file = 
"rpds_py-0.13.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e72f750048b32d39e87fc85c225c50b2a6715034848dbb196bf3348aa761fa1"}, - {file = "rpds_py-0.13.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db09b98c7540df69d4b47218da3fbd7cb466db0fb932e971c321f1c76f155266"}, - {file = "rpds_py-0.13.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ac26f50736324beb0282c819668328d53fc38543fa61eeea2c32ea8ea6eab8d"}, - {file = "rpds_py-0.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:12ecf89bd54734c3c2c79898ae2021dca42750c7bcfb67f8fb3315453738ac8f"}, - {file = "rpds_py-0.13.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a44c8440183b43167fd1a0819e8356692bf5db1ad14ce140dbd40a1485f2dea"}, - {file = "rpds_py-0.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bcef4f2d3dc603150421de85c916da19471f24d838c3c62a4f04c1eb511642c1"}, - {file = "rpds_py-0.13.2-cp310-none-win32.whl", hash = "sha256:ee6faebb265e28920a6f23a7d4c362414b3f4bb30607141d718b991669e49ddc"}, - {file = "rpds_py-0.13.2-cp310-none-win_amd64.whl", hash = "sha256:ac96d67b37f28e4b6ecf507c3405f52a40658c0a806dffde624a8fcb0314d5fd"}, - {file = "rpds_py-0.13.2-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:b5f6328e8e2ae8238fc767703ab7b95785521c42bb2b8790984e3477d7fa71ad"}, - {file = "rpds_py-0.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:729408136ef8d45a28ee9a7411917c9e3459cf266c7e23c2f7d4bb8ef9e0da42"}, - {file = "rpds_py-0.13.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65cfed9c807c27dee76407e8bb29e6f4e391e436774bcc769a037ff25ad8646e"}, - {file = "rpds_py-0.13.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aefbdc934115d2f9278f153952003ac52cd2650e7313750390b334518c589568"}, - {file = "rpds_py-0.13.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:d48db29bd47814671afdd76c7652aefacc25cf96aad6daefa82d738ee87461e2"}, - {file = "rpds_py-0.13.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c55d7f2d817183d43220738270efd3ce4e7a7b7cbdaefa6d551ed3d6ed89190"}, - {file = "rpds_py-0.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6aadae3042f8e6db3376d9e91f194c606c9a45273c170621d46128f35aef7cd0"}, - {file = "rpds_py-0.13.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5feae2f9aa7270e2c071f488fab256d768e88e01b958f123a690f1cc3061a09c"}, - {file = "rpds_py-0.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51967a67ea0d7b9b5cd86036878e2d82c0b6183616961c26d825b8c994d4f2c8"}, - {file = "rpds_py-0.13.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d0c10d803549427f427085ed7aebc39832f6e818a011dcd8785e9c6a1ba9b3e"}, - {file = "rpds_py-0.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:603d5868f7419081d616dab7ac3cfa285296735e7350f7b1e4f548f6f953ee7d"}, - {file = "rpds_py-0.13.2-cp311-none-win32.whl", hash = "sha256:b8996ffb60c69f677245f5abdbcc623e9442bcc91ed81b6cd6187129ad1fa3e7"}, - {file = "rpds_py-0.13.2-cp311-none-win_amd64.whl", hash = "sha256:5379e49d7e80dca9811b36894493d1c1ecb4c57de05c36f5d0dd09982af20211"}, - {file = "rpds_py-0.13.2-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:8a776a29b77fe0cc28fedfd87277b0d0f7aa930174b7e504d764e0b43a05f381"}, - {file = "rpds_py-0.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2a1472956c5bcc49fb0252b965239bffe801acc9394f8b7c1014ae9258e4572b"}, - {file = "rpds_py-0.13.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f252dfb4852a527987a9156cbcae3022a30f86c9d26f4f17b8c967d7580d65d2"}, - {file = "rpds_py-0.13.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f0d320e70b6b2300ff6029e234e79fe44e9dbbfc7b98597ba28e054bd6606a57"}, - {file = 
"rpds_py-0.13.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ade2ccb937060c299ab0dfb2dea3d2ddf7e098ed63ee3d651ebfc2c8d1e8632a"}, - {file = "rpds_py-0.13.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9d121be0217787a7d59a5c6195b0842d3f701007333426e5154bf72346aa658"}, - {file = "rpds_py-0.13.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fa6bd071ec6d90f6e7baa66ae25820d57a8ab1b0a3c6d3edf1834d4b26fafa2"}, - {file = "rpds_py-0.13.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c918621ee0a3d1fe61c313f2489464f2ae3d13633e60f520a8002a5e910982ee"}, - {file = "rpds_py-0.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:25b28b3d33ec0a78e944aaaed7e5e2a94ac811bcd68b557ca48a0c30f87497d2"}, - {file = "rpds_py-0.13.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:31e220a040b89a01505128c2f8a59ee74732f666439a03e65ccbf3824cdddae7"}, - {file = "rpds_py-0.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:15253fff410873ebf3cfba1cc686a37711efcd9b8cb30ea21bb14a973e393f60"}, - {file = "rpds_py-0.13.2-cp312-none-win32.whl", hash = "sha256:b981a370f8f41c4024c170b42fbe9e691ae2dbc19d1d99151a69e2c84a0d194d"}, - {file = "rpds_py-0.13.2-cp312-none-win_amd64.whl", hash = "sha256:4c4e314d36d4f31236a545696a480aa04ea170a0b021e9a59ab1ed94d4c3ef27"}, - {file = "rpds_py-0.13.2-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:80e5acb81cb49fd9f2d5c08f8b74ffff14ee73b10ca88297ab4619e946bcb1e1"}, - {file = "rpds_py-0.13.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:efe093acc43e869348f6f2224df7f452eab63a2c60a6c6cd6b50fd35c4e075ba"}, - {file = "rpds_py-0.13.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c2a61c0e4811012b0ba9f6cdcb4437865df5d29eab5d6018ba13cee1c3064a0"}, - {file = "rpds_py-0.13.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:751758d9dd04d548ec679224cc00e3591f5ebf1ff159ed0d4aba6a0746352452"}, - 
{file = "rpds_py-0.13.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ba8858933f0c1a979781272a5f65646fca8c18c93c99c6ddb5513ad96fa54b1"}, - {file = "rpds_py-0.13.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bfdfbe6a36bc3059fff845d64c42f2644cf875c65f5005db54f90cdfdf1df815"}, - {file = "rpds_py-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa0379c1935c44053c98826bc99ac95f3a5355675a297ac9ce0dfad0ce2d50ca"}, - {file = "rpds_py-0.13.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5593855b5b2b73dd8413c3fdfa5d95b99d657658f947ba2c4318591e745d083"}, - {file = "rpds_py-0.13.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:2a7bef6977043673750a88da064fd513f89505111014b4e00fbdd13329cd4e9a"}, - {file = "rpds_py-0.13.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:3ab96754d23372009638a402a1ed12a27711598dd49d8316a22597141962fe66"}, - {file = "rpds_py-0.13.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:e06cfea0ece444571d24c18ed465bc93afb8c8d8d74422eb7026662f3d3f779b"}, - {file = "rpds_py-0.13.2-cp38-none-win32.whl", hash = "sha256:5493569f861fb7b05af6d048d00d773c6162415ae521b7010197c98810a14cab"}, - {file = "rpds_py-0.13.2-cp38-none-win_amd64.whl", hash = "sha256:b07501b720cf060c5856f7b5626e75b8e353b5f98b9b354a21eb4bfa47e421b1"}, - {file = "rpds_py-0.13.2-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:881df98f0a8404d32b6de0fd33e91c1b90ed1516a80d4d6dc69d414b8850474c"}, - {file = "rpds_py-0.13.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d79c159adea0f1f4617f54aa156568ac69968f9ef4d1e5fefffc0a180830308e"}, - {file = "rpds_py-0.13.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38d4f822ee2f338febcc85aaa2547eb5ba31ba6ff68d10b8ec988929d23bb6b4"}, - {file = "rpds_py-0.13.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5d75d6d220d55cdced2f32cc22f599475dbe881229aeddba6c79c2e9df35a2b3"}, - {file = 
"rpds_py-0.13.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d97e9ae94fb96df1ee3cb09ca376c34e8a122f36927230f4c8a97f469994bff"}, - {file = "rpds_py-0.13.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67a429520e97621a763cf9b3ba27574779c4e96e49a27ff8a1aa99ee70beb28a"}, - {file = "rpds_py-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:188435794405c7f0573311747c85a96b63c954a5f2111b1df8018979eca0f2f0"}, - {file = "rpds_py-0.13.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:38f9bf2ad754b4a45b8210a6c732fe876b8a14e14d5992a8c4b7c1ef78740f53"}, - {file = "rpds_py-0.13.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a6ba2cb7d676e9415b9e9ac7e2aae401dc1b1e666943d1f7bc66223d3d73467b"}, - {file = "rpds_py-0.13.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:eaffbd8814bb1b5dc3ea156a4c5928081ba50419f9175f4fc95269e040eff8f0"}, - {file = "rpds_py-0.13.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5a4c1058cdae6237d97af272b326e5f78ee7ee3bbffa6b24b09db4d828810468"}, - {file = "rpds_py-0.13.2-cp39-none-win32.whl", hash = "sha256:b5267feb19070bef34b8dea27e2b504ebd9d31748e3ecacb3a4101da6fcb255c"}, - {file = "rpds_py-0.13.2-cp39-none-win_amd64.whl", hash = "sha256:ddf23960cb42b69bce13045d5bc66f18c7d53774c66c13f24cf1b9c144ba3141"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:97163a1ab265a1073a6372eca9f4eeb9f8c6327457a0b22ddfc4a17dcd613e74"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:25ea41635d22b2eb6326f58e608550e55d01df51b8a580ea7e75396bafbb28e9"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d59d4d451ba77f08cb4cd9268dec07be5bc65f73666302dbb5061989b17198"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:e7c564c58cf8f248fe859a4f0fe501b050663f3d7fbc342172f259124fb59933"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61dbc1e01dc0c5875da2f7ae36d6e918dc1b8d2ce04e871793976594aad8a57a"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdb82eb60d31b0c033a8e8ee9f3fc7dfbaa042211131c29da29aea8531b4f18f"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d204957169f0b3511fb95395a9da7d4490fb361763a9f8b32b345a7fe119cb45"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c45008ca79bad237cbc03c72bc5205e8c6f66403773929b1b50f7d84ef9e4d07"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:79bf58c08f0756adba691d480b5a20e4ad23f33e1ae121584cf3a21717c36dfa"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:e86593bf8637659e6a6ed58854b6c87ec4e9e45ee8a4adfd936831cef55c2d21"}, - {file = "rpds_py-0.13.2-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:d329896c40d9e1e5c7715c98529e4a188a1f2df51212fd65102b32465612b5dc"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4a5375c5fff13f209527cd886dc75394f040c7d1ecad0a2cb0627f13ebe78a12"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:06d218e4464d31301e943b65b2c6919318ea6f69703a351961e1baaf60347276"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1f41d32a2ddc5a94df4b829b395916a4b7f103350fa76ba6de625fcb9e773ac"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6bc568b05e02cd612be53900c88aaa55012e744930ba2eeb56279db4c6676eb3"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:9d94d78418203904730585efa71002286ac4c8ac0689d0eb61e3c465f9e608ff"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bed0252c85e21cf73d2d033643c945b460d6a02fc4a7d644e3b2d6f5f2956c64"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:244e173bb6d8f3b2f0c4d7370a1aa341f35da3e57ffd1798e5b2917b91731fd3"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7f55cd9cf1564b7b03f238e4c017ca4794c05b01a783e9291065cb2858d86ce4"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:f03a1b3a4c03e3e0161642ac5367f08479ab29972ea0ffcd4fa18f729cd2be0a"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:f5f4424cb87a20b016bfdc157ff48757b89d2cc426256961643d443c6c277007"}, - {file = "rpds_py-0.13.2-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:c82bbf7e03748417c3a88c1b0b291288ce3e4887a795a3addaa7a1cfd9e7153e"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c0095b8aa3e432e32d372e9a7737e65b58d5ed23b9620fea7cb81f17672f1fa1"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:4c2d26aa03d877c9730bf005621c92da263523a1e99247590abbbe252ccb7824"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96f2975fb14f39c5fe75203f33dd3010fe37d1c4e33177feef1107b5ced750e3"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4dcc5ee1d0275cb78d443fdebd0241e58772a354a6d518b1d7af1580bbd2c4e8"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61d42d2b08430854485135504f672c14d4fc644dd243a9c17e7c4e0faf5ed07e"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:d3a61e928feddc458a55110f42f626a2a20bea942ccedb6fb4cee70b4830ed41"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7de12b69d95072394998c622cfd7e8cea8f560db5fca6a62a148f902a1029f8b"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:87a90f5545fd61f6964e65eebde4dc3fa8660bb7d87adb01d4cf17e0a2b484ad"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:9c95a1a290f9acf7a8f2ebbdd183e99215d491beea52d61aa2a7a7d2c618ddc6"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:35f53c76a712e323c779ca39b9a81b13f219a8e3bc15f106ed1e1462d56fcfe9"}, - {file = "rpds_py-0.13.2-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:96fb0899bb2ab353f42e5374c8f0789f54e0a94ef2f02b9ac7149c56622eaf31"}, - {file = "rpds_py-0.13.2.tar.gz", hash = "sha256:f8eae66a1304de7368932b42d801c67969fd090ddb1a7a24f27b435ed4bed68f"}, + {file = "rpds_py-0.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:509b617ac787cd1149600e731db9274ebbef094503ca25158e6f23edaba1ca8f"}, + {file = "rpds_py-0.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:413b9c17388bbd0d87a329d8e30c1a4c6e44e2bb25457f43725a8e6fe4161e9e"}, + {file = "rpds_py-0.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2946b120718eba9af2b4dd103affc1164a87b9e9ebff8c3e4c05d7b7a7e274e2"}, + {file = "rpds_py-0.16.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:35ae5ece284cf36464eb160880018cf6088a9ac5ddc72292a6092b6ef3f4da53"}, + {file = "rpds_py-0.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc6a7620ba7639a3db6213da61312cb4aa9ac0ca6e00dc1cbbdc21c2aa6eb57"}, + {file = "rpds_py-0.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8cb6fe8ecdfffa0e711a75c931fb39f4ba382b4b3ccedeca43f18693864fe850"}, + {file = 
"rpds_py-0.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dace7b26a13353e24613417ce2239491b40a6ad44e5776a18eaff7733488b44"}, + {file = "rpds_py-0.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1bdbc5fcb04a7309074de6b67fa9bc4b418ab3fc435fec1f2779a0eced688d04"}, + {file = "rpds_py-0.16.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f42e25c016927e2a6b1ce748112c3ab134261fc2ddc867e92d02006103e1b1b7"}, + {file = "rpds_py-0.16.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:eab36eae3f3e8e24b05748ec9acc66286662f5d25c52ad70cadab544e034536b"}, + {file = "rpds_py-0.16.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0474df4ade9a3b4af96c3d36eb81856cb9462e4c6657d4caecfd840d2a13f3c9"}, + {file = "rpds_py-0.16.2-cp310-none-win32.whl", hash = "sha256:84c5a4d1f9dd7e2d2c44097fb09fffe728629bad31eb56caf97719e55575aa82"}, + {file = "rpds_py-0.16.2-cp310-none-win_amd64.whl", hash = "sha256:2bd82db36cd70b3628c0c57d81d2438e8dd4b7b32a6a9f25f24ab0e657cb6c4e"}, + {file = "rpds_py-0.16.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:adc0c3d6fc6ae35fee3e4917628983f6ce630d513cbaad575b4517d47e81b4bb"}, + {file = "rpds_py-0.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ec23fcad480e77ede06cf4127a25fc440f7489922e17fc058f426b5256ee0edb"}, + {file = "rpds_py-0.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07aab64e2808c3ebac2a44f67e9dc0543812b715126dfd6fe4264df527556cb6"}, + {file = "rpds_py-0.16.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a4ebb8b20bd09c5ce7884c8f0388801100f5e75e7f733b1b6613c713371feefc"}, + {file = "rpds_py-0.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3d7e2ea25d3517c6d7e5a1cc3702cffa6bd18d9ef8d08d9af6717fc1c700eed"}, + {file = "rpds_py-0.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:f28ac0e8e7242d140f99402a903a2c596ab71550272ae9247ad78f9a932b5698"}, + {file = "rpds_py-0.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19f00f57fdd38db4bb5ad09f9ead1b535332dbf624200e9029a45f1f35527ebb"}, + {file = "rpds_py-0.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3da5a4c56953bdbf6d04447c3410309616c54433146ccdb4a277b9cb499bc10e"}, + {file = "rpds_py-0.16.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ec2e1cf025b2c0f48ec17ff3e642661da7ee332d326f2e6619366ce8e221f018"}, + {file = "rpds_py-0.16.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e0441fb4fdd39a230477b2ca9be90868af64425bfe7b122b57e61e45737a653b"}, + {file = "rpds_py-0.16.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9f0350ef2fba5f34eb0c9000ea328e51b9572b403d2f7f3b19f24085f6f598e8"}, + {file = "rpds_py-0.16.2-cp311-none-win32.whl", hash = "sha256:5a80e2f83391ad0808b4646732af2a7b67550b98f0cae056cb3b40622a83dbb3"}, + {file = "rpds_py-0.16.2-cp311-none-win_amd64.whl", hash = "sha256:e04e56b4ca7a770593633556e8e9e46579d66ec2ada846b401252a2bdcf70a6d"}, + {file = "rpds_py-0.16.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:5e6caa3809e50690bd92fa490f5c38caa86082c8c3315aa438bce43786d5e90d"}, + {file = "rpds_py-0.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e53b9b25cac9065328901713a7e9e3b12e4f57ef4280b370fbbf6fef2052eef"}, + {file = "rpds_py-0.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:af27423662f32d7501a00c5e7342f7dbd1e4a718aea7a239781357d15d437133"}, + {file = "rpds_py-0.16.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:43d4dd5fb16eb3825742bad8339d454054261ab59fed2fbac84e1d84d5aae7ba"}, + {file = "rpds_py-0.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e061de3b745fe611e23cd7318aec2c8b0e4153939c25c9202a5811ca911fd733"}, + {file = 
"rpds_py-0.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b811d182ad17ea294f2ec63c0621e7be92a1141e1012383461872cead87468f"}, + {file = "rpds_py-0.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5552f328eaef1a75ff129d4d0c437bf44e43f9436d3996e8eab623ea0f5fcf73"}, + {file = "rpds_py-0.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dcbe1f8dd179e4d69b70b1f1d9bb6fd1e7e1bdc9c9aad345cdeb332e29d40748"}, + {file = "rpds_py-0.16.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8aad80645a011abae487d356e0ceb359f4938dfb6f7bcc410027ed7ae4f7bb8b"}, + {file = "rpds_py-0.16.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b6f5549d6ed1da9bfe3631ca9483ae906f21410be2445b73443fa9f017601c6f"}, + {file = "rpds_py-0.16.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d452817e0d9c749c431a1121d56a777bd7099b720b3d1c820f1725cb40928f58"}, + {file = "rpds_py-0.16.2-cp312-none-win32.whl", hash = "sha256:888a97002e986eca10d8546e3c8b97da1d47ad8b69726dcfeb3e56348ebb28a3"}, + {file = "rpds_py-0.16.2-cp312-none-win_amd64.whl", hash = "sha256:d8dda2a806dfa4a9b795950c4f5cc56d6d6159f7d68080aedaff3bdc9b5032f5"}, + {file = "rpds_py-0.16.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:071980663c273bf3d388fe5c794c547e6f35ba3335477072c713a3176bf14a60"}, + {file = "rpds_py-0.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:726ac36e8a3bb8daef2fd482534cabc5e17334052447008405daca7ca04a3108"}, + {file = "rpds_py-0.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9e557db6a177470316c82f023e5d571811c9a4422b5ea084c85da9aa3c035fc"}, + {file = "rpds_py-0.16.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:90123853fc8b1747f80b0d354be3d122b4365a93e50fc3aacc9fb4c2488845d6"}, + {file = "rpds_py-0.16.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a61f659665a39a4d17d699ab3593d7116d66e1e2e3f03ef3fb8f484e91908808"}, + 
{file = "rpds_py-0.16.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc97f0640e91d7776530f06e6836c546c1c752a52de158720c4224c9e8053cad"}, + {file = "rpds_py-0.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44a54e99a2b9693a37ebf245937fd6e9228b4cbd64b9cc961e1f3391ec6c7391"}, + {file = "rpds_py-0.16.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd4b677d929cf1f6bac07ad76e0f2d5de367e6373351c01a9c0a39f6b21b4a8b"}, + {file = "rpds_py-0.16.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5ef00873303d678aaf8b0627e111fd434925ca01c657dbb2641410f1cdaef261"}, + {file = "rpds_py-0.16.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:349cb40897fd529ca15317c22c0eab67f5ac5178b5bd2c6adc86172045210acc"}, + {file = "rpds_py-0.16.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:2ddef620e70eaffebed5932ce754d539c0930f676aae6212f8e16cd9743dd365"}, + {file = "rpds_py-0.16.2-cp38-none-win32.whl", hash = "sha256:882ce6e25e585949c3d9f9abd29202367175e0aab3aba0c58c9abbb37d4982ff"}, + {file = "rpds_py-0.16.2-cp38-none-win_amd64.whl", hash = "sha256:f4bd4578e44f26997e9e56c96dedc5f1af43cc9d16c4daa29c771a00b2a26851"}, + {file = "rpds_py-0.16.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:69ac7ea9897ec201ce68b48582f3eb34a3f9924488a5432a93f177bf76a82a7e"}, + {file = "rpds_py-0.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a9880b4656efe36ccad41edc66789e191e5ee19a1ea8811e0aed6f69851a82f4"}, + {file = "rpds_py-0.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee94cb58c0ba2c62ee108c2b7c9131b2c66a29e82746e8fa3aa1a1effbd3dcf1"}, + {file = "rpds_py-0.16.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:24f7a2eb3866a9e91f4599851e0c8d39878a470044875c49bd528d2b9b88361c"}, + {file = "rpds_py-0.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca57468da2d9a660bcf8961637c85f2fbb2aa64d9bc3f9484e30c3f9f67b1dd7"}, + {file 
= "rpds_py-0.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccd4e400309e1f34a5095bf9249d371f0fd60f8a3a5c4a791cad7b99ce1fd38d"}, + {file = "rpds_py-0.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80443fe2f7b3ea3934c5d75fb0e04a5dbb4a8e943e5ff2de0dec059202b70a8b"}, + {file = "rpds_py-0.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4d6a9f052e72d493efd92a77f861e45bab2f6be63e37fa8ecf0c6fd1a58fedb0"}, + {file = "rpds_py-0.16.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:35953f4f2b3216421af86fd236b7c0c65935936a94ea83ddbd4904ba60757773"}, + {file = "rpds_py-0.16.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:981d135c7cdaf6cd8eadae1c950de43b976de8f09d8e800feed307140d3d6d00"}, + {file = "rpds_py-0.16.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d0dd7ed2f16df2e129496e7fbe59a34bc2d7fc8db443a606644d069eb69cbd45"}, + {file = "rpds_py-0.16.2-cp39-none-win32.whl", hash = "sha256:703d95c75a72e902544fda08e965885525e297578317989fd15a6ce58414b41d"}, + {file = "rpds_py-0.16.2-cp39-none-win_amd64.whl", hash = "sha256:e93ec1b300acf89730cf27975ef574396bc04edecc358e9bd116fb387a123239"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:44627b6ca7308680a70766454db5249105fa6344853af6762eaad4158a2feebe"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:3f91df8e6dbb7360e176d1affd5fb0246d2b88d16aa5ebc7db94fd66b68b61da"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d904c5693e08bad240f16d79305edba78276be87061c872a4a15e2c301fa2c0"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:290a81cfbe4673285cdf140ec5cd1658ffbf63ab359f2b352ebe172e7cfa5bf0"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b634c5ec0103c5cbebc24ebac4872b045cccb9456fc59efdcf6fe39775365bd2"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a297a4d08cc67c7466c873c78039d87840fb50d05473db0ec1b7b03d179bf322"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2e75e17bd0bb66ee34a707da677e47c14ee51ccef78ed6a263a4cc965a072a1"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f1b9d9260e06ea017feb7172976ab261e011c1dc2f8883c7c274f6b2aabfe01a"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:162d7cd9cd311c1b0ff1c55a024b8f38bd8aad1876b648821da08adc40e95734"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:9b32f742ce5b57201305f19c2ef7a184b52f6f9ba6871cc042c2a61f0d6b49b8"}, + {file = "rpds_py-0.16.2-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac08472f41ea77cd6a5dae36ae7d4ed3951d6602833af87532b556c1b4601d63"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:495a14b72bbe217f2695dcd9b5ab14d4f8066a00f5d209ed94f0aca307f85f6e"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:8d6b6937ae9eac6d6c0ca3c42774d89fa311f55adff3970fb364b34abde6ed3d"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a61226465bda9283686db8f17d02569a98e4b13c637be5a26d44aa1f1e361c2"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5cf6af100ffb5c195beec11ffaa8cf8523057f123afa2944e6571d54da84cdc9"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6df15846ee3fb2e6397fe25d7ca6624af9f89587f3f259d177b556fed6bebe2c"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:1be2f033df1b8be8c3167ba3c29d5dca425592ee31e35eac52050623afba5772"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96f957d6ab25a78b9e7fc9749d754b98eac825a112b4e666525ce89afcbd9ed5"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:088396c7c70e59872f67462fcac3ecbded5233385797021976a09ebd55961dfe"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:4c46ad6356e1561f2a54f08367d1d2e70a0a1bb2db2282d2c1972c1d38eafc3b"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:47713dc4fce213f5c74ca8a1f6a59b622fc1b90868deb8e8e4d993e421b4b39d"}, + {file = "rpds_py-0.16.2-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:f811771019f063bbd0aa7bb72c8a934bc13ebacb4672d712fc1639cfd314cccc"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f19afcfc0dd0dca35694df441e9b0f95bc231b512f51bded3c3d8ca32153ec19"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a4b682c5775d6a3d21e314c10124599976809455ee67020e8e72df1769b87bc3"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c647ca87fc0ebe808a41de912e9a1bfef9acb85257e5d63691364ac16b81c1f0"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:302bd4983bbd47063e452c38be66153760112f6d3635c7eeefc094299fa400a9"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bf721ede3eb7b829e4a9b8142bd55db0bdc82902720548a703f7e601ee13bdc3"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:358dafc89ce3894c7f486c615ba914609f38277ef67f566abc4c854d23b997fa"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:cad0f59ee3dc35526039f4bc23642d52d5f6616b5f687d846bfc6d0d6d486db0"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cffa76b385dfe1e38527662a302b19ffb0e7f5cf7dd5e89186d2c94a22dd9d0c"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:83640a5d7cd3bff694747d50436b8b541b5b9b9782b0c8c1688931d6ee1a1f2d"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:ed99b4f7179d2111702020fd7d156e88acd533f5a7d3971353e568b6051d5c97"}, + {file = "rpds_py-0.16.2-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:4022b9dc620e14f30201a8a73898a873c8e910cb642bcd2f3411123bc527f6ac"}, + {file = "rpds_py-0.16.2.tar.gz", hash = "sha256:781ef8bfc091b19960fc0142a23aedadafa826bc32b433fdfe6fd7f964d7ef44"}, ] [[package]] @@ -2048,36 +1787,15 @@ files = [ {file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"}, ] -[[package]] -name = "semver" -version = "3.0.2" -description = "Python helper for Semantic Versioning (https://semver.org)" -optional = false -python-versions = ">=3.7" -files = [ - {file = "semver-3.0.2-py3-none-any.whl", hash = "sha256:b1ea4686fe70b981f85359eda33199d60c53964284e0cfb4977d243e37cf4bf4"}, - {file = "semver-3.0.2.tar.gz", hash = "sha256:6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc"}, -] - -[[package]] -name = "sentinels" -version = "1.0.0" -description = "Various objects to denote special meanings in python" -optional = false -python-versions = "*" -files = [ - {file = "sentinels-1.0.0.tar.gz", hash = "sha256:7be0704d7fe1925e397e92d18669ace2f619c92b5d4eb21a89f31e026f9ff4b1"}, -] - [[package]] name = "setuptools" -version = "69.0.2" +version = "69.0.3" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-69.0.2-py3-none-any.whl", hash = 
"sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"}, - {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"}, + {file = "setuptools-69.0.3-py3-none-any.whl", hash = "sha256:385eb4edd9c9d5c17540511303e39a147ce2fc04bc55289c322b9e5904fe2c05"}, + {file = "setuptools-69.0.3.tar.gz", hash = "sha256:be1af57fc409f93647f2e8e4573a142ed38724b8cdd389706a867bb4efcf1e78"}, ] [package.extras] @@ -2159,70 +1877,70 @@ pandas = ["pandas (>=1.3.5)"] [[package]] name = "sqlalchemy" -version = "2.0.23" +version = "2.0.25" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:638c2c0b6b4661a4fd264f6fb804eccd392745c5887f9317feb64bb7cb03b3ea"}, - {file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e3b5036aa326dc2df50cba3c958e29b291a80f604b1afa4c8ce73e78e1c9f01d"}, - {file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:787af80107fb691934a01889ca8f82a44adedbf5ef3d6ad7d0f0b9ac557e0c34"}, - {file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c14eba45983d2f48f7546bb32b47937ee2cafae353646295f0e99f35b14286ab"}, - {file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0666031df46b9badba9bed00092a1ffa3aa063a5e68fa244acd9f08070e936d3"}, - {file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:89a01238fcb9a8af118eaad3ffcc5dedaacbd429dc6fdc43fe430d3a941ff965"}, - {file = "SQLAlchemy-2.0.23-cp310-cp310-win32.whl", hash = "sha256:cabafc7837b6cec61c0e1e5c6d14ef250b675fa9c3060ed8a7e38653bd732ff8"}, - {file = "SQLAlchemy-2.0.23-cp310-cp310-win_amd64.whl", hash = "sha256:87a3d6b53c39cd173990de2f5f4b83431d534a74f0e2f88bd16eabb5667e65c6"}, - {file = "SQLAlchemy-2.0.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:d5578e6863eeb998980c212a39106ea139bdc0b3f73291b96e27c929c90cd8e1"}, - {file = "SQLAlchemy-2.0.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:62d9e964870ea5ade4bc870ac4004c456efe75fb50404c03c5fd61f8bc669a72"}, - {file = "SQLAlchemy-2.0.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c80c38bd2ea35b97cbf7c21aeb129dcbebbf344ee01a7141016ab7b851464f8e"}, - {file = "SQLAlchemy-2.0.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75eefe09e98043cff2fb8af9796e20747ae870c903dc61d41b0c2e55128f958d"}, - {file = "SQLAlchemy-2.0.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd45a5b6c68357578263d74daab6ff9439517f87da63442d244f9f23df56138d"}, - {file = "SQLAlchemy-2.0.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a86cb7063e2c9fb8e774f77fbf8475516d270a3e989da55fa05d08089d77f8c4"}, - {file = "SQLAlchemy-2.0.23-cp311-cp311-win32.whl", hash = "sha256:b41f5d65b54cdf4934ecede2f41b9c60c9f785620416e8e6c48349ab18643855"}, - {file = "SQLAlchemy-2.0.23-cp311-cp311-win_amd64.whl", hash = "sha256:9ca922f305d67605668e93991aaf2c12239c78207bca3b891cd51a4515c72e22"}, - {file = "SQLAlchemy-2.0.23-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0f7fb0c7527c41fa6fcae2be537ac137f636a41b4c5a4c58914541e2f436b45"}, - {file = "SQLAlchemy-2.0.23-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7c424983ab447dab126c39d3ce3be5bee95700783204a72549c3dceffe0fc8f4"}, - {file = "SQLAlchemy-2.0.23-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f508ba8f89e0a5ecdfd3761f82dda2a3d7b678a626967608f4273e0dba8f07ac"}, - {file = "SQLAlchemy-2.0.23-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6463aa765cf02b9247e38b35853923edbf2f6fd1963df88706bc1d02410a5577"}, - {file = "SQLAlchemy-2.0.23-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e599a51acf3cc4d31d1a0cf248d8f8d863b6386d2b6782c5074427ebb7803bda"}, - {file = 
"SQLAlchemy-2.0.23-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fd54601ef9cc455a0c61e5245f690c8a3ad67ddb03d3b91c361d076def0b4c60"}, - {file = "SQLAlchemy-2.0.23-cp312-cp312-win32.whl", hash = "sha256:42d0b0290a8fb0165ea2c2781ae66e95cca6e27a2fbe1016ff8db3112ac1e846"}, - {file = "SQLAlchemy-2.0.23-cp312-cp312-win_amd64.whl", hash = "sha256:227135ef1e48165f37590b8bfc44ed7ff4c074bf04dc8d6f8e7f1c14a94aa6ca"}, - {file = "SQLAlchemy-2.0.23-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:14aebfe28b99f24f8a4c1346c48bc3d63705b1f919a24c27471136d2f219f02d"}, - {file = "SQLAlchemy-2.0.23-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e983fa42164577d073778d06d2cc5d020322425a509a08119bdcee70ad856bf"}, - {file = "SQLAlchemy-2.0.23-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e0dc9031baa46ad0dd5a269cb7a92a73284d1309228be1d5935dac8fb3cae24"}, - {file = "SQLAlchemy-2.0.23-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5f94aeb99f43729960638e7468d4688f6efccb837a858b34574e01143cf11f89"}, - {file = "SQLAlchemy-2.0.23-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:63bfc3acc970776036f6d1d0e65faa7473be9f3135d37a463c5eba5efcdb24c8"}, - {file = "SQLAlchemy-2.0.23-cp37-cp37m-win32.whl", hash = "sha256:f48ed89dd11c3c586f45e9eec1e437b355b3b6f6884ea4a4c3111a3358fd0c18"}, - {file = "SQLAlchemy-2.0.23-cp37-cp37m-win_amd64.whl", hash = "sha256:1e018aba8363adb0599e745af245306cb8c46b9ad0a6fc0a86745b6ff7d940fc"}, - {file = "SQLAlchemy-2.0.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:64ac935a90bc479fee77f9463f298943b0e60005fe5de2aa654d9cdef46c54df"}, - {file = "SQLAlchemy-2.0.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c4722f3bc3c1c2fcc3702dbe0016ba31148dd6efcd2a2fd33c1b4897c6a19693"}, - {file = "SQLAlchemy-2.0.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4af79c06825e2836de21439cb2a6ce22b2ca129bad74f359bddd173f39582bf5"}, - {file = 
"SQLAlchemy-2.0.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:683ef58ca8eea4747737a1c35c11372ffeb84578d3aab8f3e10b1d13d66f2bc4"}, - {file = "SQLAlchemy-2.0.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d4041ad05b35f1f4da481f6b811b4af2f29e83af253bf37c3c4582b2c68934ab"}, - {file = "SQLAlchemy-2.0.23-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aeb397de65a0a62f14c257f36a726945a7f7bb60253462e8602d9b97b5cbe204"}, - {file = "SQLAlchemy-2.0.23-cp38-cp38-win32.whl", hash = "sha256:42ede90148b73fe4ab4a089f3126b2cfae8cfefc955c8174d697bb46210c8306"}, - {file = "SQLAlchemy-2.0.23-cp38-cp38-win_amd64.whl", hash = "sha256:964971b52daab357d2c0875825e36584d58f536e920f2968df8d581054eada4b"}, - {file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:616fe7bcff0a05098f64b4478b78ec2dfa03225c23734d83d6c169eb41a93e55"}, - {file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0e680527245895aba86afbd5bef6c316831c02aa988d1aad83c47ffe92655e74"}, - {file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9585b646ffb048c0250acc7dad92536591ffe35dba624bb8fd9b471e25212a35"}, - {file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4895a63e2c271ffc7a81ea424b94060f7b3b03b4ea0cd58ab5bb676ed02f4221"}, - {file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc1d21576f958c42d9aec68eba5c1a7d715e5fc07825a629015fe8e3b0657fb0"}, - {file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:967c0b71156f793e6662dd839da54f884631755275ed71f1539c95bbada9aaab"}, - {file = "SQLAlchemy-2.0.23-cp39-cp39-win32.whl", hash = "sha256:0a8c6aa506893e25a04233bc721c6b6cf844bafd7250535abb56cb6cc1368884"}, - {file = "SQLAlchemy-2.0.23-cp39-cp39-win_amd64.whl", hash = "sha256:f3420d00d2cb42432c1d0e44540ae83185ccbbc67a6054dcc8ab5387add6620b"}, - {file = "SQLAlchemy-2.0.23-py3-none-any.whl", hash = 
"sha256:31952bbc527d633b9479f5f81e8b9dfada00b91d6baba021a869095f1a97006d"}, - {file = "SQLAlchemy-2.0.23.tar.gz", hash = "sha256:c1bda93cbbe4aa2aa0aa8655c5aeda505cd219ff3e8da91d1d329e143e4aff69"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4344d059265cc8b1b1be351bfb88749294b87a8b2bbe21dfbe066c4199541ebd"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f9e2e59cbcc6ba1488404aad43de005d05ca56e069477b33ff74e91b6319735"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84daa0a2055df9ca0f148a64fdde12ac635e30edbca80e87df9b3aaf419e144a"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc8b7dabe8e67c4832891a5d322cec6d44ef02f432b4588390017f5cec186a84"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f5693145220517b5f42393e07a6898acdfe820e136c98663b971906120549da5"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db854730a25db7c956423bb9fb4bdd1216c839a689bf9cc15fada0a7fb2f4570"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-win32.whl", hash = "sha256:14a6f68e8fc96e5e8f5647ef6cda6250c780612a573d99e4d881581432ef1669"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-win_amd64.whl", hash = "sha256:87f6e732bccd7dcf1741c00f1ecf33797383128bd1c90144ac8adc02cbb98643"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:342d365988ba88ada8af320d43df4e0b13a694dbd75951f537b2d5e4cb5cd002"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f37c0caf14b9e9b9e8f6dbc81bc56db06acb4363eba5a633167781a48ef036ed"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa9373708763ef46782d10e950b49d0235bfe58facebd76917d3f5cbf5971aed"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d24f571990c05f6b36a396218f251f3e0dda916e0c687ef6fdca5072743208f5"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75432b5b14dc2fff43c50435e248b45c7cdadef73388e5610852b95280ffd0e9"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:884272dcd3ad97f47702965a0e902b540541890f468d24bd1d98bcfe41c3f018"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-win32.whl", hash = "sha256:e607cdd99cbf9bb80391f54446b86e16eea6ad309361942bf88318bcd452363c"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d505815ac340568fd03f719446a589162d55c52f08abd77ba8964fbb7eb5b5f"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0dacf67aee53b16f365c589ce72e766efaabd2b145f9de7c917777b575e3659d"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b801154027107461ee992ff4b5c09aa7cc6ec91ddfe50d02bca344918c3265c6"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59a21853f5daeb50412d459cfb13cb82c089ad4c04ec208cd14dddd99fc23b39"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29049e2c299b5ace92cbed0c1610a7a236f3baf4c6b66eb9547c01179f638ec5"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b64b183d610b424a160b0d4d880995e935208fc043d0302dd29fee32d1ee3f95"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f7a7d7fcc675d3d85fbf3b3828ecd5990b8d61bd6de3f1b260080b3beccf215"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-win32.whl", hash = "sha256:cf18ff7fc9941b8fc23437cc3e68ed4ebeff3599eec6ef5eebf305f3d2e9a7c2"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-win_amd64.whl", hash = "sha256:91f7d9d1c4dd1f4f6e092874c128c11165eafcf7c963128f79e28f8445de82d5"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:bb209a73b8307f8fe4fe46f6ad5979649be01607f11af1eb94aa9e8a3aaf77f0"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:798f717ae7c806d67145f6ae94dc7c342d3222d3b9a311a784f371a4333212c7"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fdd402169aa00df3142149940b3bf9ce7dde075928c1886d9a1df63d4b8de62"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0d3cab3076af2e4aa5693f89622bef7fa770c6fec967143e4da7508b3dceb9b9"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:74b080c897563f81062b74e44f5a72fa44c2b373741a9ade701d5f789a10ba23"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-win32.whl", hash = "sha256:87d91043ea0dc65ee583026cb18e1b458d8ec5fc0a93637126b5fc0bc3ea68c4"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-win_amd64.whl", hash = "sha256:75f99202324383d613ddd1f7455ac908dca9c2dd729ec8584c9541dd41822a2c"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:420362338681eec03f53467804541a854617faed7272fe71a1bfdb07336a381e"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c88f0c7dcc5f99bdb34b4fd9b69b93c89f893f454f40219fe923a3a2fd11625"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3be4987e3ee9d9a380b66393b77a4cd6d742480c951a1c56a23c335caca4ce3"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a159111a0f58fb034c93eeba211b4141137ec4b0a6e75789ab7a3ef3c7e7e3"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8b8cb63d3ea63b29074dcd29da4dc6a97ad1349151f2d2949495418fd6e48db9"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:736ea78cd06de6c21ecba7416499e7236a22374561493b456a1f7ffbe3f6cdb4"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-win32.whl", hash = 
"sha256:10331f129982a19df4284ceac6fe87353ca3ca6b4ca77ff7d697209ae0a5915e"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-win_amd64.whl", hash = "sha256:c55731c116806836a5d678a70c84cb13f2cedba920212ba7dcad53260997666d"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:605b6b059f4b57b277f75ace81cc5bc6335efcbcc4ccb9066695e515dbdb3900"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:665f0a3954635b5b777a55111ababf44b4fc12b1f3ba0a435b602b6387ffd7cf"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecf6d4cda1f9f6cb0b45803a01ea7f034e2f1aed9475e883410812d9f9e3cfcf"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c51db269513917394faec5e5c00d6f83829742ba62e2ac4fa5c98d58be91662f"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:790f533fa5c8901a62b6fef5811d48980adeb2f51f1290ade8b5e7ba990ba3de"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1b1180cda6df7af84fe72e4530f192231b1f29a7496951db4ff38dac1687202d"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-win32.whl", hash = "sha256:555651adbb503ac7f4cb35834c5e4ae0819aab2cd24857a123370764dc7d7e24"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-win_amd64.whl", hash = "sha256:dc55990143cbd853a5d038c05e79284baedf3e299661389654551bd02a6a68d7"}, + {file = "SQLAlchemy-2.0.25-py3-none-any.whl", hash = "sha256:a86b4240e67d4753dc3092d9511886795b3c2852abe599cffe108952f7af7ac3"}, + {file = "SQLAlchemy-2.0.25.tar.gz", hash = "sha256:a2c69a7664fb2d54b8682dd774c3b54f67f84fa123cf84dda2a5f40dcaa04e08"}, ] [package.dependencies] greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} -typing-extensions = 
">=4.2.0" +typing-extensions = ">=4.6.0" [package.extras] aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] -aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] asyncio = ["greenlet (!=0.4.17)"] asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] @@ -2232,7 +1950,7 @@ mssql-pyodbc = ["pyodbc"] mypy = ["mypy (>=0.910)"] mysql = ["mysqlclient (>=1.4.0)"] mysql-connector = ["mysql-connector-python"] -oracle = ["cx-oracle (>=8)"] +oracle = ["cx_oracle (>=8)"] oracle-oracledb = ["oracledb (>=1.0.1)"] postgresql = ["psycopg2 (>=2.7)"] postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] @@ -2242,7 +1960,7 @@ postgresql-psycopg2binary = ["psycopg2-binary"] postgresql-psycopg2cffi = ["psycopg2cffi"] postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] -sqlcipher = ["sqlcipher3-binary"] +sqlcipher = ["sqlcipher3_binary"] [[package]] name = "tomli" @@ -2284,35 +2002,24 @@ testing = ["build[virtualenv] (>=0.10)", "covdefaults (>=2.3)", "detect-test-pol [[package]] name = "types-python-dateutil" -version = "2.8.19.14" +version = "2.8.19.20240106" description = "Typing stubs for python-dateutil" optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "types-python-dateutil-2.8.19.14.tar.gz", hash = "sha256:1f4f10ac98bb8b16ade9dbee3518d9ace017821d94b057a425b069f834737f4b"}, - {file = "types_python_dateutil-2.8.19.14-py3-none-any.whl", hash = "sha256:f977b8de27787639986b4e28963263fd0e5158942b3ecef91b9335c130cb1ce9"}, + {file = "types-python-dateutil-2.8.19.20240106.tar.gz", hash = "sha256:1f8db221c3b98e6ca02ea83a58371b22c374f42ae5bbdf186db9c9a76581459f"}, + {file = "types_python_dateutil-2.8.19.20240106-py3-none-any.whl", hash = 
"sha256:efbbdc54590d0f16152fa103c9879c7d4a00e82078f6e2cf01769042165acaa2"}, ] [[package]] name = "typing-extensions" -version = "4.8.0" +version = "4.9.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, - {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, -] - -[[package]] -name = "tzdata" -version = "2023.3" -description = "Provider of IANA time zone data" -optional = false -python-versions = ">=2" -files = [ - {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, - {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, + {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, + {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, ] [[package]] @@ -2501,4 +2208,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "211c7b05291828dc0859f1df7e90d97cb941e420bae65330b930a582627565c1" +content-hash = "a2274b559edcfe85dfa748761736502fe4769dde2c29c05f406e0df8ed800aab" diff --git a/pyproject.toml b/pyproject.toml index 2d3b1673..8aed7640 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,8 @@ python-dotenv = "^1.0.0" click = "^8.1.3" pytest = "^7.3.1" pytest-mock = "^3.12.0" +flake8 = "^7.0.0" +setuptools = "^69.0.3" [tool.poetry.group.dev.dependencies] pytest = "^7.3.1" From 0a08bf0ab80ccbb768e8cb80358d017756311848 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 9 Jan 2024 08:54:30 -0800 Subject: [PATCH 89/91] update poetry env somehow dropped pandas --- 
poetry.lock | 126 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 08a4c1ea..a621e8a1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -849,6 +849,51 @@ files = [ linkml = ">=1.5.6,<2.0.0" linkml-runtime = ">=1.5.4,<2.0.0" +[[package]] +name = "numpy" +version = "1.26.3" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf"}, + {file = "numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd"}, + {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6"}, + {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b"}, + {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178"}, + {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485"}, + {file = "numpy-1.26.3-cp310-cp310-win32.whl", hash = "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3"}, + {file = "numpy-1.26.3-cp310-cp310-win_amd64.whl", hash = "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce"}, + {file = "numpy-1.26.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374"}, + {file = "numpy-1.26.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6"}, + {file = 
"numpy-1.26.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2"}, + {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda"}, + {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e"}, + {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00"}, + {file = "numpy-1.26.3-cp311-cp311-win32.whl", hash = "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b"}, + {file = "numpy-1.26.3-cp311-cp311-win_amd64.whl", hash = "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4"}, + {file = "numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13"}, + {file = "numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e"}, + {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3"}, + {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419"}, + {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166"}, + {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36"}, + {file = "numpy-1.26.3-cp312-cp312-win32.whl", hash = "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511"}, + {file = "numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b"}, + {file = "numpy-1.26.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f"}, + {file = "numpy-1.26.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f"}, + {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b"}, + {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137"}, + {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58"}, + {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb"}, + {file = "numpy-1.26.3-cp39-cp39-win32.whl", hash = "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03"}, + {file = "numpy-1.26.3-cp39-cp39-win_amd64.whl", hash = "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2"}, + {file = "numpy-1.26.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e"}, + {file = "numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0"}, + {file = "numpy-1.26.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5"}, + {file = "numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"}, +] + [[package]] name = "openpyxl" version = "3.1.2" @@ -888,6 +933,74 @@ files = [ {file = "packaging-23.2.tar.gz", hash = 
"sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] +[[package]] +name = "pandas" +version = "2.1.4" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9"}, + {file = "pandas-2.1.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034"}, + {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d"}, + {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9"}, + {file = "pandas-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139"}, + {file = "pandas-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46"}, + {file = "pandas-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092"}, + {file = "pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821"}, + {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d"}, + {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171"}, + {file = "pandas-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623"}, + {file = 
"pandas-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e"}, + {file = "pandas-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6"}, + {file = "pandas-2.1.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b"}, + {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540"}, + {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead"}, + {file = "pandas-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1"}, + {file = "pandas-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf"}, + {file = "pandas-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34"}, + {file = "pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a"}, + {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732"}, + {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8"}, + {file = "pandas-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860"}, + {file = "pandas-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984"}, + {file = "pandas-2.1.4.tar.gz", hash = 
"sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr 
(>=2.8.0)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.8.0)"] + [[package]] name = "parse" version = "1.20.0" @@ -2022,6 +2135,17 @@ files = [ {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, ] +[[package]] +name = "tzdata" +version = "2023.4" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.4-py2.py3-none-any.whl", hash = "sha256:aa3ace4329eeacda5b7beb7ea08ece826c28d761cda36e747cfbf97996d39bf3"}, + {file = "tzdata-2023.4.tar.gz", hash = "sha256:dd54c94f294765522c77399649b4fefd95522479a664a0cec87f41bebc6148c9"}, +] + [[package]] name = "uri-template" version = "1.3.0" @@ -2208,4 +2332,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "a2274b559edcfe85dfa748761736502fe4769dde2c29c05f406e0df8ed800aab" +content-hash = "b24f0167717021cc1dff5c4194b9e68f252ffc89158b068b783d37673fae9fac" diff --git a/pyproject.toml b/pyproject.toml index 8aed7640..4a00c350 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ pytest = "^7.3.1" pytest-mock = "^3.12.0" flake8 = "^7.0.0" setuptools = "^69.0.3" +pandas = "^2.1.4" [tool.poetry.group.dev.dependencies] pytest = "^7.3.1" From 27c80b9291e88cec28f7a5720ba50dcc8f075e3c Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 9 Jan 2024 08:57:30 -0800 Subject: [PATCH 90/91] update reqs somehow dropped from poetry env --- poetry.lock | 170 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 4 ++ 2 files changed, 173 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index a621e8a1..0cb077e9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -229,6 +229,73 @@ files = [ {file = 
"colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "coverage" +version = "7.4.0" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "coverage-7.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36b0ea8ab20d6a7564e89cb6135920bc9188fb5f1f7152e94e8300b7b189441a"}, + {file = "coverage-7.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0676cd0ba581e514b7f726495ea75aba3eb20899d824636c6f59b0ed2f88c471"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ca5c71a5a1765a0f8f88022c52b6b8be740e512980362f7fdbb03725a0d6b9"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c97726520f784239f6c62506bc70e48d01ae71e9da128259d61ca5e9788516"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:815ac2d0f3398a14286dc2cea223a6f338109f9ecf39a71160cd1628786bc6f5"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:80b5ee39b7f0131ebec7968baa9b2309eddb35b8403d1869e08f024efd883566"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5b2ccb7548a0b65974860a78c9ffe1173cfb5877460e5a229238d985565574ae"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:995ea5c48c4ebfd898eacb098164b3cc826ba273b3049e4a889658548e321b43"}, + {file = "coverage-7.4.0-cp310-cp310-win32.whl", hash = "sha256:79287fd95585ed36e83182794a57a46aeae0b64ca53929d1176db56aacc83451"}, + {file = "coverage-7.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b14b4f8760006bfdb6e08667af7bc2d8d9bfdb648351915315ea17645347137"}, + {file = "coverage-7.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:04387a4a6ecb330c1878907ce0dc04078ea72a869263e53c72a1ba5bbdf380ca"}, + {file = "coverage-7.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea81d8f9691bb53f4fb4db603203029643caffc82bf998ab5b59ca05560f4c06"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74775198b702868ec2d058cb92720a3c5a9177296f75bd97317c787daf711505"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76f03940f9973bfaee8cfba70ac991825611b9aac047e5c80d499a44079ec0bc"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:485e9f897cf4856a65a57c7f6ea3dc0d4e6c076c87311d4bc003f82cfe199d25"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6ae8c9d301207e6856865867d762a4b6fd379c714fcc0607a84b92ee63feff70"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bf477c355274a72435ceb140dc42de0dc1e1e0bf6e97195be30487d8eaaf1a09"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:83c2dda2666fe32332f8e87481eed056c8b4d163fe18ecc690b02802d36a4d26"}, + {file = "coverage-7.4.0-cp311-cp311-win32.whl", hash = "sha256:697d1317e5290a313ef0d369650cfee1a114abb6021fa239ca12b4849ebbd614"}, + {file = "coverage-7.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:26776ff6c711d9d835557ee453082025d871e30b3fd6c27fcef14733f67f0590"}, + {file = "coverage-7.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:13eaf476ec3e883fe3e5fe3707caeb88268a06284484a3daf8250259ef1ba143"}, + {file = "coverage-7.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846f52f46e212affb5bcf131c952fb4075b55aae6b61adc9856222df89cbe3e2"}, + {file = "coverage-7.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26f66da8695719ccf90e794ed567a1549bb2644a706b41e9f6eae6816b398c4a"}, + {file = 
"coverage-7.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:164fdcc3246c69a6526a59b744b62e303039a81e42cfbbdc171c91a8cc2f9446"}, + {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:316543f71025a6565677d84bc4df2114e9b6a615aa39fb165d697dba06a54af9"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bb1de682da0b824411e00a0d4da5a784ec6496b6850fdf8c865c1d68c0e318dd"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0e8d06778e8fbffccfe96331a3946237f87b1e1d359d7fbe8b06b96c95a5407a"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a56de34db7b7ff77056a37aedded01b2b98b508227d2d0979d373a9b5d353daa"}, + {file = "coverage-7.4.0-cp312-cp312-win32.whl", hash = "sha256:51456e6fa099a8d9d91497202d9563a320513fcf59f33991b0661a4a6f2ad450"}, + {file = "coverage-7.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd3c1e4cb2ff0083758f09be0f77402e1bdf704adb7f89108007300a6da587d0"}, + {file = "coverage-7.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d1bf53c4c8de58d22e0e956a79a5b37f754ed1ffdbf1a260d9dcfa2d8a325e"}, + {file = "coverage-7.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:109f5985182b6b81fe33323ab4707011875198c41964f014579cf82cebf2bb85"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc9d4bc55de8003663ec94c2f215d12d42ceea128da8f0f4036235a119c88ac"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc6d65b21c219ec2072c1293c505cf36e4e913a3f936d80028993dd73c7906b1"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a10a4920def78bbfff4eff8a05c51be03e42f1c3735be42d851f199144897ba"}, + {file = 
"coverage-7.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b8e99f06160602bc64da35158bb76c73522a4010f0649be44a4e167ff8555952"}, + {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7d360587e64d006402b7116623cebf9d48893329ef035278969fa3bbf75b697e"}, + {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29f3abe810930311c0b5d1a7140f6395369c3db1be68345638c33eec07535105"}, + {file = "coverage-7.4.0-cp38-cp38-win32.whl", hash = "sha256:5040148f4ec43644702e7b16ca864c5314ccb8ee0751ef617d49aa0e2d6bf4f2"}, + {file = "coverage-7.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:9864463c1c2f9cb3b5db2cf1ff475eed2f0b4285c2aaf4d357b69959941aa555"}, + {file = "coverage-7.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:936d38794044b26c99d3dd004d8af0035ac535b92090f7f2bb5aa9c8e2f5cd42"}, + {file = "coverage-7.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:799c8f873794a08cdf216aa5d0531c6a3747793b70c53f70e98259720a6fe2d7"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7defbb9737274023e2d7af02cac77043c86ce88a907c58f42b580a97d5bcca9"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1526d265743fb49363974b7aa8d5899ff64ee07df47dd8d3e37dcc0818f09ed"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf635a52fc1ea401baf88843ae8708591aa4adff875e5c23220de43b1ccf575c"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:756ded44f47f330666843b5781be126ab57bb57c22adbb07d83f6b519783b870"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0eb3c2f32dabe3a4aaf6441dde94f35687224dfd7eb2a7f47f3fd9428e421058"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bfd5db349d15c08311702611f3dccbef4b4e2ec148fcc636cf8739519b4a5c0f"}, + 
{file = "coverage-7.4.0-cp39-cp39-win32.whl", hash = "sha256:53d7d9158ee03956e0eadac38dfa1ec8068431ef8058fe6447043db1fb40d932"}, + {file = "coverage-7.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfd2a8b6b0d8e66e944d47cdec2f47c48fef2ba2f2dff5a9a75757f64172857e"}, + {file = "coverage-7.4.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:c530833afc4707fe48524a44844493f36d8727f04dcce91fb978c414a8556cc6"}, + {file = "coverage-7.4.0.tar.gz", hash = "sha256:707c0f58cb1712b8809ece32b68996ee1e609f71bd14615bd8f87a1293cb610e"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + [[package]] name = "curies" version = "0.7.4" @@ -834,6 +901,21 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "mongomock" +version = "4.1.2" +description = "Fake pymongo stub for testing simple MongoDB-dependent code" +optional = false +python-versions = "*" +files = [ + {file = "mongomock-4.1.2-py2.py3-none-any.whl", hash = "sha256:08a24938a05c80c69b6b8b19a09888d38d8c6e7328547f94d46cadb7f47209f2"}, + {file = "mongomock-4.1.2.tar.gz", hash = "sha256:f06cd62afb8ae3ef63ba31349abd220a657ef0dd4f0243a29587c5213f931b7d"}, +] + +[package.dependencies] +packaging = "*" +sentinels = "*" + [[package]] name = "nmdc-schema" version = "7.8.0" @@ -1463,6 +1545,52 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-cov" +version = "4.1.0" +description = "Pytest plugin for measuring coverage." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, + {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + +[[package]] +name = "pytest-cover" +version = "3.0.0" +description = "Pytest plugin for measuring coverage. Forked from `pytest-cov`." +optional = false +python-versions = "*" +files = [ + {file = "pytest-cover-3.0.0.tar.gz", hash = "sha256:5bdb6c1cc3dd75583bb7bc2c57f5e1034a1bfcb79d27c71aceb0b16af981dbf4"}, + {file = "pytest_cover-3.0.0-py2.py3-none-any.whl", hash = "sha256:578249955eb3b5f3991209df6e532bb770b647743b7392d3d97698dc02f39ebb"}, +] + +[package.dependencies] +pytest-cov = ">=2.0" + +[[package]] +name = "pytest-coverage" +version = "0.0" +description = "Pytest plugin for measuring coverage. Forked from `pytest-cov`." 
+optional = false +python-versions = "*" +files = [ + {file = "pytest-coverage-0.0.tar.gz", hash = "sha256:db6af2cbd7e458c7c9fd2b4207cee75258243c8a81cad31a7ee8cfad5be93c05"}, + {file = "pytest_coverage-0.0-py2.py3-none-any.whl", hash = "sha256:dedd084c5e74d8e669355325916dc011539b190355021b037242514dee546368"}, +] + +[package.dependencies] +pytest-cover = "*" + [[package]] name = "pytest-logging" version = "2015.11.4" @@ -1690,6 +1818,25 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-mock" +version = "1.11.0" +description = "Mock out responses from the requests package" +optional = false +python-versions = "*" +files = [ + {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, + {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, +] + +[package.dependencies] +requests = ">=2.3,<3" +six = "*" + +[package.extras] +fixture = ["fixtures"] +test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"] + [[package]] name = "rfc3339-validator" version = "0.1.4" @@ -1900,6 +2047,27 @@ files = [ {file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"}, ] +[[package]] +name = "semver" +version = "3.0.2" +description = "Python helper for Semantic Versioning (https://semver.org)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "semver-3.0.2-py3-none-any.whl", hash = "sha256:b1ea4686fe70b981f85359eda33199d60c53964284e0cfb4977d243e37cf4bf4"}, + {file = "semver-3.0.2.tar.gz", hash = "sha256:6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc"}, +] + +[[package]] +name = "sentinels" +version = "1.0.0" +description = "Various objects to denote special meanings in python" +optional = false +python-versions = "*" +files = 
[ + {file = "sentinels-1.0.0.tar.gz", hash = "sha256:7be0704d7fe1925e397e92d18669ace2f619c92b5d4eb21a89f31e026f9ff4b1"}, +] + [[package]] name = "setuptools" version = "69.0.3" @@ -2332,4 +2500,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "b24f0167717021cc1dff5c4194b9e68f252ffc89158b068b783d37673fae9fac" +content-hash = "ba9830cf7d267f38ced14561877761cb1e853e5a794177952be073450dbf2a3b" diff --git a/pyproject.toml b/pyproject.toml index 4a00c350..9b09c50f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,10 @@ pytest-mock = "^3.12.0" flake8 = "^7.0.0" setuptools = "^69.0.3" pandas = "^2.1.4" +pytest-coverage = "^0.0" +semver = "^3.0.2" +mongomock = "^4.1.2" +requests-mock = "^1.11.0" [tool.poetry.group.dev.dependencies] pytest = "^7.3.1" From e7596c7aa5a4929c03662a1f4fe6b69931168ca6 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 9 Jan 2024 09:20:33 -0800 Subject: [PATCH 91/91] add pytest-local-badge --- poetry.lock | 19 ++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 0cb077e9..bc045e4d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1591,6 +1591,23 @@ files = [ [package.dependencies] pytest-cover = "*" +[[package]] +name = "pytest-local-badge" +version = "1.0.3" +description = "Generate local badges (shields) reporting your test suite status." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-local-badge-1.0.3.tar.gz", hash = "sha256:e13274cc9cadf91a1e3fa290d0bbbd371a365f9d719411e475596f722c2725a5"}, + {file = "pytest_local_badge-1.0.3-py3-none-any.whl", hash = "sha256:e14dc79922598e5b27ef4398a3537535ad115df2229ebba0259f6c6d92c6047b"}, +] + +[package.dependencies] +pytest = ">=6.1.0" + +[package.extras] +develop = ["black (>=22.12.0)", "build", "flake8-bugbear", "flake8-comprehensions", "flake8-import-order", "flake8-print", "pep8-naming", "pytest (>=7.1.0,<8)", "pytest-cov", "pytest-mock"] + [[package]] name = "pytest-logging" version = "2015.11.4" @@ -2500,4 +2517,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "ba9830cf7d267f38ced14561877761cb1e853e5a794177952be073450dbf2a3b" +content-hash = "76e09a3ecb5e07197345b22578b7389f6a2c61c8f1a6077c6ded64dbc369c504" diff --git a/pyproject.toml b/pyproject.toml index 9b09c50f..4eb20f8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ pytest-coverage = "^0.0" semver = "^3.0.2" mongomock = "^4.1.2" requests-mock = "^1.11.0" +pytest-local-badge = "^1.0.3" [tool.poetry.group.dev.dependencies] pytest = "^7.3.1"